diff options
author | jsjant <jsjant@gmail.com> | 2025-04-23 17:59:33 +0300 |
---|---|---|
committer | GitHub <noreply@github.com> | 2025-04-23 17:59:33 +0300 |
commit | 0555fc053f1d41dd454dba019a88f786c802af15 (patch) | |
tree | 811f6da78a82c36a6ef8fa2e8eef05c5fc0991d1 | |
parent | 5b143711f0ed0d12b1bbb59a0c8f33aa543da6f2 (diff) | |
download | ydb-0555fc053f1d41dd454dba019a88f786c802af15.tar.gz |
Introduce Intersect operation to Roaring UDF (#17611)
3 files changed, 265 insertions, 1 deletion
diff --git a/ydb/library/yql/udfs/common/roaring/roaring.cpp b/ydb/library/yql/udfs/common/roaring/roaring.cpp index ba1e559d3fa..a20b788a309 100644 --- a/ydb/library/yql/udfs/common/roaring/roaring.cpp +++ b/ydb/library/yql/udfs/common/roaring/roaring.cpp @@ -428,6 +428,65 @@ namespace { } }; + class TRoaringIntersect: public TBoxedValue { + public: + TRoaringIntersect(TSourcePosition pos) + : Pos_(pos) + { + } + + static TStringRef Name() { + return TStringRef::Of("Intersect"); + } + + private: + TUnboxedValue Run(const IValueBuilder* valueBuilder, + const TUnboxedValuePod* args) const override { + Y_UNUSED(valueBuilder); + try { + auto* left = GetBitmapFromArg(args[0]); + auto* right = GetBitmapFromArg(args[1]); + + return TUnboxedValuePod(roaring_bitmap_intersect(left, right)); + } catch (const std::exception& e) { + UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).data()); + } + } + + TSourcePosition Pos_; + }; + + class TRoaringIntersectWithBinary: public TBoxedValue { + public: + TRoaringIntersectWithBinary(TSourcePosition pos) + : Pos_(pos) + { + } + + static TStringRef Name() { + return TStringRef::Of("IntersectWithBinary"); + } + + private: + TUnboxedValue Run(const IValueBuilder* valueBuilder, + const TUnboxedValuePod* args) const override { + Y_UNUSED(valueBuilder); + try { + auto* left = GetBitmapFromArg(args[0]); + auto* right = DeserializePortable(args[1].AsStringRef()); + + auto intersect = roaring_bitmap_intersect(left, right); + roaring_bitmap_free(right); + return TUnboxedValuePod(intersect); + + } catch (const std::exception& e) { + UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).data()); + } + } + + TSourcePosition Pos_; + }; + class TRoaringModule: public IUdfModule { public: class TMemoryHookInitializer { @@ -611,6 +670,24 @@ namespace { if (!typesOnly) { builder.Implementation(new TRoaringNaiveBulkAndWithBinary(builder.GetSourcePosition())); } + } else if (TRoaringIntersect::Name() == name) { + builder.Returns<bool>() + 
.Args() + ->Add<TAutoMap<TResource<RoaringResourceName>>>() + .Add<TAutoMap<TResource<RoaringResourceName>>>(); + + if (!typesOnly) { + builder.Implementation(new TRoaringIntersect(builder.GetSourcePosition())); + } + } else if (TRoaringIntersectWithBinary::Name() == name) { + builder.Returns<bool>() + .Args() + ->Add<TAutoMap<TResource<RoaringResourceName>>>() + .Add<TAutoMap<char*>>(); + + if (!typesOnly) { + builder.Implementation(new TRoaringIntersectWithBinary(builder.GetSourcePosition())); + } } else { TStringBuilder sb; sb << "Unknown function: " << name.Data(); diff --git a/ydb/library/yql/udfs/common/roaring/test/canondata/test.test_intersect_/results.txt b/ydb/library/yql/udfs/common/roaring/test/canondata/test.test_intersect_/results.txt index 9c9ed2126d9..f14baff2331 100644 --- a/ydb/library/yql/udfs/common/roaring/test/canondata/test.test_intersect_/results.txt +++ b/ydb/library/yql/udfs/common/roaring/test/canondata/test.test_intersect_/results.txt @@ -476,5 +476,185 @@ ] } ] + }; + { + "Write" = [ + { + "Type" = [ + "ListType"; + [ + "StructType"; + [ + [ + "Intersect"; + [ + "OptionalType"; + [ + "DataType"; + "Bool" + ] + ] + ] + ] + ] + ]; + "Data" = [ + [ + [ + %true + ] + ] + ] + } + ] + }; + { + "Write" = [ + { + "Type" = [ + "ListType"; + [ + "StructType"; + [ + [ + "IntersectNull0"; + [ + "OptionalType"; + [ + "DataType"; + "Bool" + ] + ] + ] + ] + ] + ]; + "Data" = [ + [ + # + ] + ] + } + ] + }; + { + "Write" = [ + { + "Type" = [ + "ListType"; + [ + "StructType"; + [ + [ + "IntersectNull1"; + [ + "OptionalType"; + [ + "DataType"; + "Bool" + ] + ] + ] + ] + ] + ]; + "Data" = [ + [ + # + ] + ] + } + ] + }; + { + "Write" = [ + { + "Type" = [ + "ListType"; + [ + "StructType"; + [ + [ + "IntersectWithBinary"; + [ + "OptionalType"; + [ + "DataType"; + "Bool" + ] + ] + ] + ] + ] + ]; + "Data" = [ + [ + [ + %true + ] + ] + ] + } + ] + }; + { + "Write" = [ + { + "Type" = [ + "ListType"; + [ + "StructType"; + [ + [ + "IntersectWithBinaryNull"; + [ + 
"OptionalType"; + [ + "DataType"; + "Bool" + ] + ] + ] + ] + ] + ]; + "Data" = [ + [ + # + ] + ] + } + ] + }; + { + "Write" = [ + { + "Type" = [ + "ListType"; + [ + "StructType"; + [ + [ + "IntersectFalse"; + [ + "OptionalType"; + [ + "DataType"; + "Bool" + ] + ] + ] + ] + ] + ]; + "Data" = [ + [ + [ + %false + ] + ] + ] + } + ] } ]
\ No newline at end of file diff --git a/ydb/library/yql/udfs/common/roaring/test/cases/intersect.sql b/ydb/library/yql/udfs/common/roaring/test/cases/intersect.sql index bdd9319a06f..ba9e53c1757 100644 --- a/ydb/library/yql/udfs/common/roaring/test/cases/intersect.sql +++ b/ydb/library/yql/udfs/common/roaring/test/cases/intersect.sql @@ -15,4 +15,11 @@ SELECT Roaring::Uint32List(Roaring::AndNotWithBinary(Roaring::Deserialize(right) SELECT Roaring::Uint32List(Roaring::AndNotWithBinary(Roaring::Deserialize(right), NULL, true)) AS AndNotWithBinaryListEmptyInplace FROM Input; SELECT Roaring::Uint32List(Roaring::NaiveBulkAnd(AsList(Roaring::Deserialize(right), Roaring::Deserialize(left)))) AS NaiveBulkAnd FROM Input; -SELECT Roaring::Uint32List(Roaring::NaiveBulkAndWithBinary(AsList(right, left))) AS NaiveBulkAndWithBinary FROM Input;
\ No newline at end of file
+SELECT Roaring::Uint32List(Roaring::NaiveBulkAndWithBinary(AsList(right, left))) AS NaiveBulkAndWithBinary FROM Input;
+
+SELECT Roaring::Intersect(Roaring::Deserialize(right), Roaring::Deserialize(left)) AS Intersect FROM Input;
+SELECT Roaring::Intersect(NULL, Roaring::Deserialize(left)) AS IntersectNull0 FROM Input;
+SELECT Roaring::Intersect(Roaring::Deserialize(right), NULL) AS IntersectNull1 FROM Input;
+SELECT Roaring::IntersectWithBinary(Roaring::Deserialize(right), left) AS IntersectWithBinary FROM Input;
+SELECT Roaring::IntersectWithBinary(Roaring::Deserialize(right), NULL) AS IntersectWithBinaryNull FROM Input;
+SELECT Roaring::Intersect(Roaring::Deserialize(right), Roaring::FromUint32List(AsList(100500))) AS IntersectFalse FROM Input;