summaryrefslogtreecommitdiffstats
path: root/yql/essentials/udfs
diff options
context:
space:
mode:
authorziganshinmr <[email protected]>2024-11-21 21:45:01 +0300
committerziganshinmr <[email protected]>2024-11-21 21:57:57 +0300
commitc320ff3884640f83278ad36e5feeed263b523bd4 (patch)
treee2377204a3b9b060188178a1de02641b0da04aa8 /yql/essentials/udfs
parent00bc077e8f2272cd0206de2bca64c53300982883 (diff)
ListSample/ListSampleN/ListShuffle implementation
commit_hash:987b10b398caa89eee8b94b33f9ea1dc74197223
Diffstat (limited to 'yql/essentials/udfs')
-rw-r--r--yql/essentials/udfs/common/vector/test/canondata/result.json7
-rw-r--r--yql/essentials/udfs/common/vector/test/canondata/test.test_Vector_/results.txt65
-rw-r--r--yql/essentials/udfs/common/vector/test/cases/Vector.sql23
-rw-r--r--yql/essentials/udfs/common/vector/test/ya.make13
-rw-r--r--yql/essentials/udfs/common/vector/vector_udf.cpp192
-rw-r--r--yql/essentials/udfs/common/vector/ya.make17
-rw-r--r--yql/essentials/udfs/common/ya.make1
7 files changed, 318 insertions, 0 deletions
diff --git a/yql/essentials/udfs/common/vector/test/canondata/result.json b/yql/essentials/udfs/common/vector/test/canondata/result.json
new file mode 100644
index 00000000000..9d8010bca31
--- /dev/null
+++ b/yql/essentials/udfs/common/vector/test/canondata/result.json
@@ -0,0 +1,7 @@
+{
+ "test.test[Vector]": [
+ {
+ "uri": "file://test.test_Vector_/results.txt"
+ }
+ ]
+}
diff --git a/yql/essentials/udfs/common/vector/test/canondata/test.test_Vector_/results.txt b/yql/essentials/udfs/common/vector/test/canondata/test.test_Vector_/results.txt
new file mode 100644
index 00000000000..f7bb0dbd8c6
--- /dev/null
+++ b/yql/essentials/udfs/common/vector/test/canondata/test.test_Vector_/results.txt
@@ -0,0 +1,65 @@
+[
+ {
+ "Write" = [
+ {
+ "Type" = [
+ "ListType";
+ [
+ "StructType";
+ [
+ [
+ "column0";
+ [
+ "ListType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "column1";
+ [
+ "ListType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ];
+ [
+ "column2";
+ [
+ "ListType";
+ [
+ "DataType";
+ "String"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+ "Data" = [
+ [
+ [
+ "test1";
+ "test2";
+ "test3"
+ ];
+ [
+ "test1";
+ "test22";
+ "test3"
+ ];
+ [
+ "test3";
+ "test22";
+ "test1"
+ ]
+ ]
+ ]
+ }
+ ]
+ }
+] \ No newline at end of file
diff --git a/yql/essentials/udfs/common/vector/test/cases/Vector.sql b/yql/essentials/udfs/common/vector/test/cases/Vector.sql
new file mode 100644
index 00000000000..d25c6544204
--- /dev/null
+++ b/yql/essentials/udfs/common/vector/test/cases/Vector.sql
@@ -0,0 +1,23 @@
+/* syntax version 1 */
+
+-- User type passed to every Vector UDF: a 3-tuple whose third element (String)
+-- is the element type stored in the vector.
+$typing = TupleType(VoidType(), VoidType(), String);
+
+-- Bind the four functions of the Vector module with that user type.
+$vectorCreate = YQL::Udf(AsAtom("Vector.Create"), Void(), $typing);
+$vectorEmplace = YQL::Udf(AsAtom("Vector.Emplace"), Void(), $typing);
+$vectorSwap = YQL::Udf(AsAtom("Vector.Swap"), Void(), $typing);
+$vectorGetResult = YQL::Udf(AsAtom("Vector.GetResult"), Void(), $typing);
+
+-- Create a vector resource, reserving capacity for 0 elements.
+$a = $vectorCreate(0);
+
+-- Indices 0..2 are not present yet, so each call appends to the end.
+$a = $vectorEmplace($a, 0, "test1");
+$a = $vectorEmplace($a, 1, "test2");
+$a = $vectorEmplace($a, 2, "test3");
+$state1 = $vectorGetResult($a);
+
+-- Index 1 exists now, so this overwrites "test2" with "test22".
+$a = $vectorEmplace($a, 1, "test22");
+$state2 = $vectorGetResult($a);
+
+-- Exchange the first and the last element.
+$a = $vectorSwap($a, 0, 2);
+$state3 = $vectorGetResult($a);
+
+-- Canonical output: [test1, test2, test3], [test1, test22, test3], [test3, test22, test1].
+SELECT $state1, $state2, $state3;
diff --git a/yql/essentials/udfs/common/vector/test/ya.make b/yql/essentials/udfs/common/vector/test/ya.make
new file mode 100644
index 00000000000..0c80f4ea677
--- /dev/null
+++ b/yql/essentials/udfs/common/vector/test/ya.make
@@ -0,0 +1,13 @@
+YQL_UDF_TEST_CONTRIB()
+
+DEPENDS(yql/essentials/udfs/common/vector)
+
+TIMEOUT(300)
+
+SIZE(MEDIUM)
+
+IF (SANITIZER_TYPE == "memory")
+ TAG(ya:not_autocheck) # YQL-15385
+ENDIF()
+
+END()
diff --git a/yql/essentials/udfs/common/vector/vector_udf.cpp b/yql/essentials/udfs/common/vector/vector_udf.cpp
new file mode 100644
index 00000000000..e8b01e5a05c
--- /dev/null
+++ b/yql/essentials/udfs/common/vector/vector_udf.cpp
@@ -0,0 +1,192 @@
+#include <yql/essentials/public/udf/udf_type_ops.h>
+#include <yql/essentials/public/udf/udf_helpers.h>
+
+#include <vector>
+
+using namespace NKikimr;
+using namespace NUdf;
+
+namespace {
+
+// Growable sequence of unboxed values that backs the Vector UDF resource.
+class TVector {
+private:
+    std::vector<TUnboxedValue, TUnboxedValue::TAllocator> Vector;
+
+public:
+    TVector() = default;
+
+    // Builds a new array through `builder` and copies every stored element into it,
+    // preserving order. The internal storage is left untouched.
+    TUnboxedValue GetResult(const IValueBuilder* builder) {
+        TUnboxedValue* out = nullptr;
+        auto result = builder->NewArray(Vector.size(), out);
+        for (const auto& item : Vector) {
+            *out++ = item;
+        }
+
+        return result;
+    }
+
+    // Overwrites the element at `index` when it exists; otherwise appends `value`
+    // to the end (the index is not used for placement in that case).
+    void Emplace(const ui64 index, const TUnboxedValuePod& value) {
+        if (index >= Vector.size()) {
+            Vector.push_back(value);
+            return;
+        }
+
+        Vector[index] = value;
+    }
+
+    // Exchanges the elements at positions `a` and `b`.
+    // Does nothing when either position is out of range.
+    void Swap(const ui64 a, const ui64 b) {
+        const auto count = Vector.size();
+        if (a >= count || b >= count) {
+            return;
+        }
+
+        std::swap(Vector[a], Vector[b]);
+    }
+
+    // Pre-allocates storage for at least `expectedSize` elements.
+    void Reserve(ui64 expectedSize) {
+        Vector.reserve(expectedSize);
+    }
+};
+
+// Tag string that identifies the vector resource type; it is used both as the
+// TBoxedResource template argument and when building the resource type.
+extern const char VectorResourceName[] = "Vector.VectorResource";
+
+// Boxed resource carrying a TVector. All constructor arguments are forwarded
+// unchanged to the TBoxedResource base.
+class TVectorResource
+    : public TBoxedResource<TVector, VectorResourceName>
+{
+public:
+    template <typename... TArgs>
+    TVectorResource(TArgs&&... args)
+        : TBoxedResource(std::forward<TArgs>(args)...)
+    {}
+};
+
+// Validates that `arg` holds a vector resource and returns it as TVectorResource*.
+// The returned pointer is non-owning.
+TVectorResource* GetVectorResource(const TUnboxedValuePod& arg) {
+    TVectorResource::Validate(arg);
+    auto* const boxed = arg.AsBoxed().Get();
+    return static_cast<TVectorResource*>(boxed);
+}
+
+// Callable behind Vector.Create: allocates a new vector resource and reserves
+// capacity for the element count given as the first argument (ui64).
+class TVectorCreate: public TBoxedValue {
+private:
+    TUnboxedValue Run(const IValueBuilder*, const TUnboxedValuePod* args) const override {
+        auto resource = new TVectorResource;
+        // Hand the freshly allocated resource to the result value BEFORE reserving:
+        // Reserve() forwards a caller-supplied size to std::vector::reserve, which can
+        // throw (std::length_error or an allocation failure). With the pointer still
+        // raw at that point, the resource would be leaked.
+        TUnboxedValue result = TUnboxedValuePod(resource);
+        resource->Get()->Reserve(args[0].Get<ui64>());
+        return result;
+    }
+};
+
+// Callable behind Vector.Emplace: args are (vector resource, ui64 index, value).
+// Stores the value in the vector in place and returns the same resource.
+class TVectorEmplace: public TBoxedValue {
+private:
+    TUnboxedValue Run(const IValueBuilder*, const TUnboxedValuePod* args) const override {
+        TVectorResource* const holder = GetVectorResource(args[0]);
+        const ui64 position = args[1].Get<ui64>();
+        holder->Get()->Emplace(position, args[2]);
+        return TUnboxedValuePod(holder);
+    }
+};
+
+// Callable behind Vector.Swap: args are (vector resource, ui64 first, ui64 second).
+// Swaps the two positions in place and returns the same resource.
+class TVectorSwap: public TBoxedValue {
+private:
+    TUnboxedValue Run(const IValueBuilder*, const TUnboxedValuePod* args) const override {
+        TVectorResource* const holder = GetVectorResource(args[0]);
+        const ui64 first = args[1].Get<ui64>();
+        const ui64 second = args[2].Get<ui64>();
+        holder->Get()->Swap(first, second);
+        return TUnboxedValuePod(holder);
+    }
+};
+
+// Callable behind Vector.GetResult: takes the vector resource as its only argument
+// and returns its current contents as an array built with the value builder.
+class TVectorGetResult: public TBoxedValue {
+private:
+    TUnboxedValue Run(const IValueBuilder* valueBuilder, const TUnboxedValuePod* args) const override {
+        auto vector = GetVectorResource(args[0])->Get();
+        return vector->GetResult(valueBuilder);
+    }
+};
+
+// Names of the functions exported by the module (without the "Vector." prefix).
+const auto CreateName = TStringRef::Of("Create");
+const auto EmplaceName = TStringRef::Of("Emplace");
+const auto SwapName = TStringRef::Of("Swap");
+const auto GetResultName = TStringRef::Of("GetResult");
+
+// UDF module "Vector": exposes Create, Emplace, Swap and GetResult, all of which
+// operate on a vector resource whose element type comes from the user type.
+class TVectorModule: public IUdfModule {
+public:
+    // Module name; functions are addressed as "Vector.<Function>".
+    TStringRef Name() const {
+        return TStringRef::Of("Vector");
+    }
+
+    // The module keeps no state, so there is nothing to clean up.
+    void CleanupOnTerminate() const final {
+    }
+
+    // Registers the four exported functions, each flagged as type-aware.
+    void GetAllFunctions(IFunctionsSink& sink) const final {
+        for (const auto& functionName : {CreateName, EmplaceName, SwapName, GetResultName}) {
+            sink.Add(functionName)->SetTypeAwareness();
+        }
+    }
+
+    // Describes the signature of function `name` and, unless only types were
+    // requested via `flags`, attaches its implementation.
+    // `userType` must be a 3-tuple; its third element is the vector element type.
+    // Any std::exception raised while building is reported through builder.SetError.
+    void BuildFunctionTypeInfo(
+        const TStringRef& name,
+        TType* userType,
+        const TStringRef& typeConfig,
+        ui32 flags,
+        IFunctionTypeInfoBuilder& builder) const final
+    {
+        Y_UNUSED(typeConfig);
+
+        try {
+            const bool withImplementation = !(flags & TFlags::TypesOnly);
+            builder.UserType(userType);
+
+            auto helper = builder.TypeInfoHelper();
+
+            TTupleTypeInspector tupleInspector(*helper, userType);
+            const bool isTriple = tupleInspector && tupleInspector.GetElementsCount() == 3;
+            if (!isTriple) {
+                builder.SetError("User type is not a 3-tuple");
+                return;
+            }
+
+            auto valueType = tupleInspector.GetElementType(2);
+            TType* vectorType = builder.Resource(VectorResourceName);
+
+            // The names are distinct, so at most one branch applies.
+            if (name == CreateName) {
+                // Create(ui64) -> Vector
+                builder.IsStrict();
+                builder.Args()->Add<ui64>().Done().Returns(vectorType);
+                if (withImplementation) {
+                    builder.Implementation(new TVectorCreate);
+                }
+            } else if (name == EmplaceName) {
+                // Emplace(Vector, ui64, T) -> Vector
+                builder.IsStrict();
+                builder.Args()->Add(vectorType).Add<ui64>().Add(valueType).Done().Returns(vectorType);
+                if (withImplementation) {
+                    builder.Implementation(new TVectorEmplace);
+                }
+            } else if (name == SwapName) {
+                // Swap(Vector, ui64, ui64) -> Vector
+                builder.IsStrict();
+                builder.Args()->Add(vectorType).Add<ui64>().Add<ui64>().Done().Returns(vectorType);
+                if (withImplementation) {
+                    builder.Implementation(new TVectorSwap);
+                }
+            } else if (name == GetResultName) {
+                // GetResult(Vector) -> List<T>
+                auto listType = builder.List()->Item(valueType).Build();
+                builder.IsStrict();
+                builder.Args()->Add(vectorType).Done().Returns(listType);
+                if (withImplementation) {
+                    builder.Implementation(new TVectorGetResult);
+                }
+            }
+        } catch (const std::exception&) {
+            builder.SetError(CurrentExceptionMessage());
+        }
+    }
+};
+
+} // namespace
+
+REGISTER_MODULES(TVectorModule)
diff --git a/yql/essentials/udfs/common/vector/ya.make b/yql/essentials/udfs/common/vector/ya.make
new file mode 100644
index 00000000000..a1403f62a61
--- /dev/null
+++ b/yql/essentials/udfs/common/vector/ya.make
@@ -0,0 +1,17 @@
+YQL_UDF_CONTRIB(vector_udf)
+
+YQL_ABI_VERSION(
+ 2
+ 35
+ 0
+)
+
+SRCS(
+ vector_udf.cpp
+)
+
+END()
+
+RECURSE_FOR_TESTS(
+ test
+)
diff --git a/yql/essentials/udfs/common/ya.make b/yql/essentials/udfs/common/ya.make
index 29266857edf..415f9f9b389 100644
--- a/yql/essentials/udfs/common/ya.make
+++ b/yql/essentials/udfs/common/ya.make
@@ -21,6 +21,7 @@ RECURSE(
topfreq
unicode_base
url_base
+ vector
yson2
)