diff options
| author | ziganshinmr <[email protected]> | 2024-11-21 21:45:01 +0300 |
|---|---|---|
| committer | ziganshinmr <[email protected]> | 2024-11-21 21:57:57 +0300 |
| commit | c320ff3884640f83278ad36e5feeed263b523bd4 (patch) | |
| tree | e2377204a3b9b060188178a1de02641b0da04aa8 /yql/essentials/udfs | |
| parent | 00bc077e8f2272cd0206de2bca64c53300982883 (diff) | |
ListSample/ListSampleN/ListShuffle implementation
commit_hash:987b10b398caa89eee8b94b33f9ea1dc74197223
Diffstat (limited to 'yql/essentials/udfs')
7 files changed, 318 insertions, 0 deletions
diff --git a/yql/essentials/udfs/common/vector/test/canondata/result.json b/yql/essentials/udfs/common/vector/test/canondata/result.json new file mode 100644 index 00000000000..9d8010bca31 --- /dev/null +++ b/yql/essentials/udfs/common/vector/test/canondata/result.json @@ -0,0 +1,7 @@ +{ + "test.test[Vector]": [ + { + "uri": "file://test.test_Vector_/results.txt" + } + ] +} diff --git a/yql/essentials/udfs/common/vector/test/canondata/test.test_Vector_/results.txt b/yql/essentials/udfs/common/vector/test/canondata/test.test_Vector_/results.txt new file mode 100644 index 00000000000..f7bb0dbd8c6 --- /dev/null +++ b/yql/essentials/udfs/common/vector/test/canondata/test.test_Vector_/results.txt @@ -0,0 +1,65 @@ +[ + { + "Write" = [ + { + "Type" = [ + "ListType"; + [ + "StructType"; + [ + [ + "column0"; + [ + "ListType"; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "column1"; + [ + "ListType"; + [ + "DataType"; + "String" + ] + ] + ]; + [ + "column2"; + [ + "ListType"; + [ + "DataType"; + "String" + ] + ] + ] + ] + ] + ]; + "Data" = [ + [ + [ + "test1"; + "test2"; + "test3" + ]; + [ + "test1"; + "test22"; + "test3" + ]; + [ + "test3"; + "test22"; + "test1" + ] + ] + ] + } + ] + } +]
\ No newline at end of file diff --git a/yql/essentials/udfs/common/vector/test/cases/Vector.sql b/yql/essentials/udfs/common/vector/test/cases/Vector.sql new file mode 100644 index 00000000000..d25c6544204 --- /dev/null +++ b/yql/essentials/udfs/common/vector/test/cases/Vector.sql @@ -0,0 +1,23 @@ +/* syntax version 1 */ + +$typing = TupleType(VoidType(), VoidType(), String); + +$vectorCreate = YQL::Udf(AsAtom("Vector.Create"), Void(), $typing); +$vectorEmplace = YQL::Udf(AsAtom("Vector.Emplace"), Void(), $typing); +$vectorSwap = YQL::Udf(AsAtom("Vector.Swap"), Void(), $typing); +$vectorGetResult = YQL::Udf(AsAtom("Vector.GetResult"), Void(), $typing); + +$a = $vectorCreate(0); + +$a = $vectorEmplace($a, 0, "test1"); +$a = $vectorEmplace($a, 1, "test2"); +$a = $vectorEmplace($a, 2, "test3"); +$state1 = $vectorGetResult($a); + +$a = $vectorEmplace($a, 1, "test22"); +$state2 = $vectorGetResult($a); + +$a = $vectorSwap($a, 0, 2); +$state3 = $vectorGetResult($a); + +SELECT $state1, $state2, $state3; diff --git a/yql/essentials/udfs/common/vector/test/ya.make b/yql/essentials/udfs/common/vector/test/ya.make new file mode 100644 index 00000000000..0c80f4ea677 --- /dev/null +++ b/yql/essentials/udfs/common/vector/test/ya.make @@ -0,0 +1,13 @@ +YQL_UDF_TEST_CONTRIB() + +DEPENDS(yql/essentials/udfs/common/vector) + +TIMEOUT(300) + +SIZE(MEDIUM) + +IF (SANITIZER_TYPE == "memory") + TAG(ya:not_autocheck) # YQL-15385 +ENDIF() + +END() diff --git a/yql/essentials/udfs/common/vector/vector_udf.cpp b/yql/essentials/udfs/common/vector/vector_udf.cpp new file mode 100644 index 00000000000..e8b01e5a05c --- /dev/null +++ b/yql/essentials/udfs/common/vector/vector_udf.cpp @@ -0,0 +1,192 @@ +#include <yql/essentials/public/udf/udf_type_ops.h> +#include <yql/essentials/public/udf/udf_helpers.h> + +#include <vector> + +using namespace NKikimr; +using namespace NUdf; + +namespace { + +class TVector { +private: + std::vector<TUnboxedValue, TUnboxedValue::TAllocator> Vector; + +public: + TVector() + : Vector() + {} + + TUnboxedValue GetResult(const IValueBuilder* builder) { + TUnboxedValue* values = nullptr; + auto list = builder->NewArray(Vector.size(), values); + std::copy(Vector.begin(), Vector.end(), values); + + return list; + } + + void Emplace(const ui64 index, const TUnboxedValuePod& value) { + if (index < Vector.size()) { + Vector[index] = value; + } else { + Vector.push_back(value); + } + } + + void Swap(const ui64 a, const ui64 b) { + if (a < Vector.size() && b < Vector.size()) { + std::swap(Vector[a], Vector[b]); + } + } + + void Reserve(ui64 expectedSize) { + Vector.reserve(expectedSize); + } +}; + +extern const char VectorResourceName[] = "Vector.VectorResource"; +class TVectorResource: + public TBoxedResource<TVector, VectorResourceName> +{ +public: + template <typename... Args> + inline TVectorResource(Args&&... args) + : TBoxedResource(std::forward<Args>(args)...) + {} +}; + +TVectorResource* GetVectorResource(const TUnboxedValuePod& arg) { + TVectorResource::Validate(arg); + return static_cast<TVectorResource*>(arg.AsBoxed().Get()); +} + +class TVectorCreate: public TBoxedValue { +private: + TUnboxedValue Run(const IValueBuilder*, const TUnboxedValuePod* args) const override { + auto resource = new TVectorResource; + resource->Get()->Reserve(args[0].Get<ui64>()); + return TUnboxedValuePod(resource); + } +}; + +class TVectorEmplace: public TBoxedValue { +private: + TUnboxedValue Run(const IValueBuilder*, const TUnboxedValuePod* args) const override { + auto resource = GetVectorResource(args[0]); + resource->Get()->Emplace(args[1].Get<ui64>(), args[2]); + return TUnboxedValuePod(resource); + } +}; + +class TVectorSwap: public TBoxedValue { +private: + TUnboxedValue Run(const IValueBuilder*, const TUnboxedValuePod* args) const override { + auto resource = GetVectorResource(args[0]); + resource->Get()->Swap(args[1].Get<ui64>(), args[2].Get<ui64>()); + return TUnboxedValuePod(resource); + } +}; + +class TVectorGetResult: public TBoxedValue { +private: + TUnboxedValue Run(const IValueBuilder* valueBuilder, const TUnboxedValuePod* args) const override { + return GetVectorResource(args[0])->Get()->GetResult(valueBuilder); + } +}; + +static const auto CreateName = TStringRef::Of("Create"); +static const auto EmplaceName = TStringRef::Of("Emplace"); +static const auto SwapName = TStringRef::Of("Swap"); +static const auto GetResultName = TStringRef::Of("GetResult"); + +class TVectorModule: public IUdfModule { +public: + TStringRef Name() const { + return TStringRef::Of("Vector"); + } + + void CleanupOnTerminate() const final { + } + + void GetAllFunctions(IFunctionsSink& sink) const final { + sink.Add(CreateName)->SetTypeAwareness(); + sink.Add(EmplaceName)->SetTypeAwareness(); + sink.Add(SwapName)->SetTypeAwareness(); + sink.Add(GetResultName)->SetTypeAwareness(); + } + + void BuildFunctionTypeInfo( + const TStringRef& name, + TType* userType, + const TStringRef& typeConfig, + ui32 flags, + IFunctionTypeInfoBuilder& builder) const final + { + Y_UNUSED(typeConfig); + + try { + const bool typesOnly = (flags & TFlags::TypesOnly); + builder.UserType(userType); + + auto typeHelper = builder.TypeInfoHelper(); + + auto userTypeInspector = TTupleTypeInspector(*typeHelper, userType); + if (!userTypeInspector || userTypeInspector.GetElementsCount() != 3) { + builder.SetError("User type is not a 3-tuple"); + return; + } + + auto valueType = userTypeInspector.GetElementType(2); + TType* vectorType = builder.Resource(VectorResourceName); + + if (name == CreateName) { + builder.IsStrict(); + + builder.Args()->Add<ui64>().Done().Returns(vectorType); + + if (!typesOnly) { + builder.Implementation(new TVectorCreate); + } + } + + if (name == EmplaceName) { + builder.IsStrict(); + + builder.Args()->Add(vectorType).Add<ui64>().Add(valueType).Done().Returns(vectorType); + + if (!typesOnly) { + builder.Implementation(new TVectorEmplace); + } + } + + if (name == SwapName) { + builder.IsStrict(); + + builder.Args()->Add(vectorType).Add<ui64>().Add<ui64>().Done().Returns(vectorType); + + if (!typesOnly) { + builder.Implementation(new TVectorSwap); + } + } + + if (name == GetResultName) { + auto resultType = builder.List()->Item(valueType).Build(); + + builder.IsStrict(); + + builder.Args()->Add(vectorType).Done().Returns(resultType); + + if (!typesOnly) { + builder.Implementation(new TVectorGetResult); + } + } + + } catch (const std::exception& e) { + builder.SetError(CurrentExceptionMessage()); + } + } +}; + +} // namespace + +REGISTER_MODULES(TVectorModule) diff --git a/yql/essentials/udfs/common/vector/ya.make b/yql/essentials/udfs/common/vector/ya.make new file mode 100644 index 00000000000..a1403f62a61 --- /dev/null +++ b/yql/essentials/udfs/common/vector/ya.make @@ -0,0 +1,17 @@ +YQL_UDF_CONTRIB(vector_udf) + +YQL_ABI_VERSION( + 2 + 35 + 0 +) + +SRCS( + vector_udf.cpp +) + +END() + +RECURSE_FOR_TESTS( + test +) diff --git a/yql/essentials/udfs/common/ya.make b/yql/essentials/udfs/common/ya.make index 29266857edf..415f9f9b389 100644 --- a/yql/essentials/udfs/common/ya.make +++ b/yql/essentials/udfs/common/ya.make @@ -21,6 +21,7 @@ RECURSE( topfreq unicode_base url_base + vector yson2 ) |
