diff options
author | a-romanov <Anton.Romanov@ydb.tech> | 2023-11-08 01:16:13 +0300 |
---|---|---|
committer | a-romanov <Anton.Romanov@ydb.tech> | 2023-11-08 01:46:49 +0300 |
commit | bb0dc6a64112b14429bb9bcc898ed7ac4a5dfefb (patch) | |
tree | 0cd968356124afb794ca3d4a198d67dc60b38b44 | |
parent | c17218a1a9983a320fa7b40e8e0ef11da6579c56 (diff) | |
download | ydb-bb0dc6a64112b14429bb9bcc898ed7ac4a5dfefb.tar.gz |
KIKIMR-19512 Add more Arrow YQL kernels.
7 files changed, 103 insertions, 45 deletions
diff --git a/ydb/library/yql/core/arrow_kernels/request/CMakeLists.darwin-x86_64.txt b/ydb/library/yql/core/arrow_kernels/request/CMakeLists.darwin-x86_64.txt index ec3f3c6f6f0..fdfb8ab6c73 100644 --- a/ydb/library/yql/core/arrow_kernels/request/CMakeLists.darwin-x86_64.txt +++ b/ydb/library/yql/core/arrow_kernels/request/CMakeLists.darwin-x86_64.txt @@ -6,6 +6,12 @@ # original buildsystem will not be accepted. +get_built_tool_path( + TOOL_enum_parser_bin + TOOL_enum_parser_dependency + tools/enum_parser/enum_parser + enum_parser +) add_library(core-arrow_kernels-request) target_compile_options(core-arrow_kernels-request PRIVATE @@ -19,7 +25,13 @@ target_link_libraries(core-arrow_kernels-request PUBLIC providers-common-mkql library-yql-core library-yql-sql + tools-enum_parser-enum_serialization_runtime ) target_sources(core-arrow_kernels-request PRIVATE ${CMAKE_SOURCE_DIR}/ydb/library/yql/core/arrow_kernels/request/request.cpp ) +generate_enum_serilization(core-arrow_kernels-request + ${CMAKE_SOURCE_DIR}/ydb/library/yql/core/arrow_kernels/request/request.h + INCLUDE_HEADERS + ydb/library/yql/core/arrow_kernels/request/request.h +) diff --git a/ydb/library/yql/core/arrow_kernels/request/CMakeLists.linux-aarch64.txt b/ydb/library/yql/core/arrow_kernels/request/CMakeLists.linux-aarch64.txt index 9627c104630..68d5a8c39de 100644 --- a/ydb/library/yql/core/arrow_kernels/request/CMakeLists.linux-aarch64.txt +++ b/ydb/library/yql/core/arrow_kernels/request/CMakeLists.linux-aarch64.txt @@ -6,6 +6,12 @@ # original buildsystem will not be accepted. 
+get_built_tool_path( + TOOL_enum_parser_bin + TOOL_enum_parser_dependency + tools/enum_parser/enum_parser + enum_parser +) add_library(core-arrow_kernels-request) target_compile_options(core-arrow_kernels-request PRIVATE @@ -20,7 +26,13 @@ target_link_libraries(core-arrow_kernels-request PUBLIC providers-common-mkql library-yql-core library-yql-sql + tools-enum_parser-enum_serialization_runtime ) target_sources(core-arrow_kernels-request PRIVATE ${CMAKE_SOURCE_DIR}/ydb/library/yql/core/arrow_kernels/request/request.cpp ) +generate_enum_serilization(core-arrow_kernels-request + ${CMAKE_SOURCE_DIR}/ydb/library/yql/core/arrow_kernels/request/request.h + INCLUDE_HEADERS + ydb/library/yql/core/arrow_kernels/request/request.h +) diff --git a/ydb/library/yql/core/arrow_kernels/request/CMakeLists.linux-x86_64.txt b/ydb/library/yql/core/arrow_kernels/request/CMakeLists.linux-x86_64.txt index 9627c104630..68d5a8c39de 100644 --- a/ydb/library/yql/core/arrow_kernels/request/CMakeLists.linux-x86_64.txt +++ b/ydb/library/yql/core/arrow_kernels/request/CMakeLists.linux-x86_64.txt @@ -6,6 +6,12 @@ # original buildsystem will not be accepted. 
+get_built_tool_path( + TOOL_enum_parser_bin + TOOL_enum_parser_dependency + tools/enum_parser/enum_parser + enum_parser +) add_library(core-arrow_kernels-request) target_compile_options(core-arrow_kernels-request PRIVATE @@ -20,7 +26,13 @@ target_link_libraries(core-arrow_kernels-request PUBLIC providers-common-mkql library-yql-core library-yql-sql + tools-enum_parser-enum_serialization_runtime ) target_sources(core-arrow_kernels-request PRIVATE ${CMAKE_SOURCE_DIR}/ydb/library/yql/core/arrow_kernels/request/request.cpp ) +generate_enum_serilization(core-arrow_kernels-request + ${CMAKE_SOURCE_DIR}/ydb/library/yql/core/arrow_kernels/request/request.h + INCLUDE_HEADERS + ydb/library/yql/core/arrow_kernels/request/request.h +) diff --git a/ydb/library/yql/core/arrow_kernels/request/CMakeLists.windows-x86_64.txt b/ydb/library/yql/core/arrow_kernels/request/CMakeLists.windows-x86_64.txt index ec3f3c6f6f0..fdfb8ab6c73 100644 --- a/ydb/library/yql/core/arrow_kernels/request/CMakeLists.windows-x86_64.txt +++ b/ydb/library/yql/core/arrow_kernels/request/CMakeLists.windows-x86_64.txt @@ -6,6 +6,12 @@ # original buildsystem will not be accepted. 
+get_built_tool_path( + TOOL_enum_parser_bin + TOOL_enum_parser_dependency + tools/enum_parser/enum_parser + enum_parser +) add_library(core-arrow_kernels-request) target_compile_options(core-arrow_kernels-request PRIVATE @@ -19,7 +25,13 @@ target_link_libraries(core-arrow_kernels-request PUBLIC providers-common-mkql library-yql-core library-yql-sql + tools-enum_parser-enum_serialization_runtime ) target_sources(core-arrow_kernels-request PRIVATE ${CMAKE_SOURCE_DIR}/ydb/library/yql/core/arrow_kernels/request/request.cpp ) +generate_enum_serilization(core-arrow_kernels-request + ${CMAKE_SOURCE_DIR}/ydb/library/yql/core/arrow_kernels/request/request.h + INCLUDE_HEADERS + ydb/library/yql/core/arrow_kernels/request/request.h +) diff --git a/ydb/library/yql/core/arrow_kernels/request/request.cpp b/ydb/library/yql/core/arrow_kernels/request/request.cpp index 71700d47e09..374e04b0c89 100644 --- a/ydb/library/yql/core/arrow_kernels/request/request.cpp +++ b/ydb/library/yql/core/arrow_kernels/request/request.cpp @@ -20,13 +20,16 @@ TKernelRequestBuilder::~TKernelRequestBuilder() { } ui32 TKernelRequestBuilder::AddUnaryOp(EUnaryOp op, const TTypeAnnotationNode* arg1Type, const TTypeAnnotationNode* retType) { - TGuard<NKikimr::NMiniKQL::TScopedAlloc> allocGuard(Alloc_); - auto returnType = MakeType(retType); + const TGuard<NKikimr::NMiniKQL::TScopedAlloc> allocGuard(Alloc_); + const auto returnType = MakeType(retType); Y_UNUSED(returnType); - auto arg1 = MakeArg(arg1Type); + const auto arg = MakeArg(arg1Type); switch (op) { case EUnaryOp::Not: - Items_.emplace_back(Pb_.BlockNot(arg1)); + Items_.emplace_back(Pb_.BlockNot(arg)); + break; + case EUnaryOp::Size: + Items_.emplace_back(Pb_.BlockFunc(ToString(op), returnType, { arg })); break; } @@ -34,10 +37,10 @@ ui32 TKernelRequestBuilder::AddUnaryOp(EUnaryOp op, const TTypeAnnotationNode* a } ui32 TKernelRequestBuilder::AddBinaryOp(EBinaryOp op, const TTypeAnnotationNode* arg1Type, const TTypeAnnotationNode* arg2Type, const 
TTypeAnnotationNode* retType) { - TGuard<NKikimr::NMiniKQL::TScopedAlloc> allocGuard(Alloc_); - auto returnType = MakeType(retType); - auto arg1 = MakeArg(arg1Type); - auto arg2 = MakeArg(arg2Type); + const TGuard<NKikimr::NMiniKQL::TScopedAlloc> allocGuard(Alloc_); + const auto returnType = MakeType(retType); + const auto arg1 = MakeArg(arg1Type); + const auto arg2 = MakeArg(arg2Type); switch (op) { case EBinaryOp::And: Items_.emplace_back(Pb_.BlockAnd(arg1, arg2)); @@ -49,34 +52,29 @@ ui32 TKernelRequestBuilder::AddBinaryOp(EBinaryOp op, const TTypeAnnotationNode* Items_.emplace_back(Pb_.BlockXor(arg1, arg2)); break; case EBinaryOp::Add: - Items_.emplace_back(Pb_.BlockFunc("Add", returnType, { arg1, arg2 })); - break; case EBinaryOp::Sub: - Items_.emplace_back(Pb_.BlockFunc("Sub", returnType, { arg1, arg2 })); - break; case EBinaryOp::Mul: - Items_.emplace_back(Pb_.BlockFunc("Mul", returnType, { arg1, arg2 })); - break; case EBinaryOp::Div: - Items_.emplace_back(Pb_.BlockFunc("Div", returnType, { arg1, arg2 })); - break; + case EBinaryOp::Mod: case EBinaryOp::StartsWith: - Items_.emplace_back(Pb_.BlockFunc("StartsWith", returnType, { arg1, arg2 })); - break; case EBinaryOp::EndsWith: - Items_.emplace_back(Pb_.BlockFunc("EndsWith", returnType, { arg1, arg2 })); - break; case EBinaryOp::StringContains: - Items_.emplace_back(Pb_.BlockFunc("StringContains", returnType, { arg1, arg2 })); - break; + case EBinaryOp::Equals: + case EBinaryOp::NotEquals: + case EBinaryOp::Less: + case EBinaryOp::LessOrEqual: + case EBinaryOp::Greater: + case EBinaryOp::GreaterOrEqual: + Items_.emplace_back(Pb_.BlockFunc(ToString(op), returnType, { arg1, arg2 })); + break; } return Items_.size() - 1; } -ui32 TKernelRequestBuilder::Udf(const TString& name, bool isPolymorphic, const std::vector<const TTypeAnnotationNode*>& argTypes, +ui32 TKernelRequestBuilder::Udf(const TString& name, bool isPolymorphic, const TTypeAnnotationNode::TListType& argTypes, const TTypeAnnotationNode* retType) { - 
TGuard<NKikimr::NMiniKQL::TScopedAlloc> allocGuard(Alloc_); + const TGuard<NKikimr::NMiniKQL::TScopedAlloc> allocGuard(Alloc_); std::vector<NKikimr::NMiniKQL::TType*> inputTypes; for (const auto& type : argTypes) { inputTypes.emplace_back(MakeType(type)); @@ -88,7 +86,7 @@ ui32 TKernelRequestBuilder::Udf(const TString& name, bool isPolymorphic, const s Pb_.NewEmptyTupleType()}); auto udf = Pb_.Udf(isPolymorphic ? name : (name + "_BlocksImpl"), Pb_.NewVoid(), userType); - std::vector<NKikimr::NMiniKQL::TRuntimeNode> args; + NKikimr::NMiniKQL::TRuntimeNode::TList args; for (const auto& type : argTypes) { args.emplace_back(MakeArg(type)); } @@ -101,13 +99,13 @@ ui32 TKernelRequestBuilder::Udf(const TString& name, bool isPolymorphic, const s } ui32 TKernelRequestBuilder::JsonExists(const TTypeAnnotationNode* arg1Type, const TTypeAnnotationNode* arg2Type, const TTypeAnnotationNode* retType) { - TGuard<NKikimr::NMiniKQL::TScopedAlloc> allocGuard(Alloc_); - + const TGuard<NKikimr::NMiniKQL::TScopedAlloc> allocGuard(Alloc_); + bool isScalar = false; bool isBinaryJson = (RemoveOptionalType(NYql::GetBlockItemType(*arg1Type, isScalar))->Cast<TDataExprType>()->GetSlot() == EDataSlot::JsonDocument); auto udfName = TStringBuilder() << "Json2." << (isBinaryJson ? 
"JsonDocument" : "" ) << "SqlExists"; - + auto exists = Pb_.Udf(udfName); auto parse = Pb_.Udf("Json2.Parse"); auto compilePath = Pb_.Udf("Json2.CompilePath"); @@ -136,8 +134,8 @@ ui32 TKernelRequestBuilder::JsonExists(const TTypeAnnotationNode* arg1Type, cons } ui32 TKernelRequestBuilder::JsonValue(const TTypeAnnotationNode* arg1Type, const TTypeAnnotationNode* arg2Type, const TTypeAnnotationNode* retType) { - TGuard<NKikimr::NMiniKQL::TScopedAlloc> allocGuard(Alloc_); - + const TGuard<NKikimr::NMiniKQL::TScopedAlloc> allocGuard(Alloc_); + bool isScalar = false; bool isBinaryJson = (RemoveOptionalType(NYql::GetBlockItemType(*arg1Type, isScalar))->Cast<TDataExprType>()->GetSlot() == EDataSlot::JsonDocument); auto resultSlot = RemoveOptionalType(NYql::GetBlockItemType(*retType, isScalar))->Cast<TDataExprType>()->GetSlot(); @@ -154,7 +152,7 @@ ui32 TKernelRequestBuilder::JsonValue(const TTypeAnnotationNode* arg1Type, const } else { Y_ENSURE(false, "Invalid return type"); } - + auto jsonValue = Pb_.Udf(udfName); auto parse = Pb_.Udf("Json2.Parse"); @@ -192,10 +190,10 @@ ui32 TKernelRequestBuilder::JsonValue(const TTypeAnnotationNode* arg1Type, const } TString TKernelRequestBuilder::Serialize() { - TGuard<NKikimr::NMiniKQL::TScopedAlloc> allocGuard(Alloc_); - auto kernelTuple = Items_.empty() ? Pb_.AsScalar(Pb_.NewEmptyTuple()) : Pb_.BlockAsTuple(Items_); - auto argsTuple = ArgsItems_.empty() ? Pb_.AsScalar(Pb_.NewEmptyTuple()) : Pb_.BlockAsTuple(ArgsItems_); - auto tuple = Pb_.BlockAsTuple( { argsTuple, kernelTuple }); + const TGuard<NKikimr::NMiniKQL::TScopedAlloc> allocGuard(Alloc_); + const auto kernelTuple = Items_.empty() ? Pb_.AsScalar(Pb_.NewEmptyTuple()) : Pb_.BlockAsTuple(Items_); + const auto argsTuple = ArgsItems_.empty() ? 
Pb_.AsScalar(Pb_.NewEmptyTuple()) : Pb_.BlockAsTuple(ArgsItems_); + const auto tuple = Pb_.BlockAsTuple( { argsTuple, kernelTuple }); return NKikimr::NMiniKQL::SerializeRuntimeNode(tuple, Env_); } @@ -218,7 +216,7 @@ NKikimr::NMiniKQL::TBlockType* TKernelRequestBuilder::MakeType(const TTypeAnnota } TStringStream err; - auto ret = NCommon::BuildType(*type, Pb_, err); + const auto ret = NCommon::BuildType(*type, Pb_, err); if (!ret) { ythrow yexception() << err.Str(); } diff --git a/ydb/library/yql/core/arrow_kernels/request/request.h b/ydb/library/yql/core/arrow_kernels/request/request.h index 94a37bb1141..296172dfaf6 100644 --- a/ydb/library/yql/core/arrow_kernels/request/request.h +++ b/ydb/library/yql/core/arrow_kernels/request/request.h @@ -8,21 +8,32 @@ namespace NYql { class TKernelRequestBuilder { public: - enum EUnaryOp { - Not + enum class EUnaryOp { + Not, + Size, }; - enum EBinaryOp { + enum class EBinaryOp { And, Or, Xor, + Add, Sub, Mul, Div, + Mod, + StartsWith, EndsWith, - StringContains + StringContains, + + Equals, + NotEquals, + Less, + LessOrEqual, + Greater, + GreaterOrEqual, }; TKernelRequestBuilder(const NKikimr::NMiniKQL::IFunctionRegistry& functionRegistry); @@ -30,7 +41,7 @@ public: ui32 AddUnaryOp(EUnaryOp op, const TTypeAnnotationNode* arg1Type, const TTypeAnnotationNode* retType); ui32 AddBinaryOp(EBinaryOp op, const TTypeAnnotationNode* arg1Type, const TTypeAnnotationNode* arg2Type, const TTypeAnnotationNode* retType); - ui32 Udf(const TString& name, bool isPolymorphic, const std::vector<const TTypeAnnotationNode*>& argTypes, const TTypeAnnotationNode* retType); + ui32 Udf(const TString& name, bool isPolymorphic, const TTypeAnnotationNode::TListType& argTypes, const TTypeAnnotationNode* retType); // (json/json?,utf8)->bool/bool? 
ui32 JsonExists(const TTypeAnnotationNode* arg1Type, const TTypeAnnotationNode* arg2Type, const TTypeAnnotationNode* retType); ui32 JsonValue(const TTypeAnnotationNode* arg1Type, const TTypeAnnotationNode* arg2Type, const TTypeAnnotationNode* retType); @@ -41,13 +52,12 @@ private: NKikimr::NMiniKQL::TBlockType* MakeType(const TTypeAnnotationNode* type); private: NKikimr::NMiniKQL::TScopedAlloc Alloc_; - NKikimr::NMiniKQL::TTypeEnvironment Env_; + const NKikimr::NMiniKQL::TTypeEnvironment Env_; NKikimr::NMiniKQL::TProgramBuilder Pb_; - TVector<NKikimr::NMiniKQL::TRuntimeNode> Items_; - TVector<NKikimr::NMiniKQL::TRuntimeNode> ArgsItems_; + std::vector<NKikimr::NMiniKQL::TRuntimeNode> Items_; + std::vector<NKikimr::NMiniKQL::TRuntimeNode> ArgsItems_; std::unordered_map<const TTypeAnnotationNode*, NKikimr::NMiniKQL::TBlockType*> CachedTypes_; std::unordered_map<const TTypeAnnotationNode*, NKikimr::NMiniKQL::TRuntimeNode> CachedArgs_; }; - } diff --git a/ydb/library/yql/core/arrow_kernels/request/ya.make b/ydb/library/yql/core/arrow_kernels/request/ya.make index 45b85ecf069..f835776adeb 100644 --- a/ydb/library/yql/core/arrow_kernels/request/ya.make +++ b/ydb/library/yql/core/arrow_kernels/request/ya.make @@ -14,4 +14,6 @@ PEERDIR( YQL_LAST_ABI_VERSION() +GENERATE_ENUM_SERIALIZATION(request.h) + END() |