diff options
author | Vitaly Isaev <vitalyisaev@ydb.tech> | 2025-05-29 21:05:07 +0300 |
---|---|---|
committer | GitHub <noreply@github.com> | 2025-05-29 21:05:07 +0300 |
commit | c46013df5a5d7f12abc2107082d1ec1ef942989c (patch) | |
tree | 299eb45fba9a32b2cd1be16c055ce02dbd339d16 | |
parent | 8217cde3a26ce8e344652c13531cd402a49588b7 (diff) | |
download | ydb-c46013df5a5d7f12abc2107082d1ec1ef942989c.tar.gz |
YDB FQ: support REGEXP pushdown for Generic provider (#19024)
3 files changed, 72 insertions, 15 deletions
diff --git a/ydb/library/yql/providers/generic/provider/ut/pushdown/pushdown_ut.cpp b/ydb/library/yql/providers/generic/provider/ut/pushdown/pushdown_ut.cpp index d38f0a3fee1..97599b66fee 100644 --- a/ydb/library/yql/providers/generic/provider/ut/pushdown/pushdown_ut.cpp +++ b/ydb/library/yql/providers/generic/provider/ut/pushdown/pushdown_ut.cpp @@ -1,29 +1,29 @@ +#include <ydb/library/yql/dq/expr_nodes/dq_expr_nodes.h> +#include <ydb/library/yql/providers/common/db_id_async_resolver/db_async_resolver.h> +#include <ydb/library/yql/providers/dq/common/yql_dq_settings.h> +#include <ydb/library/yql/providers/dq/expr_nodes/dqs_expr_nodes.h> #include <ydb/library/yql/providers/generic/expr_nodes/yql_generic_expr_nodes.h> #include <ydb/library/yql/providers/generic/proto/source.pb.h> -#include <ydb/library/yql/providers/generic/provider/yql_generic_state.h> #include <ydb/library/yql/providers/generic/provider/yql_generic_provider.h> +#include <ydb/library/yql/providers/generic/provider/yql_generic_state.h> #include <yql/essentials/ast/yql_ast.h> #include <yql/essentials/ast/yql_expr.h> +#include <yql/essentials/core/dq_integration/yql_dq_integration.h> +#include <yql/essentials/core/services/yql_out_transformers.h> +#include <yql/essentials/core/services/yql_transform_pipeline.h> #include <yql/essentials/core/yql_graph_transformer.h> #include <yql/essentials/core/yql_type_annotation.h> -#include <yql/essentials/core/services/yql_transform_pipeline.h> -#include <yql/essentials/core/services/yql_out_transformers.h> -#include <yql/essentials/core/dq_integration/yql_dq_integration.h> #include <yql/essentials/minikql/invoke_builtins/mkql_builtins.h> #include <yql/essentials/minikql/mkql_function_registry.h> -#include <ydb/library/yql/providers/common/db_id_async_resolver/db_async_resolver.h> #include <yql/essentials/providers/common/provider/yql_provider_names.h> #include <yql/essentials/providers/common/transform/yql_optimize.h> -#include <ydb/library/yql/providers/dq/common/yql_dq_settings.h> -#include <ydb/library/yql/providers/dq/expr_nodes/dqs_expr_nodes.h> -#include <ydb/library/yql/dq/expr_nodes/dq_expr_nodes.h> +#include <yql/essentials/providers/common/udf_resolve/yql_simple_udf_resolver.h> #include <yql/essentials/providers/result/provider/yql_result_provider.h> #include <yql/essentials/sql/sql.h> #include <yql/essentials/utils/log/log.h> #include <library/cpp/testing/unittest/registar.h> - #include <library/cpp/random_provider/random_provider.h> #include <google/protobuf/text_format.h> @@ -246,7 +246,12 @@ struct TPushdownFixture: public NUnitTest::TBaseFixture { TypesCtx = MakeIntrusive<TTypeAnnotationContext>(); TypesCtx->RandomProvider = CreateDeterministicRandomProvider(1); - FunctionRegistry = CreateFunctionRegistry(CreateBuiltinRegistry())->Clone(); // TODO: remove Clone() + auto functionRegistry = CreateFunctionRegistry(&PrintBackTrace, NKikimr::NMiniKQL::CreateBuiltinRegistry(), false, {})->Clone(); + NKikimr::NMiniKQL::FillStaticModules(*functionRegistry); + FunctionRegistry = std::move(functionRegistry); + + TypesCtx->UdfResolver = NYql::NCommon::CreateSimpleUdfResolver(FunctionRegistry.Get()); + TypesCtx->UserDataStorage = MakeIntrusive<TUserDataStorage>(nullptr, TUserDataTable(), nullptr, nullptr); { auto* setting = GatewaysCfg.MutableGeneric()->AddDefaultSettings(); @@ -711,4 +716,53 @@ Y_UNIT_TEST_SUITE_F(PushdownTest, TPushdownFixture) { )proto" ); } + + Y_UNIT_TEST(RegexpPushdown) { + AssertFilter( + // Test REGEXP pushdown with a simple pattern matching digits + R"ast( + (Coalesce + (Apply (Udf '"Re2.Grep" '((String '"\\\\d+") (Nothing + (OptionalType + (StructType + '('"CaseSensitive" (DataType 'Bool)) + '('"DotNl" (DataType 'Bool)) + '('"Literal" (DataType 'Bool)) + '('"LogErrors" (DataType 'Bool)) + '('"LongestMatch" (DataType 'Bool)) + '('"MaxMem" (DataType 'Uint64)) + '('"NeverCapture" (DataType 'Bool)) + '('"NeverNl" (DataType 'Bool)) + '('"OneLine" (DataType 'Bool)) + '('"PerlClasses" (DataType 'Bool)) + '('"PosixSyntax" (DataType 'Bool)) + '('"Utf8" (DataType 'Bool)) + '('"WordBoundary" (DataType 'Bool)) + ) + ) + ))) + (Member $row '"col_string") + ) + (Bool '"false") + ) + )ast", + R"proto( + regexp { + value { + column: "col_string" + } + pattern { + typed_value { + type { + type_id: STRING + } + value { + bytes_value: "\\\\d+" + } + } + } + } + )proto" + ); + } } diff --git a/ydb/library/yql/providers/generic/provider/ut/pushdown/ya.make b/ydb/library/yql/providers/generic/provider/ut/pushdown/ya.make index 2e77d069325..a329859e385 100644 --- a/ydb/library/yql/providers/generic/provider/ut/pushdown/ya.make +++ b/ydb/library/yql/providers/generic/provider/ut/pushdown/ya.make @@ -7,18 +7,19 @@ SRCS( PEERDIR( contrib/libs/fmt library/cpp/random_provider + ydb/library/yql/dq/expr_nodes + ydb/library/yql/providers/common/db_id_async_resolver + ydb/library/yql/providers/generic/expr_nodes yql/essentials/ast yql/essentials/core yql/essentials/core/services - ydb/library/yql/dq/expr_nodes yql/essentials/minikql - ydb/library/yql/providers/common/db_id_async_resolver - ydb/library/yql/providers/generic/expr_nodes + yql/essentials/minikql/invoke_builtins/llvm16 yql/essentials/providers/result/provider yql/essentials/public/udf/service/stub yql/essentials/sql - yql/essentials/minikql/invoke_builtins/llvm16 yql/essentials/sql/pg_dummy + yql/essentials/udfs/common/re2 ) SIZE(SMALL) diff --git a/ydb/library/yql/providers/generic/provider/yql_generic_physical_opt.cpp b/ydb/library/yql/providers/generic/provider/yql_generic_physical_opt.cpp index ff831e99fcf..76501306a4d 100644 --- a/ydb/library/yql/providers/generic/provider/yql_generic_physical_opt.cpp +++ b/ydb/library/yql/providers/generic/provider/yql_generic_physical_opt.cpp @@ -34,8 +34,10 @@ namespace NYql { EFlag::DateTimeTypes | EFlag::TimestampCtor | EFlag::StringTypes | - EFlag::LikeOperator + EFlag::LikeOperator | + EFlag::JustPassthroughOperators // For REGEXP pushdown ); + EnableFunction("Re2.Grep"); // For REGEXP pushdown } }; |