about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author Vitaly Isaev <vitalyisaev@ydb.tech> 2025-05-29 21:05:07 +0300
committer GitHub <noreply@github.com> 2025-05-29 21:05:07 +0300
commit c46013df5a5d7f12abc2107082d1ec1ef942989c (patch)
tree 299eb45fba9a32b2cd1be16c055ce02dbd339d16
parent 8217cde3a26ce8e344652c13531cd402a49588b7 (diff)
download ydb-c46013df5a5d7f12abc2107082d1ec1ef942989c.tar.gz
YDB FQ: support REGEXP pushdown for Generic provider (#19024)
-rw-r--r-- ydb/library/yql/providers/generic/provider/ut/pushdown/pushdown_ut.cpp 74
-rw-r--r-- ydb/library/yql/providers/generic/provider/ut/pushdown/ya.make 9
-rw-r--r-- ydb/library/yql/providers/generic/provider/yql_generic_physical_opt.cpp 4
3 files changed, 72 insertions, 15 deletions
diff --git a/ydb/library/yql/providers/generic/provider/ut/pushdown/pushdown_ut.cpp b/ydb/library/yql/providers/generic/provider/ut/pushdown/pushdown_ut.cpp
index d38f0a3fee1..97599b66fee 100644
--- a/ydb/library/yql/providers/generic/provider/ut/pushdown/pushdown_ut.cpp
+++ b/ydb/library/yql/providers/generic/provider/ut/pushdown/pushdown_ut.cpp
@@ -1,29 +1,29 @@
+#include <ydb/library/yql/dq/expr_nodes/dq_expr_nodes.h>
+#include <ydb/library/yql/providers/common/db_id_async_resolver/db_async_resolver.h>
+#include <ydb/library/yql/providers/dq/common/yql_dq_settings.h>
+#include <ydb/library/yql/providers/dq/expr_nodes/dqs_expr_nodes.h>
#include <ydb/library/yql/providers/generic/expr_nodes/yql_generic_expr_nodes.h>
#include <ydb/library/yql/providers/generic/proto/source.pb.h>
-#include <ydb/library/yql/providers/generic/provider/yql_generic_state.h>
#include <ydb/library/yql/providers/generic/provider/yql_generic_provider.h>
+#include <ydb/library/yql/providers/generic/provider/yql_generic_state.h>
#include <yql/essentials/ast/yql_ast.h>
#include <yql/essentials/ast/yql_expr.h>
+#include <yql/essentials/core/dq_integration/yql_dq_integration.h>
+#include <yql/essentials/core/services/yql_out_transformers.h>
+#include <yql/essentials/core/services/yql_transform_pipeline.h>
#include <yql/essentials/core/yql_graph_transformer.h>
#include <yql/essentials/core/yql_type_annotation.h>
-#include <yql/essentials/core/services/yql_transform_pipeline.h>
-#include <yql/essentials/core/services/yql_out_transformers.h>
-#include <yql/essentials/core/dq_integration/yql_dq_integration.h>
#include <yql/essentials/minikql/invoke_builtins/mkql_builtins.h>
#include <yql/essentials/minikql/mkql_function_registry.h>
-#include <ydb/library/yql/providers/common/db_id_async_resolver/db_async_resolver.h>
#include <yql/essentials/providers/common/provider/yql_provider_names.h>
#include <yql/essentials/providers/common/transform/yql_optimize.h>
-#include <ydb/library/yql/providers/dq/common/yql_dq_settings.h>
-#include <ydb/library/yql/providers/dq/expr_nodes/dqs_expr_nodes.h>
-#include <ydb/library/yql/dq/expr_nodes/dq_expr_nodes.h>
+#include <yql/essentials/providers/common/udf_resolve/yql_simple_udf_resolver.h>
#include <yql/essentials/providers/result/provider/yql_result_provider.h>
#include <yql/essentials/sql/sql.h>
#include <yql/essentials/utils/log/log.h>
#include <library/cpp/testing/unittest/registar.h>
-
#include <library/cpp/random_provider/random_provider.h>
#include <google/protobuf/text_format.h>
@@ -246,7 +246,12 @@ struct TPushdownFixture: public NUnitTest::TBaseFixture {
TypesCtx = MakeIntrusive<TTypeAnnotationContext>();
TypesCtx->RandomProvider = CreateDeterministicRandomProvider(1);
- FunctionRegistry = CreateFunctionRegistry(CreateBuiltinRegistry())->Clone(); // TODO: remove Clone()
+ auto functionRegistry = CreateFunctionRegistry(&PrintBackTrace, NKikimr::NMiniKQL::CreateBuiltinRegistry(), false, {})->Clone();
+ NKikimr::NMiniKQL::FillStaticModules(*functionRegistry);
+ FunctionRegistry = std::move(functionRegistry);
+
+ TypesCtx->UdfResolver = NYql::NCommon::CreateSimpleUdfResolver(FunctionRegistry.Get());
+ TypesCtx->UserDataStorage = MakeIntrusive<TUserDataStorage>(nullptr, TUserDataTable(), nullptr, nullptr);
{
auto* setting = GatewaysCfg.MutableGeneric()->AddDefaultSettings();
@@ -711,4 +716,53 @@ Y_UNIT_TEST_SUITE_F(PushdownTest, TPushdownFixture) {
)proto"
);
}
+
+ Y_UNIT_TEST(RegexpPushdown) {
+ AssertFilter(
+ // Predicate: Re2.Grep with no options (Nothing) applied to col_string, Coalesce'd with false; expected result is the `regexp` filter proto below. NOTE(review): the pattern is spelled with four backslashes in both raw literals - confirm it unescapes to the intended digit class and not to a literal backslash followed by d+.
+ R"ast(
+ (Coalesce
+ (Apply (Udf '"Re2.Grep" '((String '"\\\\d+") (Nothing
+ (OptionalType
+ (StructType
+ '('"CaseSensitive" (DataType 'Bool))
+ '('"DotNl" (DataType 'Bool))
+ '('"Literal" (DataType 'Bool))
+ '('"LogErrors" (DataType 'Bool))
+ '('"LongestMatch" (DataType 'Bool))
+ '('"MaxMem" (DataType 'Uint64))
+ '('"NeverCapture" (DataType 'Bool))
+ '('"NeverNl" (DataType 'Bool))
+ '('"OneLine" (DataType 'Bool))
+ '('"PerlClasses" (DataType 'Bool))
+ '('"PosixSyntax" (DataType 'Bool))
+ '('"Utf8" (DataType 'Bool))
+ '('"WordBoundary" (DataType 'Bool))
+ )
+ )
+ )))
+ (Member $row '"col_string")
+ )
+ (Bool '"false")
+ )
+ )ast",
+ R"proto(
+ regexp {
+ value {
+ column: "col_string"
+ }
+ pattern {
+ typed_value {
+ type {
+ type_id: STRING
+ }
+ value {
+ bytes_value: "\\\\d+"
+ }
+ }
+ }
+ }
+ )proto"
+ );
+ }
}
diff --git a/ydb/library/yql/providers/generic/provider/ut/pushdown/ya.make b/ydb/library/yql/providers/generic/provider/ut/pushdown/ya.make
index 2e77d069325..a329859e385 100644
--- a/ydb/library/yql/providers/generic/provider/ut/pushdown/ya.make
+++ b/ydb/library/yql/providers/generic/provider/ut/pushdown/ya.make
@@ -7,18 +7,19 @@ SRCS(
PEERDIR(
contrib/libs/fmt
library/cpp/random_provider
+ ydb/library/yql/dq/expr_nodes
+ ydb/library/yql/providers/common/db_id_async_resolver
+ ydb/library/yql/providers/generic/expr_nodes
yql/essentials/ast
yql/essentials/core
yql/essentials/core/services
- ydb/library/yql/dq/expr_nodes
yql/essentials/minikql
- ydb/library/yql/providers/common/db_id_async_resolver
- ydb/library/yql/providers/generic/expr_nodes
+ yql/essentials/minikql/invoke_builtins/llvm16
yql/essentials/providers/result/provider
yql/essentials/public/udf/service/stub
yql/essentials/sql
- yql/essentials/minikql/invoke_builtins/llvm16
yql/essentials/sql/pg_dummy
+ yql/essentials/udfs/common/re2
)
SIZE(SMALL)
diff --git a/ydb/library/yql/providers/generic/provider/yql_generic_physical_opt.cpp b/ydb/library/yql/providers/generic/provider/yql_generic_physical_opt.cpp
index ff831e99fcf..76501306a4d 100644
--- a/ydb/library/yql/providers/generic/provider/yql_generic_physical_opt.cpp
+++ b/ydb/library/yql/providers/generic/provider/yql_generic_physical_opt.cpp
@@ -34,8 +34,10 @@ namespace NYql {
EFlag::DateTimeTypes |
EFlag::TimestampCtor |
EFlag::StringTypes |
- EFlag::LikeOperator
+ EFlag::LikeOperator |
+ EFlag::JustPassthroughOperators // For REGEXP pushdown
);
+ EnableFunction("Re2.Grep"); // For REGEXP pushdown
}
};