diff options
author | aidarsamer <aidarsamer@ydb.tech> | 2023-03-14 14:38:22 +0300 |
---|---|---|
committer | aidarsamer <aidarsamer@ydb.tech> | 2023-03-14 14:38:22 +0300 |
commit | 53f9702804fc067d0338dd0d0e1bc8d05e40a426 (patch) | |
tree | 2bbba02e3bf970e32c3595ec39fd948bf21e8d03 | |
parent | 9e24e2f07b2b2b983f765a755904a1c1b321f217 (diff) | |
download | ydb-53f9702804fc067d0338dd0d0e1bc8d05e40a426.tar.gz |
Add partial filter pushdown functionality for OLAP
Add check for strict filters and make changes to filter pushdown
Implement partial pushdown
Add partial filter pushdown functionality
20 files changed, 1393 insertions, 140 deletions
diff --git a/ydb/core/kqp/opt/physical/kqp_opt_phy_olap_filter.cpp b/ydb/core/kqp/opt/physical/kqp_opt_phy_olap_filter.cpp index d75ce0979db..1b102843829 100644 --- a/ydb/core/kqp/opt/physical/kqp_opt_phy_olap_filter.cpp +++ b/ydb/core/kqp/opt/physical/kqp_opt_phy_olap_filter.cpp @@ -3,6 +3,7 @@ #include <ydb/core/kqp/common/kqp_yql.h> #include <ydb/library/yql/core/extract_predicate/extract_predicate.h> +#include <ydb/library/yql/core/yql_opt_utils.h> namespace NKikimr::NKqp::NOpt { @@ -17,39 +18,6 @@ bool IsFalseLiteral(TExprBase node) { return node.Maybe<TCoBool>() && !FromString<bool>(node.Cast<TCoBool>().Literal().Value()); } -bool ValidateIfArgument(const TCoOptionalIf& optionalIf, const TExprNode* rawLambdaArg) { - // Check it is SELECT * or SELECT `field1`, `field2`... - if (optionalIf.Value().Raw() == rawLambdaArg) { - return true; - } - - // Ok, maybe it is SELECT `field1`, `field2` ? - auto maybeAsStruct = optionalIf.Value().Maybe<TCoAsStruct>(); - if (!maybeAsStruct) { - return false; - } - - for (auto arg : maybeAsStruct.Cast()) { - // Check that second tuple element is Member(lambda arg) - auto tuple = arg.Maybe<TExprList>().Cast(); - if (tuple.Size() != 2) { - return false; - } - - auto maybeMember = tuple.Item(1).Maybe<TCoMember>(); - if (!maybeMember) { - return false; - } - - auto member = maybeMember.Cast(); - if (member.Struct().Raw() != rawLambdaArg) { - return false; - } - } - - return true; -} - TVector<TExprBase> ConvertComparisonNode(const TExprBase& nodeIn) { TVector<TExprBase> out; @@ -417,6 +385,40 @@ TMaybeNode<TExprBase> PredicatePushdown(const TExprBase& predicate, TExprContext .Done(); } +void SplitForPartialPushdown(const TPredicateNode& predicateTree, TPredicateNode& predicatesToPush, TPredicateNode& remainingPredicates, + TExprContext& ctx, TPositionHandle pos) +{ + if (predicateTree.CanBePushed) { + predicatesToPush = predicateTree; + remainingPredicates.ExprNode = Build<TCoBool>(ctx, pos).Literal().Build("true").Done(); + return; + } + + if (predicateTree.Op != EBoolOp::And) { + // We can partially pushdown predicates from AND operator only. + // For OR operator we would need to have several read operators which is not acceptable. + // TODO: Add support for NOT(op1 OR op2), because it expands to (!op1 AND !op2). + remainingPredicates = predicateTree; + return; + } + + bool isFoundNotStrictOp = false; + std::vector<TPredicateNode> pushable; + std::vector<TPredicateNode> remaining; + for (auto& predicate : predicateTree.Children) { + if (predicate.CanBePushed && !isFoundNotStrictOp) { + pushable.emplace_back(predicate); + } else { + if (!IsStrict(predicate.ExprNode.Cast().Ptr())) { + isFoundNotStrictOp = true; + } + remaining.emplace_back(predicate); + } + } + predicatesToPush.SetPredicates(pushable, ctx, pos); + remainingPredicates.SetPredicates(remaining, ctx, pos); +} + } // anonymous namespace end TExprBase KqpPushOlapFilter(TExprBase node, TExprContext& ctx, const TKqpOptimizeContext& kqpCtx, @@ -450,19 +452,14 @@ TExprBase KqpPushOlapFilter(TExprBase node, TExprContext& ctx, const TKqpOptimiz } auto optionalIf = maybeOptionalIf.Cast(); - if (!ValidateIfArgument(optionalIf, lambdaArg)) { - return node; - } - TPredicateNode predicateTree(optionalIf.Predicate()); CollectPredicates(optionalIf.Predicate(), predicateTree, lambdaArg, read.Process().Body()); + YQL_ENSURE(predicateTree.IsValid(), "Collected OLAP predicates are invalid"); TPredicateNode predicatesToPush; TPredicateNode remainingPredicates; - if (predicateTree.CanBePushed) { - predicatesToPush = predicateTree; - remainingPredicates.ExprNode = Build<TCoBool>(ctx, node.Pos()).Literal().Build("true").Done(); - } else { + SplitForPartialPushdown(predicateTree, predicatesToPush, remainingPredicates, ctx, node.Pos()); + if (!predicatesToPush.IsValid()) { return node; } @@ -516,7 +513,10 @@ TExprBase KqpPushOlapFilter(TExprBase node, TExprContext& ctx, const TKqpOptimiz .Lambda<TCoLambda>() .Args({"new_arg"}) .Body<TCoOptionalIf>() - .Predicate(remainingPredicates.ExprNode.Cast()) + .Predicate<TExprApplier>() + .Apply(remainingPredicates.ExprNode.Cast()) + .With(lambda.Args().Arg(0), "new_arg") + .Build() .Value<TExprApplier>() .Apply(optionalIf.Value()) .With(lambda.Args().Arg(0), "new_arg") diff --git a/ydb/core/kqp/opt/physical/kqp_opt_phy_olap_filter_collection.cpp b/ydb/core/kqp/opt/physical/kqp_opt_phy_olap_filter_collection.cpp index f253538fe45..cf2c08b5324 100644 --- a/ydb/core/kqp/opt/physical/kqp_opt_phy_olap_filter_collection.cpp +++ b/ydb/core/kqp/opt/physical/kqp_opt_phy_olap_filter_collection.cpp @@ -337,11 +337,6 @@ bool CoalesceCanBePushed(const TCoCoalesce& coalesce, const TExprNode* lambdaArg return false; } - if (coalesce.Value().Cast<TCoBool>().Literal().Value() != "false") { - // Maybe we don't need this check - return false; - } - if (auto maybeCompare = coalesce.Predicate().Maybe<TCoCompare>()) { return CompareCanBePushed(maybeCompare.Cast(), lambdaArg, lambdaBody); } else if (auto maybeFlatmap = coalesce.Predicate().Maybe<TCoFlatMap>()) { @@ -378,6 +373,41 @@ void CollectPredicatesForBinaryBoolOperators(const TExprBase& opNode, TPredicate } // anonymous namespace end +bool TPredicateNode::IsValid() const { + bool res = true; + if (Op != EBoolOp::Undefined) { + res &= !Children.empty(); + for (auto& child : Children) { + res &= child.IsValid(); + } + } + + return res && ExprNode.IsValid(); +} + +void TPredicateNode::SetPredicates(const std::vector<TPredicateNode>& predicates, TExprContext& ctx, TPositionHandle pos) { + auto predicatesSize = predicates.size(); + if (predicatesSize == 0) { + return; + } else if (predicatesSize == 1) { + *this = predicates[0]; + } else { + Op = EBoolOp::And; + Children = predicates; + CanBePushed = true; + + TVector<TExprBase> exprNodes; + exprNodes.reserve(predicatesSize); + for (auto& pred : predicates) { + exprNodes.emplace_back(pred.ExprNode.Cast()); + CanBePushed &= pred.CanBePushed; + } + ExprNode = Build<TCoAnd>(ctx, pos) + .Add(exprNodes) + .Done(); + } +} + void CollectPredicates(const TExprBase& predicate, TPredicateNode& predicateTree, const TExprNode* lambdaArg, const TExprBase& lambdaBody) { if (predicate.Maybe<TCoCoalesce>()) { auto coalesce = predicate.Cast<TCoCoalesce>(); diff --git a/ydb/core/kqp/opt/physical/kqp_opt_phy_olap_filter_collection.h b/ydb/core/kqp/opt/physical/kqp_opt_phy_olap_filter_collection.h index 005cbecc8b4..cfec72ac53d 100644 --- a/ydb/core/kqp/opt/physical/kqp_opt_phy_olap_filter_collection.h +++ b/ydb/core/kqp/opt/physical/kqp_opt_phy_olap_filter_collection.h @@ -40,17 +40,8 @@ struct TPredicateNode { ~TPredicateNode() {} - bool IsValid() const { - bool res = true; - if (Op != EBoolOp::Undefined) { - res &= !Children.empty(); - for (auto& child : Children) { - res &= child.IsValid(); - } - } - - return res && ExprNode.IsValid(); - } + bool IsValid() const; + void SetPredicates(const std::vector<TPredicateNode>& predicates, NYql::TExprContext& ctx, NYql::TPositionHandle pos); NYql::NNodes::TMaybeNode<NYql::NNodes::TExprBase> ExprNode; std::vector<TPredicateNode> Children; diff --git a/ydb/core/kqp/ut/common/CMakeLists.darwin-x86_64.txt b/ydb/core/kqp/ut/common/CMakeLists.darwin-x86_64.txt index fd14efe2ef1..ac4ece79a0a 100644 --- a/ydb/core/kqp/ut/common/CMakeLists.darwin-x86_64.txt +++ b/ydb/core/kqp/ut/common/CMakeLists.darwin-x86_64.txt @@ -27,4 +27,5 @@ target_link_libraries(kqp-ut-common PUBLIC ) target_sources(kqp-ut-common PRIVATE ${CMAKE_SOURCE_DIR}/ydb/core/kqp/ut/common/kqp_ut_common.cpp + ${CMAKE_SOURCE_DIR}/ydb/core/kqp/ut/common/re2_udf.cpp ) diff --git a/ydb/core/kqp/ut/common/CMakeLists.linux-aarch64.txt b/ydb/core/kqp/ut/common/CMakeLists.linux-aarch64.txt index 6f4af35c966..84aa8d90e3b 100644 --- a/ydb/core/kqp/ut/common/CMakeLists.linux-aarch64.txt +++ b/ydb/core/kqp/ut/common/CMakeLists.linux-aarch64.txt @@ -28,4 +28,5 @@ target_link_libraries(kqp-ut-common PUBLIC ) target_sources(kqp-ut-common PRIVATE ${CMAKE_SOURCE_DIR}/ydb/core/kqp/ut/common/kqp_ut_common.cpp + ${CMAKE_SOURCE_DIR}/ydb/core/kqp/ut/common/re2_udf.cpp ) diff --git a/ydb/core/kqp/ut/common/CMakeLists.linux-x86_64.txt b/ydb/core/kqp/ut/common/CMakeLists.linux-x86_64.txt index 6f4af35c966..84aa8d90e3b 100644 --- a/ydb/core/kqp/ut/common/CMakeLists.linux-x86_64.txt +++ b/ydb/core/kqp/ut/common/CMakeLists.linux-x86_64.txt @@ -28,4 +28,5 @@ target_link_libraries(kqp-ut-common PUBLIC ) target_sources(kqp-ut-common PRIVATE ${CMAKE_SOURCE_DIR}/ydb/core/kqp/ut/common/kqp_ut_common.cpp + ${CMAKE_SOURCE_DIR}/ydb/core/kqp/ut/common/re2_udf.cpp ) diff --git a/ydb/core/kqp/ut/common/CMakeLists.windows-x86_64.txt b/ydb/core/kqp/ut/common/CMakeLists.windows-x86_64.txt index fd14efe2ef1..ac4ece79a0a 100644 --- a/ydb/core/kqp/ut/common/CMakeLists.windows-x86_64.txt +++ b/ydb/core/kqp/ut/common/CMakeLists.windows-x86_64.txt @@ -27,4 +27,5 @@ target_link_libraries(kqp-ut-common PUBLIC ) target_sources(kqp-ut-common PRIVATE ${CMAKE_SOURCE_DIR}/ydb/core/kqp/ut/common/kqp_ut_common.cpp + ${CMAKE_SOURCE_DIR}/ydb/core/kqp/ut/common/re2_udf.cpp ) diff --git a/ydb/core/kqp/ut/common/kqp_ut_common.cpp b/ydb/core/kqp/ut/common/kqp_ut_common.cpp index cc2ca688bd6..8bbf6325659 100644 --- a/ydb/core/kqp/ut/common/kqp_ut_common.cpp +++ b/ydb/core/kqp/ut/common/kqp_ut_common.cpp @@ -50,11 +50,13 @@ SIMPLE_UDF(TRandString, char*(ui32)) { } SIMPLE_MODULE(TTestUdfsModule, TTestFilter, TTestFilterTerminate, TRandString); +NYql::NUdf::TUniquePtr<NYql::NUdf::IUdfModule> CreateRe2Module(); NMiniKQL::IFunctionRegistry* UdfFrFactory(const NScheme::TTypeRegistry& typeRegistry) { Y_UNUSED(typeRegistry); auto funcRegistry = NMiniKQL::CreateFunctionRegistry(NMiniKQL::CreateBuiltinRegistry())->Clone(); funcRegistry->AddModule("", "TestUdfs", new TTestUdfsModule()); + funcRegistry->AddModule("", "Re2", CreateRe2Module()); NKikimr::NMiniKQL::FillStaticModules(*funcRegistry); return funcRegistry.Release(); } diff --git a/ydb/core/kqp/ut/common/re2_udf.cpp b/ydb/core/kqp/ut/common/re2_udf.cpp new file mode 100644 index 00000000000..f5582be121a --- /dev/null +++ b/ydb/core/kqp/ut/common/re2_udf.cpp @@ -0,0 +1,11 @@ +// HACK: the TRe2Module class is in an anonymous namespace +// so including the source cpp is the only way to access it +#include <ydb/library/yql/udfs/common/re2/re2_udf.cpp> + +namespace NKikimr::NKqp { + +NYql::NUdf::TUniquePtr<NYql::NUdf::IUdfModule> CreateRe2Module() { + return new TRe2Module<true>(); +} + +} // namespace NKikimr::NKqp diff --git a/ydb/core/kqp/ut/olap/kqp_olap_ut.cpp b/ydb/core/kqp/ut/olap/kqp_olap_ut.cpp index c38f8a05316..34871b51924 100644 --- a/ydb/core/kqp/ut/olap/kqp_olap_ut.cpp +++ b/ydb/core/kqp/ut/olap/kqp_olap_ut.cpp @@ -1150,7 +1150,9 @@ Y_UNIT_TEST_SUITE(KqpOlap) { R"((`level`, `uid`, `resource_id`) != (Int32("0"), "uid_3000001", "10011"))", R"(`level` = 0 OR `level` = 2 OR `level` = 1)", R"(`level` = 0 OR (`level` = 2 AND `uid` = "uid_3000002"))", + R"(`level` = 0 OR NOT(`level` = 2 AND `uid` = "uid_3000002"))", R"(`level` = 0 AND (`uid` = "uid_3000000" OR `uid` = "uid_3000002"))", + R"(`level` = 0 AND NOT(`uid` = "uid_3000000" OR `uid` = "uid_3000002"))", R"(`level` = 0 OR `uid` = "uid_3000003")", R"(`level` = 0 AND `uid` = "uid_3000003")", R"(`level` = 0 AND `uid` = "uid_3000000")", @@ -1178,6 +1180,25 @@ Y_UNIT_TEST_SUITE(KqpOlap) { R"(`level` >= CAST("2" As Uint32))", R"(`level` = NULL)", R"(`level` > NULL)", + R"(LENGTH(`uid`) > 0 OR `resource_id` = "10001")", + R"((LENGTH(`uid`) > 0 AND `resource_id` = "10001") OR `resource_id` = "10002")", + R"((LENGTH(`uid`) > 0 OR `resource_id` = "10002") AND (LENGTH(`uid`) < 15 OR `resource_id` = "10001"))", + R"(NOT(LENGTH(`uid`) > 0 AND `resource_id` = "10001"))", + // Not strict function in the beginning causes to disable pushdown + R"(Unwrap(`level`/1) = `level` AND `resource_id` = "10001")", + // We can handle this case in future + R"(NOT(LENGTH(`uid`) > 0 OR `resource_id` = "10001"))", + }; + + std::vector<TString> testDataPartialPush = { + R"(LENGTH(`uid`) > 0 AND `resource_id` = "10001")", + R"(`resource_id` = "10001" AND `level` > 1 AND LENGTH(`uid`) > 0)", + R"(`resource_id` >= "10001" AND LENGTH(`uid`) > 0 AND `level` >= 1 AND `level` < 3)", + R"(LENGTH(`uid`) > 0 AND (`resource_id` >= "10001" OR `level`>= 1 AND `level` <= 3))", + R"(NOT(`resource_id` = "10001" OR `level` >= 1) AND LENGTH(`uid`) > 0)", + R"(NOT(`resource_id` = "10001" AND `level` != 1) AND LENGTH(`uid`) > 0)", + R"(`resource_id` = "10001" AND Unwrap(`level`/1) = `level`)", + R"(`resource_id` = "10001" AND Unwrap(`level`/1) = `level` AND `level` > 1)", }; auto buildQuery = [](const TString& predicate, bool pushEnabled) { @@ -1247,6 +1268,74 @@ Y_UNIT_TEST_SUITE(KqpOlap) { UNIT_ASSERT_C(ast.find("KqpOlapFilter") == std::string::npos, TStringBuilder() << "Predicate pushed down. Query: " << pushQuery); } + + for (const auto& predicate: testDataPartialPush) { + auto normalQuery = buildQuery(predicate, false); + auto pushQuery = buildQuery(predicate, true); + + Cerr << "--- Run normal query ---\n"; + Cerr << normalQuery << Endl; + auto it = tableClient.StreamExecuteScanQuery(normalQuery).GetValueSync(); + UNIT_ASSERT_C(it.IsSuccess(), it.GetIssues().ToString()); + auto goodResult = CollectStreamResult(it); + + Cerr << "--- Run pushed down query ---\n"; + Cerr << pushQuery << Endl; + it = tableClient.StreamExecuteScanQuery(pushQuery).GetValueSync(); + UNIT_ASSERT_C(it.IsSuccess(), it.GetIssues().ToString()); + auto pushResult = CollectStreamResult(it); + + if (logQueries) { + Cerr << "Query: " << normalQuery << Endl; + Cerr << "Expected: " << goodResult.ResultSetYson << Endl; + Cerr << "Received: " << pushResult.ResultSetYson << Endl; + } + + CompareYson(goodResult.ResultSetYson, pushResult.ResultSetYson); + + it = tableClient.StreamExecuteScanQuery(pushQuery, scanSettings).GetValueSync(); + UNIT_ASSERT_C(it.IsSuccess(), it.GetIssues().ToString()); + + auto result = CollectStreamResult(it); + auto ast = result.QueryStats->Getquery_ast(); + + UNIT_ASSERT_C(ast.find("KqpOlapFilter") != std::string::npos, + TStringBuilder() << "Predicate not pushed down. Query: " << pushQuery); + UNIT_ASSERT_C(ast.find("NarrowMap") != std::string::npos, + TStringBuilder() << "NarrowMap was removed. Query: " << pushQuery); + } + } + + Y_UNIT_TEST(PredicatePushdown_MixStrictAndNotStrict) { + auto settings = TKikimrSettings() + .SetWithSampleTables(false); + TKikimrRunner kikimr(settings); + + TStreamExecScanQuerySettings scanSettings; + scanSettings.Explain(true); + + TLocalHelper(kikimr).CreateTestOlapTable(); + WriteTestData(kikimr, "/Root/olapStore/olapTable", 10000, 3000000, 5); + EnableDebugLogging(kikimr); + + auto tableClient = kikimr.GetTableClient(); + auto query = R"( + PRAGMA Kikimr.OptEnablePredicateExtract = "false"; + SELECT `timestamp` FROM `/Root/olapStore/olapTable` WHERE + `resource_id` = "10001" AND Unwrap(`level`/1) = `level` AND `level` > 1; + )"; + + auto it = tableClient.StreamExecuteScanQuery(query, scanSettings).GetValueSync(); + UNIT_ASSERT_C(it.IsSuccess(), it.GetIssues().ToString()); + + auto result = CollectStreamResult(it); + auto ast = result.QueryStats->Getquery_ast(); + UNIT_ASSERT_C(ast.find(R"("eq" '"resource_id")") != std::string::npos, + TStringBuilder() << "Predicate not pushed down. Query: " << query); + UNIT_ASSERT_C(ast.find(R"("gt" '"level")") == std::string::npos, + TStringBuilder() << "Predicate pushed down. Query: " << query); + UNIT_ASSERT_C(ast.find("NarrowMap") != std::string::npos, + TStringBuilder() << "NarrowMap was removed. Query: " << query); } Y_UNIT_TEST(AggregationCountPushdown) { @@ -2653,7 +2742,36 @@ Y_UNIT_TEST_SUITE(KqpOlap) { .SetExpectedReadNodeType("Aggregate-TableFullScan"); q14.FillExpectedAggregationGroupByPlanOptions(); - TestClickBench({ q7, q9, q12, q14 }); + TAggregationTestCase q22; + q22.SetQuery(R"( + SELECT + SearchPhrase, MIN(URL), MIN(Title), COUNT(*) AS c, COUNT(DISTINCT UserID) + FROM `/Root/benchTable` + WHERE Title LIKE '%Google%' AND URL NOT LIKE '%.google.%' AND SearchPhrase <> '' + GROUP BY SearchPhrase + ORDER BY c DESC + LIMIT 10; + )") + .AddExpectedPlanOptions("KqpOlapFilter") + .SetExpectedReadNodeType("Aggregate-Filter-TableFullScan"); + q22.FillExpectedAggregationGroupByPlanOptions(); + + TAggregationTestCase q39; + q39.SetQuery(R"( + SELECT TraficSourceID, SearchEngineID, AdvEngineID, Src, Dst, COUNT(*) AS PageViews + FROM `/Root/benchTable` + WHERE CounterID = 62 AND EventDate >= Date('2013-07-01') AND EventDate <= Date('2013-07-31') AND IsRefresh == 0 + GROUP BY + TraficSourceID, SearchEngineID, AdvEngineID, IF (SearchEngineID = 0 AND AdvEngineID = 0, Referer, '') AS Src, + URL AS Dst + ORDER BY PageViews DESC + LIMIT 10; + )") + .AddExpectedPlanOptions("KqpOlapFilter") + .SetExpectedReadNodeType("Aggregate-Filter-TableFullScan"); + q39.FillExpectedAggregationGroupByPlanOptions(); + + TestClickBench({ q7, q9, q12, q14, q22, q39 }); } Y_UNIT_TEST(StatsSysView) { diff --git a/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-21 b/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-21 index e553cdd7880..f6f9f083364 100644 --- a/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-21 +++ b/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-21 @@ -61,7 +61,7 @@ }, { "Name": "Filter", - "Predicate": "Apply And item.SearchPhrase != \"\"" + "Predicate": "Apply" }, { "Name": "TableFullScan", @@ -79,6 +79,41 @@ "SsaProgram": { "Command": [ { + "Assign": { + "Column": { + "Id": 106 + }, + "Constant": { + "Text": "" + } + } + }, + { + "Assign": { + "Column": { + "Id": 107 + }, + "Function": { + "Arguments": [ + { + "Id": 40 + }, + { + "Id": 106 + } + ], + "Id": 2 + } + } + }, + { + "Filter": { + "Predicate": { + "Id": 107 + } + } + }, + { "Projection": { "Columns": [ { diff --git a/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-22 b/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-22 index 23d7b2303d4..07324fc556e 100644 --- a/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-22 +++ b/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-22 @@ -75,7 +75,7 @@ }, { "Name": "Filter", - "Predicate": "Apply And Not And item.SearchPhrase != \"\"" + "Predicate": "Apply And Not" }, { "Name": "TableFullScan", @@ -95,6 +95,41 @@ "SsaProgram": { "Command": [ { + "Assign": { + "Column": { + "Id": 106 + }, + "Constant": { + "Text": "" + } + } + }, + { + "Assign": { + "Column": { + "Id": 107 + }, + "Function": { + "Arguments": [ + { + "Id": 40 + }, + { + "Id": 106 + } + ], + "Id": 2 + } + } + }, + { + "Filter": { + "Predicate": { + "Id": 107 + } + } + }, + { "Projection": { "Columns": [ { @@ -166,7 +201,7 @@ }, { "Name": "Filter", - "Predicate": "Apply And Not And item.SearchPhrase != \"\"" + "Predicate": "Apply And Not" }, { "Name": "TableFullScan", @@ -186,6 +221,41 @@ "SsaProgram": { "Command": [ { + "Assign": { + "Column": { + "Id": 106 + }, + "Constant": { + "Text": "" + } + } + }, + { + "Assign": { + "Column": { + "Id": 107 + }, + "Function": { + "Arguments": [ + { + "Id": 40 + }, + { + "Id": 106 + } + ], + "Id": 2 + } + } + }, + { + "Filter": { + "Predicate": { + "Id": 107 + } + } + }, + { "Projection": { "Columns": [ { diff --git a/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-28 b/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-28 index 2b21d37a9f4..f824e9f0812 100644 --- a/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-28 +++ b/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-28 @@ -56,7 +56,7 @@ "PlanNodeType": "Connection", "Plans": [ { - "Node Type": "Aggregate-Filter-TableFullScan", + "Node Type": "Aggregate-TableFullScan", "Operators": [ { "Aggregation": "{_yql_agg_0: Inc(state._yql_agg_0),_yql_agg_2: MIN(item.Referer)}", @@ -64,10 +64,6 @@ "Name": "Aggregate" }, { - "Name": "Filter", - "Predicate": "item.Referer != \"\"" - }, - { "Name": "TableFullScan", "ReadColumns": [ "Referer" @@ -82,6 +78,41 @@ "SsaProgram": { "Command": [ { + "Assign": { + "Column": { + "Id": 106 + }, + "Constant": { + "Text": "" + } + } + }, + { + "Assign": { + "Column": { + "Id": 107 + }, + "Function": { + "Arguments": [ + { + "Id": 15 + }, + { + "Id": 106 + } + ], + "Id": 2 + } + } + }, + { + "Filter": { + "Predicate": { + "Id": 107 + } + } + }, + { "Projection": { "Columns": [ { diff --git a/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-36 b/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-36 index 5010429aeee..db97565501c 100644 --- a/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-36 +++ b/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-36 @@ -61,7 +61,7 @@ }, { "Name": "Filter", - "Predicate": "item.CounterID == 62 And item.EventDate >= \"15887\" And item.EventDate <= \"15917\" And item.DontCountHits == 0 And item.IsRefresh == 0 And item.URL != \"\"" + "Predicate": "item.EventDate >= \"15887\" And item.EventDate <= \"15917\"" }, { "Name": "TableFullScan", @@ -82,21 +82,185 @@ "SsaProgram": { "Command": [ { + "Assign": { + "Column": { + "Id": 106 + }, + "Constant": { + "Int32": 62 + } + } + }, + { + "Assign": { + "Column": { + "Id": 107 + }, + "Function": { + "Arguments": [ + { + "Id": 7 + }, + { + "Id": 106 + } + ], + "Id": 1 + } + } + }, + { + "Assign": { + "Column": { + "Id": 108 + }, + "Constant": { + "Int32": 0 + } + } + }, + { + "Assign": { + "Column": { + "Id": 109 + }, + "Function": { + "Arguments": [ + { + "Id": 62 + }, + { + "Id": 108 + } + ], + "Id": 1 + } + } + }, + { + "Assign": { + "Column": { + "Id": 110 + }, + "Function": { + "Arguments": [ + { + "Id": 107 + }, + { + "Id": 109 + } + ], + "Id": 11 + } + } + }, + { + "Assign": { + "Column": { + "Id": 111 + }, + "Constant": { + "Int32": 0 + } + } + }, + { + "Assign": { + "Column": { + "Id": 112 + }, + "Function": { + "Arguments": [ + { + "Id": 16 + }, + { + "Id": 111 + } + ], + "Id": 1 + } + } + }, + { + "Assign": { + "Column": { + "Id": 113 + }, + "Constant": { + "Text": "" + } + } + }, + { + "Assign": { + "Column": { + "Id": 114 + }, + "Function": { + "Arguments": [ + { + "Id": 14 + }, + { + "Id": 113 + } + ], + "Id": 2 + } + } + }, + { + "Assign": { + "Column": { + "Id": 115 + }, + "Function": { + "Arguments": [ + { + "Id": 112 + }, + { + "Id": 114 + } + ], + "Id": 11 + } + } + }, + { + "Assign": { + "Column": { + "Id": 116 + }, + "Function": { + "Arguments": [ + { + "Id": 110 + }, + { + "Id": 115 + } + ], + "Id": 11 + } + } + }, + { + "Filter": { + "Predicate": { + "Id": 116 + } + } + }, + { "Projection": { "Columns": [ { - "Id": 7 - }, - { - "Id": 62 - }, - { "Id": 6 }, { - "Id": 16 - }, - { "Id": 14 } ] diff --git a/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-37 b/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-37 index 0c2894ddde1..bb18c4ad896 100644 --- a/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-37 +++ b/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-37 @@ -61,7 +61,7 @@ }, { "Name": "Filter", - "Predicate": "item.CounterID == 62 And item.EventDate >= \"15887\" And item.EventDate <= \"15917\" And item.DontCountHits == 0 And item.IsRefresh == 0 And item.Title != \"\"" + "Predicate": "item.EventDate >= \"15887\" And item.EventDate <= \"15917\"" }, { "Name": "TableFullScan", @@ -82,21 +82,185 @@ "SsaProgram": { "Command": [ { + "Assign": { + "Column": { + "Id": 106 + }, + "Constant": { + "Int32": 62 + } + } + }, + { + "Assign": { + "Column": { + "Id": 107 + }, + "Function": { + "Arguments": [ + { + "Id": 7 + }, + { + "Id": 106 + } + ], + "Id": 1 + } + } + }, + { + "Assign": { + "Column": { + "Id": 108 + }, + "Constant": { + "Int32": 0 + } + } + }, + { + "Assign": { + "Column": { + "Id": 109 + }, + "Function": { + "Arguments": [ + { + "Id": 62 + }, + { + "Id": 108 + } + ], + "Id": 1 + } + } + }, + { + "Assign": { + "Column": { + "Id": 110 + }, + "Function": { + "Arguments": [ + { + "Id": 107 + }, + { + "Id": 109 + } + ], + "Id": 11 + } + } + }, + { + "Assign": { + "Column": { + "Id": 111 + }, + "Constant": { + "Int32": 0 + } + } + }, + { + "Assign": { + "Column": { + "Id": 112 + }, + "Function": { + "Arguments": [ + { + "Id": 16 + }, + { + "Id": 111 + } + ], + "Id": 1 + } + } + }, + { + "Assign": { + "Column": { + "Id": 113 + }, + "Constant": { + "Text": "" + } + } + }, + { + "Assign": { + "Column": { + "Id": 114 + }, + "Function": { + "Arguments": [ + { + "Id": 3 + }, + { + "Id": 113 + } + ], + "Id": 2 + } + } + }, + { + "Assign": { + "Column": { + "Id": 115 + }, + "Function": { + "Arguments": [ + { + "Id": 112 + }, + { + "Id": 114 + } + ], + "Id": 11 + } + } + }, + { + "Assign": { + "Column": { + "Id": 116 + }, + "Function": { + "Arguments": [ + { + "Id": 110 + }, + { + "Id": 115 + } + ], + "Id": 11 + } + } + }, + { + "Filter": { + "Predicate": { + "Id": 116 + } + } + }, + { "Projection": { "Columns": [ { - "Id": 7 - }, - { - "Id": 62 - }, - { "Id": 6 }, { - "Id": 16 - }, - { "Id": 3 } ] diff --git a/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-38 b/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-38 index 03592394304..74d486843b2 100644 --- a/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-38 +++ b/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-38 @@ -80,7 +80,7 @@ }, { "Name": "Filter", - "Predicate": "item.CounterID == 62 And item.EventDate >= \"15887\" And item.EventDate <= \"15917\" And item.IsRefresh == 0 And item.IsLink != 0 And item.IsDownload == 0" + "Predicate": "item.EventDate >= \"15887\" And item.EventDate <= \"15917\"" }, { "Name": "TableFullScan", @@ -102,24 +102,185 @@ "SsaProgram": { "Command": [ { + "Assign": { + "Column": { + "Id": 106 + }, + "Constant": { + "Int32": 62 + } + } + }, + { + "Assign": { + "Column": { + "Id": 107 + }, + "Function": { + "Arguments": [ + { + "Id": 7 + }, + { + "Id": 106 + } + ], + "Id": 1 + } + } + }, + { + "Assign": { + "Column": { + "Id": 108 + }, + "Constant": { + "Int32": 0 + } + } + }, + { + "Assign": { + "Column": { + "Id": 109 + }, + "Function": { + "Arguments": [ + { + "Id": 16 + }, + { + "Id": 108 + } + ], + "Id": 1 + } + } + }, + { + "Assign": { + "Column": { + "Id": 110 + }, + "Function": { + "Arguments": [ + { + "Id": 107 + }, + { + "Id": 109 + } + ], + "Id": 11 + } + } + }, + { + "Assign": { + "Column": { + "Id": 111 + }, + "Constant": { + "Int32": 0 + } + } + }, + { + "Assign": { + "Column": { + "Id": 112 + }, + "Function": { + "Arguments": [ + { + "Id": 53 + }, + { + "Id": 111 + } + ], + "Id": 2 + } + } + }, + { + "Assign": { + "Column": { + "Id": 113 + }, + "Constant": { + "Int32": 0 + } + } + }, + { + "Assign": { + "Column": { + "Id": 114 + }, + "Function": { + "Arguments": [ + { + "Id": 54 + }, + { + "Id": 113 + } + ], + "Id": 1 + } + } + }, + { + "Assign": { + "Column": { + "Id": 115 + }, + "Function": { + "Arguments": [ + { + "Id": 112 + }, + { + "Id": 114 + } + ], + "Id": 11 + } + } + }, + { + "Assign": { + "Column": { + "Id": 116 + }, + "Function": { + "Arguments": [ + { + "Id": 110 + }, + { + "Id": 115 + } + ], + "Id": 11 + } + } + }, + { + "Filter": { + "Predicate": { + "Id": 116 + } + } + }, + { "Projection": { "Columns": [ { - "Id": 7 - }, - { "Id": 6 }, { - "Id": 54 - }, - { - "Id": 53 - }, - { - "Id": 16 - }, - { "Id": 14 } ] diff --git a/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-39 b/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-39 index 5f71553cf21..7bd59004c70 100644 --- a/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-39 +++ b/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-39 @@ -84,7 +84,7 @@ }, { "Name": "Filter", - "Predicate": "item.CounterID == 62 And item.EventDate >= \"15887\" And item.EventDate <= \"15917\" And item.IsRefresh == 0" + "Predicate": "item.EventDate >= \"15887\" And item.EventDate <= \"15917\"" }, { "Name": "TableFullScan", @@ -108,21 +108,96 @@ "SsaProgram": { "Command": [ { + "Assign": { + "Column": { + "Id": 106 + }, + "Constant": { + "Int32": 62 + } + } + }, + { + "Assign": { + "Column": { + "Id": 107 + }, + "Function": { + "Arguments": [ + { + "Id": 7 + }, + { + "Id": 106 + } + ], + "Id": 1 + } + } + }, + { + "Assign": { + "Column": { + "Id": 108 + }, + "Constant": { + "Int32": 0 + } + } + }, + { + "Assign": { + "Column": { + "Id": 109 + }, + "Function": { + "Arguments": [ + { + "Id": 16 + }, + { + "Id": 108 + } + ], + "Id": 1 + } + } + }, + { + "Assign": { + "Column": { + "Id": 110 + }, + "Function": { + "Arguments": [ + { + "Id": 107 + }, + { + "Id": 109 + } + ], + "Id": 11 + } + } + }, + { + "Filter": { + "Predicate": { + "Id": 110 + } + } + }, + { "Projection": { "Columns": [ { "Id": 41 }, { - "Id": 7 - }, - { "Id": 6 }, { - "Id": 16 - }, - { "Id": 15 }, { diff --git a/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-40 b/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-40 index 8468d5f996d..50fc1adafb8 100644 --- a/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-40 +++ b/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-40 @@ -81,7 +81,7 @@ }, { "Name": "Filter", - "Predicate": "item.CounterID == 62 And item.EventDate >= \"15887\" And item.EventDate <= \"15917\" And item.IsRefresh == 0 And item.TraficSourceID == -1 Or item.TraficSourceID == 6 And item.RefererHash == 3594120000172545465" + "Predicate": "item.EventDate >= \"15887\" And item.EventDate <= \"15917\" And item.TraficSourceID == -1 Or item.TraficSourceID == 6" }, { "Name": "TableFullScan", @@ -103,21 +103,139 @@ "SsaProgram": { "Command": [ { + "Assign": { + "Column": { + "Id": 106 + }, + "Constant": { + "Int32": 62 + } + } + }, + { + "Assign": { + "Column": { + "Id": 107 + }, + "Function": { + "Arguments": [ + { + "Id": 7 + }, + { + "Id": 106 + } + ], + "Id": 1 + } + } + }, + { + "Assign": { + "Column": { + "Id": 108 + }, + "Constant": { + "Int32": 0 + } + } + }, + { + "Assign": { + "Column": { + "Id": 109 + }, + "Function": { + "Arguments": [ + { + "Id": 16 + }, + { + "Id": 108 + } + ], + "Id": 1 + } + } + }, + { + "Assign": { + "Column": { + "Id": 110 + }, + "Constant": { + "Int64": 3594120000172545465 + } + } + }, + { + "Assign": { + "Column": { + "Id": 111 + }, + "Function": { + "Arguments": [ + { + "Id": 103 + }, + { + "Id": 110 + } + ], + "Id": 1 + } + } + }, + { + "Assign": { + "Column": { + "Id": 112 + }, + "Function": { + "Arguments": [ + { + "Id": 109 + }, + { + "Id": 111 + } + ], + "Id": 11 + } + } + }, + { + "Assign": { + "Column": { + "Id": 113 + }, + "Function": { + "Arguments": [ + { + "Id": 107 + }, + { + "Id": 112 + } + ], + "Id": 11 + } + } + }, + { + "Filter": { + "Predicate": { + "Id": 113 + } + } + }, + { "Projection": { "Columns": [ { - "Id": 7 - }, - { "Id": 6 }, { - "Id": 16 - }, - { - "Id": 103 - }, - { "Id": 38 }, { diff --git a/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-41 b/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-41 index a08e2476d44..5e9386d7947 100644 --- a/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-41 +++ b/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-41 @@ -81,7 +81,7 @@ }, { "Name": "Filter", - "Predicate": "item.CounterID == 62 And item.EventDate >= \"15887\" And item.EventDate <= \"15917\" And item.IsRefresh == 0 And item.DontCountHits == 0 And item.URLHash == 2868770270353813622" + "Predicate": "item.EventDate >= \"15887\" And item.EventDate <= \"15917\"" }, { "Name": "TableFullScan", @@ -104,24 +104,185 @@ "SsaProgram": { "Command": [ { + "Assign": { + "Column": { + "Id": 106 + }, + "Constant": { + "Int32": 62 + } + } + }, + { + "Assign": { + "Column": { + "Id": 107 + }, + "Function": { + "Arguments": [ + { + "Id": 7 + }, + { + "Id": 106 + } + ], + "Id": 1 + } + } + }, + { + "Assign": { + "Column": { + "Id": 108 + }, + "Constant": { + "Int32": 0 + } + } + }, + { + "Assign": { + "Column": { + "Id": 109 + }, + "Function": { + "Arguments": [ + { + "Id": 16 + }, + { + "Id": 108 + } + ], + "Id": 1 + } + } + }, + { + "Assign": { + "Column": { + "Id": 110 + }, + "Function": { + "Arguments": [ + { + "Id": 107 + }, + { + "Id": 109 + } + ], + "Id": 11 + } + } + }, + { + "Assign": { + "Column": { + "Id": 111 + }, + "Constant": { + "Int32": 0 + } + } + }, + { + "Assign": { + "Column": { + "Id": 112 + }, + "Function": { + "Arguments": [ + { + "Id": 62 + }, + { + "Id": 111 + } + ], + "Id": 1 + } + } + }, + { + "Assign": { + "Column": { + "Id": 113 + }, + "Constant": { + "Int64": 2868770270353813622 + } + } + }, + { + "Assign": { + "Column": { + "Id": 114 + }, + "Function": { + "Arguments": [ + { + "Id": 104 + }, + { + "Id": 113 + } + ], + "Id": 1 + } + } + }, + { + "Assign": { + "Column": { + "Id": 115 + }, + "Function": { + "Arguments": [ + { + "Id": 112 + }, + { + "Id": 114 + } + ], + "Id": 11 + } + } + }, + { + "Assign": { + "Column": { + "Id": 116 + }, + "Function": { + "Arguments": [ + { + "Id": 110 + }, + { + "Id": 115 + } + ], + "Id": 11 + } + } + }, + { + "Filter": { + "Predicate": { + "Id": 116 + } + } + }, + { "Projection": { "Columns": [ { - "Id": 7 - }, - { - "Id": 62 - }, - { "Id": 6 }, { - "Id": 16 - }, - { - "Id": 104 - }, - { "Id": 44 }, { diff --git a/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-42 b/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-42 index be95bdb7483..f3290051429 100644 --- a/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-42 +++ b/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-42 @@ -80,7 +80,7 @@ }, { "Name": "Filter", - "Predicate": "item.CounterID == 62 And item.EventDate >= \"15900\" And item.EventDate <= \"15901\" And item.IsRefresh == 0 And item.DontCountHits == 0" + "Predicate": "item.EventDate >= \"15900\" And item.EventDate <= \"15901\"" }, { "Name": "TableFullScan", @@ -101,22 +101,140 @@ "SsaProgram": { "Command": [ { + "Assign": { + "Column": { + "Id": 106 + }, + "Constant": { + "Int32": 62 + } + } + }, + { + "Assign": { + "Column": { + "Id": 107 + }, + "Function": { + "Arguments": [ + { + "Id": 7 + }, + { + "Id": 106 + } + ], + "Id": 1 + } + } + }, + { + "Assign": { + "Column": { + "Id": 108 + }, + "Constant": { + "Int32": 0 + } + } + }, + { + "Assign": { + "Column": { + "Id": 109 + }, + "Function": { + "Arguments": [ + { + "Id": 16 + }, + { + "Id": 108 + } + ], + "Id": 1 + } + } + }, + { + "Assign": { + "Column": { + "Id": 110 + }, + "Constant": { + "Int32": 0 + } + } + }, + { + "Assign": { + "Column": { + "Id": 111 + }, + "Function": { + "Arguments": [ + { + "Id": 62 + }, + { + "Id": 110 + } + ], + "Id": 1 + } + } + }, + { + "Assign": { + "Column": { + "Id": 112 + }, + "Function": { + "Arguments": [ + { + "Id": 109 + }, + { + "Id": 111 + } + ], + "Id": 11 + } + } + }, + { + "Assign": { + "Column": { + "Id": 113 + }, + "Function": { + "Arguments": [ + { + "Id": 107 + }, + { + "Id": 112 + } + ], + "Id": 11 + } + } + }, + { + "Filter": { + "Predicate": { + "Id": 113 + } + } + }, + { "Projection": { "Columns": [ { - "Id": 7 - }, - { - "Id": 62 - }, - { "Id": 6 }, { "Id": 5 - }, - { - "Id": 16 } ] } |