diff options
20 files changed, 1393 insertions, 140 deletions
diff --git a/ydb/core/kqp/opt/physical/kqp_opt_phy_olap_filter.cpp b/ydb/core/kqp/opt/physical/kqp_opt_phy_olap_filter.cpp index d75ce0979db..1b102843829 100644 --- a/ydb/core/kqp/opt/physical/kqp_opt_phy_olap_filter.cpp +++ b/ydb/core/kqp/opt/physical/kqp_opt_phy_olap_filter.cpp @@ -3,6 +3,7 @@ #include <ydb/core/kqp/common/kqp_yql.h> #include <ydb/library/yql/core/extract_predicate/extract_predicate.h> +#include <ydb/library/yql/core/yql_opt_utils.h> namespace NKikimr::NKqp::NOpt { @@ -17,39 +18,6 @@ bool IsFalseLiteral(TExprBase node) { return node.Maybe<TCoBool>() && !FromString<bool>(node.Cast<TCoBool>().Literal().Value()); } -bool ValidateIfArgument(const TCoOptionalIf& optionalIf, const TExprNode* rawLambdaArg) { - // Check it is SELECT * or SELECT `field1`, `field2`... - if (optionalIf.Value().Raw() == rawLambdaArg) { - return true; - } - - // Ok, maybe it is SELECT `field1`, `field2` ? - auto maybeAsStruct = optionalIf.Value().Maybe<TCoAsStruct>(); - if (!maybeAsStruct) { - return false; - } - - for (auto arg : maybeAsStruct.Cast()) { - // Check that second tuple element is Member(lambda arg) - auto tuple = arg.Maybe<TExprList>().Cast(); - if (tuple.Size() != 2) { - return false; - } - - auto maybeMember = tuple.Item(1).Maybe<TCoMember>(); - if (!maybeMember) { - return false; - } - - auto member = maybeMember.Cast(); - if (member.Struct().Raw() != rawLambdaArg) { - return false; - } - } - - return true; -} - TVector<TExprBase> ConvertComparisonNode(const TExprBase& nodeIn) { TVector<TExprBase> out; @@ -417,6 +385,40 @@ TMaybeNode<TExprBase> PredicatePushdown(const TExprBase& predicate, TExprContext .Done(); } +void SplitForPartialPushdown(const TPredicateNode& predicateTree, TPredicateNode& predicatesToPush, TPredicateNode& remainingPredicates, + TExprContext& ctx, TPositionHandle pos) +{ + if (predicateTree.CanBePushed) { + predicatesToPush = predicateTree; + remainingPredicates.ExprNode = Build<TCoBool>(ctx, pos).Literal().Build("true").Done(); + return; + } + + if (predicateTree.Op != EBoolOp::And) { + // We can partially pushdown predicates from AND operator only. + // For OR operator we would need to have several read operators which is not acceptable. + // TODO: Add support for NOT(op1 OR op2), because it expands to (!op1 AND !op2). + remainingPredicates = predicateTree; + return; + } + + bool isFoundNotStrictOp = false; + std::vector<TPredicateNode> pushable; + std::vector<TPredicateNode> remaining; + for (auto& predicate : predicateTree.Children) { + if (predicate.CanBePushed && !isFoundNotStrictOp) { + pushable.emplace_back(predicate); + } else { + if (!IsStrict(predicate.ExprNode.Cast().Ptr())) { + isFoundNotStrictOp = true; + } + remaining.emplace_back(predicate); + } + } + predicatesToPush.SetPredicates(pushable, ctx, pos); + remainingPredicates.SetPredicates(remaining, ctx, pos); +} + } // anonymous namespace end TExprBase KqpPushOlapFilter(TExprBase node, TExprContext& ctx, const TKqpOptimizeContext& kqpCtx, @@ -450,19 +452,14 @@ TExprBase KqpPushOlapFilter(TExprBase node, TExprContext& ctx, const TKqpOptimiz } auto optionalIf = maybeOptionalIf.Cast(); - if (!ValidateIfArgument(optionalIf, lambdaArg)) { - return node; - } - TPredicateNode predicateTree(optionalIf.Predicate()); CollectPredicates(optionalIf.Predicate(), predicateTree, lambdaArg, read.Process().Body()); + YQL_ENSURE(predicateTree.IsValid(), "Collected OLAP predicates are invalid"); TPredicateNode predicatesToPush; TPredicateNode remainingPredicates; - if (predicateTree.CanBePushed) { - predicatesToPush = predicateTree; - remainingPredicates.ExprNode = Build<TCoBool>(ctx, node.Pos()).Literal().Build("true").Done(); - } else { + SplitForPartialPushdown(predicateTree, predicatesToPush, remainingPredicates, ctx, node.Pos()); + if (!predicatesToPush.IsValid()) { return node; } @@ -516,7 +513,10 @@ TExprBase KqpPushOlapFilter(TExprBase node, TExprContext& ctx, const TKqpOptimiz .Lambda<TCoLambda>() .Args({"new_arg"}) .Body<TCoOptionalIf>() - .Predicate(remainingPredicates.ExprNode.Cast()) + .Predicate<TExprApplier>() + .Apply(remainingPredicates.ExprNode.Cast()) + .With(lambda.Args().Arg(0), "new_arg") + .Build() .Value<TExprApplier>() .Apply(optionalIf.Value()) .With(lambda.Args().Arg(0), "new_arg") diff --git a/ydb/core/kqp/opt/physical/kqp_opt_phy_olap_filter_collection.cpp b/ydb/core/kqp/opt/physical/kqp_opt_phy_olap_filter_collection.cpp index f253538fe45..cf2c08b5324 100644 --- a/ydb/core/kqp/opt/physical/kqp_opt_phy_olap_filter_collection.cpp +++ b/ydb/core/kqp/opt/physical/kqp_opt_phy_olap_filter_collection.cpp @@ -337,11 +337,6 @@ bool CoalesceCanBePushed(const TCoCoalesce& coalesce, const TExprNode* lambdaArg return false; } - if (coalesce.Value().Cast<TCoBool>().Literal().Value() != "false") { - // Maybe we don't need this check - return false; - } - if (auto maybeCompare = coalesce.Predicate().Maybe<TCoCompare>()) { return CompareCanBePushed(maybeCompare.Cast(), lambdaArg, lambdaBody); } else if (auto maybeFlatmap = coalesce.Predicate().Maybe<TCoFlatMap>()) { @@ -378,6 +373,41 @@ void CollectPredicatesForBinaryBoolOperators(const TExprBase& opNode, TPredicate } // anonymous namespace end +bool TPredicateNode::IsValid() const { + bool res = true; + if (Op != EBoolOp::Undefined) { + res &= !Children.empty(); + for (auto& child : Children) { + res &= child.IsValid(); + } + } + + return res && ExprNode.IsValid(); +} + +void TPredicateNode::SetPredicates(const std::vector<TPredicateNode>& predicates, TExprContext& ctx, TPositionHandle pos) { + auto predicatesSize = predicates.size(); + if (predicatesSize == 0) { + return; + } else if (predicatesSize == 1) { + *this = predicates[0]; + } else { + Op = EBoolOp::And; + Children = predicates; + CanBePushed = true; + + TVector<TExprBase> exprNodes; + exprNodes.reserve(predicatesSize); + for (auto& pred : predicates) { + exprNodes.emplace_back(pred.ExprNode.Cast()); + CanBePushed &= pred.CanBePushed; + } + ExprNode = Build<TCoAnd>(ctx, pos) + .Add(exprNodes) + .Done(); + } +} + void CollectPredicates(const TExprBase& predicate, TPredicateNode& predicateTree, const TExprNode* lambdaArg, const TExprBase& lambdaBody) { if (predicate.Maybe<TCoCoalesce>()) { auto coalesce = predicate.Cast<TCoCoalesce>(); diff --git a/ydb/core/kqp/opt/physical/kqp_opt_phy_olap_filter_collection.h b/ydb/core/kqp/opt/physical/kqp_opt_phy_olap_filter_collection.h index 005cbecc8b4..cfec72ac53d 100644 --- a/ydb/core/kqp/opt/physical/kqp_opt_phy_olap_filter_collection.h +++ b/ydb/core/kqp/opt/physical/kqp_opt_phy_olap_filter_collection.h @@ -40,17 +40,8 @@ struct TPredicateNode { ~TPredicateNode() {} - bool IsValid() const { - bool res = true; - if (Op != EBoolOp::Undefined) { - res &= !Children.empty(); - for (auto& child : Children) { - res &= child.IsValid(); - } - } - - return res && ExprNode.IsValid(); - } + bool IsValid() const; + void SetPredicates(const std::vector<TPredicateNode>& predicates, NYql::TExprContext& ctx, NYql::TPositionHandle pos); NYql::NNodes::TMaybeNode<NYql::NNodes::TExprBase> ExprNode; std::vector<TPredicateNode> Children; diff --git a/ydb/core/kqp/ut/common/CMakeLists.darwin-x86_64.txt b/ydb/core/kqp/ut/common/CMakeLists.darwin-x86_64.txt index fd14efe2ef1..ac4ece79a0a 100644 --- a/ydb/core/kqp/ut/common/CMakeLists.darwin-x86_64.txt +++ b/ydb/core/kqp/ut/common/CMakeLists.darwin-x86_64.txt @@ -27,4 +27,5 @@ target_link_libraries(kqp-ut-common PUBLIC ) target_sources(kqp-ut-common PRIVATE ${CMAKE_SOURCE_DIR}/ydb/core/kqp/ut/common/kqp_ut_common.cpp + ${CMAKE_SOURCE_DIR}/ydb/core/kqp/ut/common/re2_udf.cpp ) diff --git a/ydb/core/kqp/ut/common/CMakeLists.linux-aarch64.txt b/ydb/core/kqp/ut/common/CMakeLists.linux-aarch64.txt index 6f4af35c966..84aa8d90e3b 100644 --- a/ydb/core/kqp/ut/common/CMakeLists.linux-aarch64.txt +++ b/ydb/core/kqp/ut/common/CMakeLists.linux-aarch64.txt @@ -28,4 +28,5 @@ target_link_libraries(kqp-ut-common PUBLIC ) target_sources(kqp-ut-common PRIVATE ${CMAKE_SOURCE_DIR}/ydb/core/kqp/ut/common/kqp_ut_common.cpp + ${CMAKE_SOURCE_DIR}/ydb/core/kqp/ut/common/re2_udf.cpp ) diff --git a/ydb/core/kqp/ut/common/CMakeLists.linux-x86_64.txt b/ydb/core/kqp/ut/common/CMakeLists.linux-x86_64.txt index 6f4af35c966..84aa8d90e3b 100644 --- a/ydb/core/kqp/ut/common/CMakeLists.linux-x86_64.txt +++ b/ydb/core/kqp/ut/common/CMakeLists.linux-x86_64.txt @@ -28,4 +28,5 @@ target_link_libraries(kqp-ut-common PUBLIC ) target_sources(kqp-ut-common PRIVATE ${CMAKE_SOURCE_DIR}/ydb/core/kqp/ut/common/kqp_ut_common.cpp + ${CMAKE_SOURCE_DIR}/ydb/core/kqp/ut/common/re2_udf.cpp ) diff --git a/ydb/core/kqp/ut/common/CMakeLists.windows-x86_64.txt b/ydb/core/kqp/ut/common/CMakeLists.windows-x86_64.txt index fd14efe2ef1..ac4ece79a0a 100644 --- a/ydb/core/kqp/ut/common/CMakeLists.windows-x86_64.txt +++ b/ydb/core/kqp/ut/common/CMakeLists.windows-x86_64.txt @@ -27,4 +27,5 @@ target_link_libraries(kqp-ut-common PUBLIC ) target_sources(kqp-ut-common PRIVATE ${CMAKE_SOURCE_DIR}/ydb/core/kqp/ut/common/kqp_ut_common.cpp + ${CMAKE_SOURCE_DIR}/ydb/core/kqp/ut/common/re2_udf.cpp ) diff --git a/ydb/core/kqp/ut/common/kqp_ut_common.cpp b/ydb/core/kqp/ut/common/kqp_ut_common.cpp index cc2ca688bd6..8bbf6325659 100644 --- a/ydb/core/kqp/ut/common/kqp_ut_common.cpp +++ b/ydb/core/kqp/ut/common/kqp_ut_common.cpp @@ -50,11 +50,13 @@ SIMPLE_UDF(TRandString, char*(ui32)) { } SIMPLE_MODULE(TTestUdfsModule, TTestFilter, TTestFilterTerminate, TRandString); +NYql::NUdf::TUniquePtr<NYql::NUdf::IUdfModule> CreateRe2Module(); NMiniKQL::IFunctionRegistry* UdfFrFactory(const NScheme::TTypeRegistry& typeRegistry) { Y_UNUSED(typeRegistry); auto funcRegistry = NMiniKQL::CreateFunctionRegistry(NMiniKQL::CreateBuiltinRegistry())->Clone(); funcRegistry->AddModule("", "TestUdfs", new TTestUdfsModule()); + funcRegistry->AddModule("", "Re2", CreateRe2Module()); NKikimr::NMiniKQL::FillStaticModules(*funcRegistry); return funcRegistry.Release(); } diff --git a/ydb/core/kqp/ut/common/re2_udf.cpp b/ydb/core/kqp/ut/common/re2_udf.cpp new file mode 100644 index 00000000000..f5582be121a --- /dev/null +++ b/ydb/core/kqp/ut/common/re2_udf.cpp @@ -0,0 +1,11 @@ +// HACK: the TRe2Module class is in an anonymous namespace +// so including the source cpp is the only way to access it +#include <ydb/library/yql/udfs/common/re2/re2_udf.cpp> + +namespace NKikimr::NKqp { + +NYql::NUdf::TUniquePtr<NYql::NUdf::IUdfModule> CreateRe2Module() { + return new TRe2Module<true>(); +} + +} // namespace NKikimr::NKqp diff --git a/ydb/core/kqp/ut/olap/kqp_olap_ut.cpp b/ydb/core/kqp/ut/olap/kqp_olap_ut.cpp index c38f8a05316..34871b51924 100644 --- a/ydb/core/kqp/ut/olap/kqp_olap_ut.cpp +++ b/ydb/core/kqp/ut/olap/kqp_olap_ut.cpp @@ -1150,7 +1150,9 @@ Y_UNIT_TEST_SUITE(KqpOlap) { R"((`level`, `uid`, `resource_id`) != (Int32("0"), "uid_3000001", "10011"))", R"(`level` = 0 OR `level` = 2 OR `level` = 1)", R"(`level` = 0 OR (`level` = 2 AND `uid` = "uid_3000002"))", + R"(`level` = 0 OR NOT(`level` = 2 AND `uid` = "uid_3000002"))", R"(`level` = 0 AND (`uid` = "uid_3000000" OR `uid` = "uid_3000002"))", + R"(`level` = 0 AND NOT(`uid` = "uid_3000000" OR `uid` = "uid_3000002"))", R"(`level` = 0 OR `uid` = "uid_3000003")", R"(`level` = 0 AND `uid` = "uid_3000003")", R"(`level` = 0 AND `uid` = "uid_3000000")", @@ -1178,6 +1180,25 @@ Y_UNIT_TEST_SUITE(KqpOlap) { R"(`level` >= CAST("2" As Uint32))", R"(`level` = NULL)", R"(`level` > NULL)", + R"(LENGTH(`uid`) > 0 OR `resource_id` = "10001")", + R"((LENGTH(`uid`) > 0 AND `resource_id` = "10001") OR `resource_id` = "10002")", + R"((LENGTH(`uid`) > 0 OR `resource_id` = "10002") AND (LENGTH(`uid`) < 15 OR `resource_id` = "10001"))", + R"(NOT(LENGTH(`uid`) > 0 AND `resource_id` = "10001"))", + // Not strict function in the beginning causes to disable pushdown + R"(Unwrap(`level`/1) = `level` AND `resource_id` = "10001")", + // We can handle this case in future + R"(NOT(LENGTH(`uid`) > 0 OR `resource_id` = "10001"))", + }; + + std::vector<TString> testDataPartialPush = { + R"(LENGTH(`uid`) > 0 AND `resource_id` = "10001")", + R"(`resource_id` = "10001" AND `level` > 1 AND LENGTH(`uid`) > 0)", + R"(`resource_id` >= "10001" AND LENGTH(`uid`) > 0 AND `level` >= 1 AND `level` < 3)", + R"(LENGTH(`uid`) > 0 AND (`resource_id` >= "10001" OR `level`>= 1 AND `level` <= 3))", + R"(NOT(`resource_id` = "10001" OR `level` >= 1) AND LENGTH(`uid`) > 0)", + R"(NOT(`resource_id` = "10001" AND `level` != 1) AND LENGTH(`uid`) > 0)", + R"(`resource_id` = "10001" AND Unwrap(`level`/1) = `level`)", + R"(`resource_id` = "10001" AND Unwrap(`level`/1) = `level` AND `level` > 1)", }; auto buildQuery = [](const TString& predicate, bool pushEnabled) { @@ -1247,6 +1268,74 @@ Y_UNIT_TEST_SUITE(KqpOlap) { UNIT_ASSERT_C(ast.find("KqpOlapFilter") == std::string::npos, TStringBuilder() << "Predicate pushed down. Query: " << pushQuery); } + + for (const auto& predicate: testDataPartialPush) { + auto normalQuery = buildQuery(predicate, false); + auto pushQuery = buildQuery(predicate, true); + + Cerr << "--- Run normal query ---\n"; + Cerr << normalQuery << Endl; + auto it = tableClient.StreamExecuteScanQuery(normalQuery).GetValueSync(); + UNIT_ASSERT_C(it.IsSuccess(), it.GetIssues().ToString()); + auto goodResult = CollectStreamResult(it); + + Cerr << "--- Run pushed down query ---\n"; + Cerr << pushQuery << Endl; + it = tableClient.StreamExecuteScanQuery(pushQuery).GetValueSync(); + UNIT_ASSERT_C(it.IsSuccess(), it.GetIssues().ToString()); + auto pushResult = CollectStreamResult(it); + + if (logQueries) { + Cerr << "Query: " << normalQuery << Endl; + Cerr << "Expected: " << goodResult.ResultSetYson << Endl; + Cerr << "Received: " << pushResult.ResultSetYson << Endl; + } + + CompareYson(goodResult.ResultSetYson, pushResult.ResultSetYson); + + it = tableClient.StreamExecuteScanQuery(pushQuery, scanSettings).GetValueSync(); + UNIT_ASSERT_C(it.IsSuccess(), it.GetIssues().ToString()); + + auto result = CollectStreamResult(it); + auto ast = result.QueryStats->Getquery_ast(); + + UNIT_ASSERT_C(ast.find("KqpOlapFilter") != std::string::npos, + TStringBuilder() << "Predicate not pushed down. Query: " << pushQuery); + UNIT_ASSERT_C(ast.find("NarrowMap") != std::string::npos, + TStringBuilder() << "NarrowMap was removed. Query: " << pushQuery); + } + } + + Y_UNIT_TEST(PredicatePushdown_MixStrictAndNotStrict) { + auto settings = TKikimrSettings() + .SetWithSampleTables(false); + TKikimrRunner kikimr(settings); + + TStreamExecScanQuerySettings scanSettings; + scanSettings.Explain(true); + + TLocalHelper(kikimr).CreateTestOlapTable(); + WriteTestData(kikimr, "/Root/olapStore/olapTable", 10000, 3000000, 5); + EnableDebugLogging(kikimr); + + auto tableClient = kikimr.GetTableClient(); + auto query = R"( + PRAGMA Kikimr.OptEnablePredicateExtract = "false"; + SELECT `timestamp` FROM `/Root/olapStore/olapTable` WHERE + `resource_id` = "10001" AND Unwrap(`level`/1) = `level` AND `level` > 1; + )"; + + auto it = tableClient.StreamExecuteScanQuery(query, scanSettings).GetValueSync(); + UNIT_ASSERT_C(it.IsSuccess(), it.GetIssues().ToString()); + + auto result = CollectStreamResult(it); + auto ast = result.QueryStats->Getquery_ast(); + UNIT_ASSERT_C(ast.find(R"("eq" '"resource_id")") != std::string::npos, + TStringBuilder() << "Predicate not pushed down. Query: " << query); + UNIT_ASSERT_C(ast.find(R"("gt" '"level")") == std::string::npos, + TStringBuilder() << "Predicate pushed down. Query: " << query); + UNIT_ASSERT_C(ast.find("NarrowMap") != std::string::npos, + TStringBuilder() << "NarrowMap was removed. Query: " << query); } Y_UNIT_TEST(AggregationCountPushdown) { @@ -2653,7 +2742,36 @@ Y_UNIT_TEST_SUITE(KqpOlap) { .SetExpectedReadNodeType("Aggregate-TableFullScan"); q14.FillExpectedAggregationGroupByPlanOptions(); - TestClickBench({ q7, q9, q12, q14 }); + TAggregationTestCase q22; + q22.SetQuery(R"( + SELECT + SearchPhrase, MIN(URL), MIN(Title), COUNT(*) AS c, COUNT(DISTINCT UserID) + FROM `/Root/benchTable` + WHERE Title LIKE '%Google%' AND URL NOT LIKE '%.google.%' AND SearchPhrase <> '' + GROUP BY SearchPhrase + ORDER BY c DESC + LIMIT 10; + )") + .AddExpectedPlanOptions("KqpOlapFilter") + .SetExpectedReadNodeType("Aggregate-Filter-TableFullScan"); + q22.FillExpectedAggregationGroupByPlanOptions(); + + TAggregationTestCase q39; + q39.SetQuery(R"( + SELECT TraficSourceID, SearchEngineID, AdvEngineID, Src, Dst, COUNT(*) AS PageViews + FROM `/Root/benchTable` + WHERE CounterID = 62 AND EventDate >= Date('2013-07-01') AND EventDate <= Date('2013-07-31') AND IsRefresh == 0 + GROUP BY + TraficSourceID, SearchEngineID, AdvEngineID, IF (SearchEngineID = 0 AND AdvEngineID = 0, Referer, '') AS Src, + URL AS Dst + ORDER BY PageViews DESC + LIMIT 10; + )") + .AddExpectedPlanOptions("KqpOlapFilter") + .SetExpectedReadNodeType("Aggregate-Filter-TableFullScan"); + q39.FillExpectedAggregationGroupByPlanOptions(); + + TestClickBench({ q7, q9, q12, q14, q22, q39 }); } Y_UNIT_TEST(StatsSysView) { diff --git a/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-21 b/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-21 index e553cdd7880..f6f9f083364 100644 --- a/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-21 +++ b/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-21 @@ -61,7 +61,7 @@ }, { "Name": "Filter", - "Predicate": "Apply And item.SearchPhrase != \"\"" + "Predicate": "Apply" }, { "Name": "TableFullScan", @@ -79,6 +79,41 @@ "SsaProgram": { "Command": [ { + "Assign": { + "Column": { + "Id": 106 + }, + "Constant": { + "Text": "" + } + } + }, + { + "Assign": { + "Column": { + "Id": 107 + }, + "Function": { + "Arguments": [ + { + "Id": 40 + }, + { + "Id": 106 + } + ], + "Id": 2 + } + } + }, + { + "Filter": { + "Predicate": { + "Id": 107 + } + } + }, + { "Projection": { "Columns": [ { diff --git a/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-22 b/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-22 index 23d7b2303d4..07324fc556e 100644 --- a/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-22 +++ b/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-22 @@ -75,7 +75,7 @@ }, { "Name": "Filter", - "Predicate": "Apply And Not And item.SearchPhrase != \"\"" + "Predicate": "Apply And Not" }, { "Name": "TableFullScan", @@ -95,6 +95,41 @@ "SsaProgram": { "Command": [ { + "Assign": { + "Column": { + "Id": 106 + }, + "Constant": { + "Text": "" + } + } + }, + { + "Assign": { + "Column": { + "Id": 107 + }, + "Function": { + "Arguments": [ + { + "Id": 40 + }, + { + "Id": 106 + } + ], + "Id": 2 + } + } + }, + { + "Filter": { + "Predicate": { + "Id": 107 + } + } + }, + { "Projection": { "Columns": [ { @@ -166,7 +201,7 @@ }, { "Name": "Filter", - "Predicate": "Apply And Not And item.SearchPhrase != \"\"" + "Predicate": "Apply And Not" }, { "Name": "TableFullScan", @@ -186,6 +221,41 @@ "SsaProgram": { "Command": [ { + "Assign": { + "Column": { + "Id": 106 + }, + "Constant": { + "Text": "" + } + } + }, + { + "Assign": { + "Column": { + "Id": 107 + }, + "Function": { + "Arguments": [ + { + "Id": 40 + }, + { + "Id": 106 + } + ], + "Id": 2 + } + } + }, + { + "Filter": { + "Predicate": { + "Id": 107 + } + } + }, + { "Projection": { "Columns": [ { diff --git a/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-28 b/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-28 index 2b21d37a9f4..f824e9f0812 100644 --- a/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-28 +++ b/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-28 @@ -56,7 +56,7 @@ "PlanNodeType": "Connection", "Plans": [ { - "Node Type": "Aggregate-Filter-TableFullScan", + "Node Type": "Aggregate-TableFullScan", "Operators": [ { "Aggregation": "{_yql_agg_0: Inc(state._yql_agg_0),_yql_agg_2: MIN(item.Referer)}", @@ -64,10 +64,6 @@ "Name": "Aggregate" }, { - "Name": "Filter", - "Predicate": "item.Referer != \"\"" - }, - { "Name": "TableFullScan", "ReadColumns": [ "Referer" @@ -82,6 +78,41 @@ "SsaProgram": { "Command": [ { + "Assign": { + "Column": { + "Id": 106 + }, + "Constant": { + "Text": "" + } + } + }, + { + "Assign": { + "Column": { + "Id": 107 + }, + "Function": { + "Arguments": [ + { + "Id": 15 + }, + { + "Id": 106 + } + ], + "Id": 2 + } + } + }, + { + "Filter": { + "Predicate": { + "Id": 107 + } + } + }, + { "Projection": { "Columns": [ { diff --git a/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-36 b/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-36 index 5010429aeee..db97565501c 100644 --- a/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-36 +++ b/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-36 @@ -61,7 +61,7 @@ }, { "Name": "Filter", - "Predicate": "item.CounterID == 62 And item.EventDate >= \"15887\" And item.EventDate <= \"15917\" And item.DontCountHits == 0 And item.IsRefresh == 0 And item.URL != \"\"" + "Predicate": "item.EventDate >= \"15887\" And item.EventDate <= \"15917\"" }, { "Name": "TableFullScan", @@ -82,21 +82,185 @@ "SsaProgram": { "Command": [ { + "Assign": { + "Column": { + "Id": 106 + }, + "Constant": { + "Int32": 62 + } + } + }, + { + "Assign": { + "Column": { + "Id": 107 + }, + "Function": { + "Arguments": [ + { + "Id": 7 + }, + { + "Id": 106 + } + ], + "Id": 1 + } + } + }, + { + "Assign": { + "Column": { + "Id": 108 + }, + "Constant": { + "Int32": 0 + } + } + }, + { + "Assign": { + "Column": { + "Id": 109 + }, + "Function": { + "Arguments": [ + { + "Id": 62 + }, + { + "Id": 108 + } + ], + "Id": 1 + } + } + }, + { + "Assign": { + "Column": { + "Id": 110 + }, + "Function": { + "Arguments": [ + { + "Id": 107 + }, + { + "Id": 109 + } + ], + "Id": 11 + } + } + }, + { + "Assign": { + "Column": { + "Id": 111 + }, + "Constant": { + "Int32": 0 + } + } + }, + { + "Assign": { + "Column": { + "Id": 112 + }, + "Function": { + "Arguments": [ + { + "Id": 16 + }, + { + "Id": 111 + } + ], + "Id": 1 + } + } + }, + { + "Assign": { + "Column": { + "Id": 113 + }, + "Constant": { + "Text": "" + } + } + }, + { + "Assign": { + "Column": { + "Id": 114 + }, + "Function": { + "Arguments": [ + { + "Id": 14 + }, + { + "Id": 113 + } + ], + "Id": 2 + } + } + }, + { + "Assign": { + "Column": { + "Id": 115 + }, + "Function": { + "Arguments": [ + { + "Id": 112 + }, + { + "Id": 114 + } + ], + "Id": 11 + } + } + }, + { + "Assign": { + "Column": { + "Id": 116 + }, + "Function": { + "Arguments": [ + { + "Id": 110 + }, + { + "Id": 115 + } + ], + "Id": 11 + } + } + }, + { + "Filter": { + "Predicate": { + "Id": 116 + } + } + }, + { "Projection": { "Columns": [ { - "Id": 7 - }, - { - "Id": 62 - }, - { "Id": 6 }, { - "Id": 16 - }, - { "Id": 14 } ] diff --git a/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-37 b/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-37 index 0c2894ddde1..bb18c4ad896 100644 --- a/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-37 +++ b/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-37 @@ -61,7 +61,7 @@ }, { "Name": "Filter", - "Predicate": "item.CounterID == 62 And item.EventDate >= \"15887\" And item.EventDate <= \"15917\" And item.DontCountHits == 0 And item.IsRefresh == 0 And item.Title != \"\"" + "Predicate": "item.EventDate >= \"15887\" And item.EventDate <= \"15917\"" }, { "Name": "TableFullScan", @@ -82,21 +82,185 @@ "SsaProgram": { "Command": [ { + "Assign": { + "Column": { + "Id": 106 + }, + "Constant": { + "Int32": 62 + } + } + }, + { + "Assign": { + "Column": { + "Id": 107 + }, + "Function": { + "Arguments": [ + { + "Id": 7 + }, + { + "Id": 106 + } + ], + "Id": 1 + } + } + }, + { + "Assign": { + "Column": { + "Id": 108 + }, + "Constant": { + "Int32": 0 + } + } + }, + { + "Assign": { + "Column": { + "Id": 109 + }, + "Function": { + "Arguments": [ + { + "Id": 62 + }, + { + "Id": 108 + } + ], + "Id": 1 + } + } + }, + { + "Assign": { + "Column": { + "Id": 110 + }, + "Function": { + "Arguments": [ + { + "Id": 107 + }, + { + "Id": 109 + } + ], + "Id": 11 + } + } + }, + { + "Assign": { + "Column": { + "Id": 111 + }, + "Constant": { + "Int32": 0 + } + } + }, + { + "Assign": { + "Column": { + "Id": 112 + }, + "Function": { + "Arguments": [ + { + "Id": 16 + }, + { + "Id": 111 + } + ], + "Id": 1 + } + } + }, + { + "Assign": { + "Column": { + "Id": 113 + }, + "Constant": { + "Text": "" + } + } + }, + { + "Assign": { + "Column": { + "Id": 114 + }, + "Function": { + "Arguments": [ + { + "Id": 3 + }, + { + "Id": 113 + } + ], + "Id": 2 + } + } + }, + { + "Assign": { + "Column": { + "Id": 115 + }, + "Function": { + "Arguments": [ + { + "Id": 112 + }, + { + "Id": 114 + } + ], + "Id": 11 + } + } + }, + { + "Assign": { + "Column": { + "Id": 116 + }, + "Function": { + "Arguments": [ + { + "Id": 110 + }, + { + "Id": 115 + } + ], + "Id": 11 + } + } + }, + { + "Filter": { + "Predicate": { + "Id": 116 + } + } + }, + { "Projection": { "Columns": [ { - "Id": 7 - }, - { - "Id": 62 - }, - { "Id": 6 }, { - "Id": 16 - }, - { "Id": 3 } ] diff --git a/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-38 b/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-38 index 03592394304..74d486843b2 100644 --- a/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-38 +++ b/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-38 @@ -80,7 +80,7 @@ }, { "Name": "Filter", - "Predicate": "item.CounterID == 62 And item.EventDate >= \"15887\" And item.EventDate <= \"15917\" And item.IsRefresh == 0 And item.IsLink != 0 And item.IsDownload == 0" + "Predicate": "item.EventDate >= \"15887\" And item.EventDate <= \"15917\"" }, { "Name": "TableFullScan", @@ -102,24 +102,185 @@ "SsaProgram": { "Command": [ { + "Assign": { + "Column": { + "Id": 106 + }, + "Constant": { + "Int32": 62 + } + } + }, + { + "Assign": { + "Column": { + "Id": 107 + }, + "Function": { + "Arguments": [ + { + "Id": 7 + }, + { + "Id": 106 + } + ], + "Id": 1 + } + } + }, + { + "Assign": { + "Column": { + "Id": 108 + }, + "Constant": { + "Int32": 0 + } + } + }, + { + "Assign": { + "Column": { + "Id": 109 + }, + "Function": { + "Arguments": [ + { + "Id": 16 + }, + { + "Id": 108 + } + ], + "Id": 1 + } + } + }, + { + "Assign": { + "Column": { + "Id": 110 + }, + "Function": { + "Arguments": [ + { + "Id": 107 + }, + { + "Id": 109 + } + ], + "Id": 11 + } + } + }, + { + "Assign": { + "Column": { + "Id": 111 + }, + "Constant": { + "Int32": 0 + } + } + }, + { + "Assign": { + "Column": { + "Id": 112 + }, + "Function": { + "Arguments": [ + { + "Id": 53 + }, + { + "Id": 111 + } + ], + "Id": 2 + } + } + }, + { + "Assign": { + "Column": { + "Id": 113 + }, + "Constant": { + "Int32": 0 + } + } + }, + { + "Assign": { + "Column": { + "Id": 114 + }, + "Function": { + "Arguments": [ + { + "Id": 54 + }, + { + "Id": 113 + } + ], + "Id": 1 + } + } + }, + { + "Assign": { + "Column": { + "Id": 115 + }, + "Function": { + "Arguments": [ + { + "Id": 112 + }, + { + "Id": 114 + } + ], + "Id": 11 + } + } + }, + { + "Assign": { + "Column": { + "Id": 116 + }, + "Function": { + "Arguments": [ + { + "Id": 110 + }, + { + "Id": 115 + } + ], + "Id": 11 + } + } + }, + { + "Filter": { + "Predicate": { + "Id": 116 + } + } + }, + { "Projection": { "Columns": [ { - "Id": 7 - }, - { "Id": 6 }, { - "Id": 54 - }, - { - "Id": 53 - }, - { - "Id": 16 - }, - { "Id": 14 } ] diff --git a/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-39 b/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-39 index 5f71553cf21..7bd59004c70 100644 --- a/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-39 +++ b/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-39 @@ -84,7 +84,7 @@ }, { "Name": "Filter", - "Predicate": "item.CounterID == 62 And item.EventDate >= \"15887\" And item.EventDate <= \"15917\" And item.IsRefresh == 0" + "Predicate": "item.EventDate >= \"15887\" And item.EventDate <= \"15917\"" }, { "Name": "TableFullScan", @@ -108,21 +108,96 @@ "SsaProgram": { "Command": [ { + "Assign": { + "Column": { + "Id": 106 + }, + "Constant": { + "Int32": 62 + } + } + }, + { + "Assign": { + "Column": { + "Id": 107 + }, + "Function": { + "Arguments": [ + { + "Id": 7 + }, + { + "Id": 106 + } + ], + "Id": 1 + } + } + }, + { + "Assign": { + "Column": { + "Id": 108 + }, + "Constant": { + "Int32": 0 + } + } + }, + { + "Assign": { + "Column": { + "Id": 109 + }, + "Function": { + "Arguments": [ + { + "Id": 16 + }, + { + "Id": 108 + } + ], + "Id": 1 + } + } + }, + { + "Assign": { + "Column": { + "Id": 110 + }, + "Function": { + "Arguments": [ + { + "Id": 107 + }, + { + "Id": 109 + } + ], + "Id": 11 + } + } + }, + { + "Filter": { + "Predicate": { + "Id": 110 + } + } + }, + { "Projection": { "Columns": [ { "Id": 41 }, { - "Id": 7 - }, - { "Id": 6 }, { - "Id": 16 - }, - { "Id": 15 }, { diff --git a/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-40 b/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-40 index 8468d5f996d..50fc1adafb8 100644 --- a/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-40 +++ b/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-40 @@ -81,7 +81,7 @@ }, { "Name": "Filter", - "Predicate": "item.CounterID == 62 And item.EventDate >= \"15887\" And item.EventDate <= \"15917\" And item.IsRefresh == 0 And item.TraficSourceID == -1 Or item.TraficSourceID == 6 And item.RefererHash == 3594120000172545465" + "Predicate": "item.EventDate >= \"15887\" And item.EventDate <= \"15917\" And item.TraficSourceID == -1 Or item.TraficSourceID == 6" }, { "Name": "TableFullScan", @@ -103,21 +103,139 @@ "SsaProgram": { "Command": [ { + "Assign": { + "Column": { + "Id": 106 + }, + "Constant": { + "Int32": 62 + } + } + }, + { + "Assign": { + "Column": { + "Id": 107 + }, + "Function": { + "Arguments": [ + { + "Id": 7 + }, + { + "Id": 106 + } + ], + "Id": 1 + } + } + }, + { + "Assign": { + "Column": { + "Id": 108 + }, + "Constant": { + "Int32": 0 + } + } + }, + { + "Assign": { + "Column": { + "Id": 109 + }, + "Function": { + "Arguments": [ + { + "Id": 16 + }, + { + "Id": 108 + } + ], + "Id": 1 + } + } + }, + { + "Assign": { + "Column": { + "Id": 110 + }, + "Constant": { + "Int64": 3594120000172545465 + } + } + }, + { + "Assign": { + "Column": { + "Id": 111 + }, + "Function": { + "Arguments": [ + { + "Id": 103 + }, + { + "Id": 110 + } + ], + "Id": 1 + } + } + }, + { + "Assign": { + "Column": { + "Id": 112 + }, + "Function": { + "Arguments": [ + { + "Id": 109 + }, + { + "Id": 111 + } + ], + "Id": 11 + } + } + }, + { + "Assign": { + "Column": { + "Id": 113 + }, + "Function": { + "Arguments": [ + { + "Id": 107 + }, + { + "Id": 112 + } + ], + "Id": 11 + } + } + }, + { + "Filter": { + "Predicate": { + "Id": 113 + } + } + }, + { "Projection": { "Columns": [ { - "Id": 7 - }, - { "Id": 6 }, { - "Id": 16 - }, - { - "Id": 103 - }, - { "Id": 38 }, { diff --git a/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-41 b/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-41 index a08e2476d44..5e9386d7947 100644 --- a/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-41 +++ b/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-41 @@ -81,7 +81,7 @@ }, { "Name": "Filter", - "Predicate": "item.CounterID == 62 And item.EventDate >= \"15887\" And item.EventDate <= \"15917\" And item.IsRefresh == 0 And item.DontCountHits == 0 And item.URLHash == 2868770270353813622" + "Predicate": "item.EventDate >= \"15887\" And item.EventDate <= \"15917\"" }, { "Name": "TableFullScan", @@ -104,24 +104,185 @@ "SsaProgram": { "Command": [ { + "Assign": { + "Column": { + "Id": 106 + }, + "Constant": { + "Int32": 62 + } + } + }, + { + "Assign": { + "Column": { + "Id": 107 + }, + "Function": { + "Arguments": [ + { + "Id": 7 + }, + { + "Id": 106 + } + ], + "Id": 1 + } + } + }, + { + "Assign": { + "Column": { + "Id": 108 + }, + "Constant": { + "Int32": 0 + } + } + }, + { + "Assign": { + "Column": { + "Id": 109 + }, + "Function": { + "Arguments": [ + { + "Id": 16 + }, + { + "Id": 108 + } + ], + "Id": 1 + } + } + }, + { + "Assign": { + "Column": { + "Id": 110 + }, + "Function": { + "Arguments": [ + { + "Id": 107 + }, + { + "Id": 109 + } + ], + "Id": 11 + } + } + }, + { + "Assign": { + "Column": { + "Id": 111 + }, + "Constant": { + "Int32": 0 + } + } + }, + { + "Assign": { + "Column": { + "Id": 112 + }, + "Function": { + "Arguments": [ + { + "Id": 62 + }, + { + "Id": 111 + } + ], + "Id": 1 + } + } + }, + { + "Assign": { + "Column": { + "Id": 113 + }, + "Constant": { + "Int64": 2868770270353813622 + } + } + }, + { + "Assign": { + "Column": { + "Id": 114 + }, + "Function": { + "Arguments": [ + { + "Id": 104 + }, + { + "Id": 113 + } + ], + "Id": 1 + } + } + }, + { + "Assign": { + "Column": { + "Id": 115 + }, + "Function": { + "Arguments": [ + { + "Id": 112 + }, + { + "Id": 114 + } + ], + "Id": 11 + } + } + }, + { + "Assign": { + "Column": { + "Id": 116 + }, + "Function": { + "Arguments": [ + { + "Id": 110 + }, + { + "Id": 115 + } + ], + "Id": 11 + } + } + }, + { + "Filter": { + "Predicate": { + "Id": 116 + } + } + }, + { "Projection": { "Columns": [ { - "Id": 7 - }, - { - "Id": 62 - }, - { "Id": 6 }, { - "Id": 16 - }, - { - "Id": 104 - }, - { "Id": 44 }, { diff --git a/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-42 b/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-42 index be95bdb7483..f3290051429 100644 --- a/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-42 +++ b/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-42 @@ -80,7 +80,7 @@ }, { "Name": "Filter", - "Predicate": "item.CounterID == 62 And item.EventDate >= \"15900\" And item.EventDate <= \"15901\" And item.IsRefresh == 0 And item.DontCountHits == 0" + "Predicate": "item.EventDate >= \"15900\" And item.EventDate <= \"15901\"" }, { "Name": "TableFullScan", @@ -101,22 +101,140 @@ "SsaProgram": { "Command": [ { + "Assign": { + "Column": { + "Id": 106 + }, + "Constant": { + "Int32": 62 + } + } + }, + { + "Assign": { + "Column": { + "Id": 107 + }, + "Function": { + "Arguments": [ + { + "Id": 7 + }, + { + "Id": 106 + } + ], + "Id": 1 + } + } + }, + { + "Assign": { + "Column": { + "Id": 108 + }, + "Constant": { + "Int32": 0 + } + } + }, + { + "Assign": { + "Column": { + "Id": 109 + }, + "Function": { + "Arguments": [ + { + "Id": 16 + }, + { + "Id": 108 + } + ], + "Id": 1 + } + } + }, + { + "Assign": { + "Column": { + "Id": 110 + }, + "Constant": { + "Int32": 0 + } + } + }, + { + "Assign": { + "Column": { + "Id": 111 + }, + "Function": { + "Arguments": [ + { + "Id": 62 + }, + { + "Id": 110 + } + ], + "Id": 1 + } + } + }, + { + "Assign": { + "Column": { + "Id": 112 + }, + "Function": { + "Arguments": [ + { + "Id": 109 + }, + { + "Id": 111 + } + ], + "Id": 11 + } + } + }, + { + "Assign": { + "Column": { + "Id": 113 + }, + "Function": { + "Arguments": [ + { + "Id": 107 + }, + { + "Id": 112 + } + ], + "Id": 11 + } + } + }, + { + "Filter": { + "Predicate": { + "Id": 113 + } + } + }, + { "Projection": { "Columns": [ { - "Id": 7 - }, - { - "Id": 62 - }, - { "Id": 6 }, { "Id": 5 - }, - { - "Id": 16 } ] } |
