diff options
| author | Filitov Mikhail <[email protected]> | 2026-06-18 06:23:53 +0200 |
|---|---|---|
| committer | GitHub <[email protected]> | 2026-06-18 07:23:53 +0300 |
| commit | eb5c7aa4ab32bc0a30371fd39f63efb8a80130e4 (patch) | |
| tree | 7e1d9538fc4aa9237848a1e56cdbe29dac9f9eec | |
| parent | 1b9f600f8b921bb9bc462a7396defeb032801ee2 (diff) | |
[BHJ] fix side selection for left join with large right side (#43761)
| -rw-r--r-- | ydb/core/kqp/opt/logical/kqp_opt_cbo.cpp | 4 | ||||
| -rw-r--r-- | ydb/core/kqp/ut/join/kqp_block_hash_join_ut.cpp | 92 |
2 files changed, 94 insertions, 2 deletions
diff --git a/ydb/core/kqp/opt/logical/kqp_opt_cbo.cpp b/ydb/core/kqp/opt/logical/kqp_opt_cbo.cpp index 6b274d0a177..b4de5fdbcac 100644 --- a/ydb/core/kqp/opt/logical/kqp_opt_cbo.cpp +++ b/ydb/core/kqp/opt/logical/kqp_opt_cbo.cpp @@ -251,8 +251,8 @@ double TKqpProviderContext::ComputeJoinCost( + CONSTS_GRACEJOIN_OUTPUT_MULT * std::pow(estimatedOutputByteSize, CONSTS_GRACEJOIN_OUTPUT_POW)); } case EJoinAlgoType::ReverseBlockJoin: { - return 1.5 * (CONSTS_GRACEJOIN_LEFT_SIDE_MULT * std::pow(leftSideByteSize, CONSTS_GRACEJOIN_LEFT_SIDE_POW) - + CONSTS_GRACEJOIN_RIGHT_SIDE_MULT * std::pow(rightSideByteSize, CONSTS_GRACEJOIN_RIGHT_SIDE_POW) + return 1.5 * (CONSTS_GRACEJOIN_LEFT_SIDE_MULT * std::pow(rightSideByteSize, CONSTS_GRACEJOIN_LEFT_SIDE_POW) + + CONSTS_GRACEJOIN_RIGHT_SIDE_MULT * std::pow(leftSideByteSize, CONSTS_GRACEJOIN_RIGHT_SIDE_POW) + CONSTS_GRACEJOIN_OUTPUT_MULT * std::pow(estimatedOutputByteSize, CONSTS_GRACEJOIN_OUTPUT_POW)); } default: diff --git a/ydb/core/kqp/ut/join/kqp_block_hash_join_ut.cpp b/ydb/core/kqp/ut/join/kqp_block_hash_join_ut.cpp index f2efa760338..6335c9de5fb 100644 --- a/ydb/core/kqp/ut/join/kqp_block_hash_join_ut.cpp +++ b/ydb/core/kqp/ut/join/kqp_block_hash_join_ut.cpp @@ -332,6 +332,98 @@ Y_UNIT_TEST_SUITE(KqpBlockHashJoin) { } } + Y_UNIT_TEST(BlockHashJoinLeftJoinBuildLeftSide) { + TKikimrSettings settings = TKikimrSettings().SetWithSampleTables(false); + settings.AppConfig.MutableTableServiceConfig()->SetEnableOlapSink(true); + TKikimrRunner kikimr(settings); + + auto queryClient = kikimr.GetQueryClient(); + { + auto status = queryClient.ExecuteQuery( + R"( + CREATE TABLE `/Root/left_table` ( + id Int32 NOT NULL, + data String NOT NULL, + PRIMARY KEY (id, data) + ) + WITH (STORE = COLUMN); + + CREATE TABLE `/Root/right_table` ( + id Int32 NOT NULL, + data String NOT NULL, + PRIMARY KEY (id, data) + ) + WITH (STORE = COLUMN); + )", NYdb::NQuery::TTxControl::NoTx() + ).GetValueSync(); + UNIT_ASSERT_C(status.IsSuccess(), status.GetIssues().ToString()); + } + + { + auto status = queryClient.ExecuteQuery( + R"( + INSERT INTO `/Root/left_table` (id, data) VALUES + (1, "1"), + (2, "2"), + (3, "3"), + (4, "4"); + + INSERT INTO `/Root/right_table` (id, data) VALUES + (1, "1"), + (2, "2"), + (3, "3"); + )", NYdb::NQuery::TTxControl::BeginTx().CommitTx() + ).GetValueSync(); + UNIT_ASSERT_C(status.IsSuccess(), status.GetIssues().ToString()); + } + + { + TString hints = R"( + PRAGMA TablePathPrefix='/Root'; + PRAGMA ydb.OptimizerHints= + ' + Bytes(L # 10e9) + Bytes(R # 10e20) + Rows(L # 10e9) + Rows(R # 10e20) + '; + )"; + TString blocks = "PRAGMA ydb.UseBlockHashJoin = \"true\";\n\n"; + TString select = R"( + SELECT L.id AS left_id, L.data AS left_data, R.id AS right_id, R.data AS right_data + FROM `left_table` AS L + LEFT JOIN `right_table` AS R + ON L.id = R.id AND L.data = R.data + ORDER BY left_id; + )"; + + TString joinQuery = TStringBuilder() << hints << blocks << select; + + auto status = queryClient.ExecuteQuery(joinQuery, NYdb::NQuery::TTxControl::BeginTx().CommitTx()).GetValueSync(); + UNIT_ASSERT_C(status.IsSuccess(), status.GetIssues().ToString()); + + auto resultSet = status.GetResultSets()[0]; + UNIT_ASSERT_VALUES_EQUAL(resultSet.RowsCount(), 4); + + auto explainResult = queryClient.ExecuteQuery( + joinQuery, + NYdb::NQuery::TTxControl::NoTx(), + NYdb::NQuery::TExecuteQuerySettings().ExecMode(NYdb::NQuery::EExecMode::Explain) + ).GetValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(explainResult.GetStatus(), EStatus::SUCCESS, explainResult.GetIssues().ToString()); + + auto astOpt = explainResult.GetStats()->GetAst(); + UNIT_ASSERT(astOpt.has_value()); + TString ast = TString(*astOpt); + Cout << "AST (LEFT JOIN, small left side -> build left): " << ast << Endl; + + UNIT_ASSERT_C(ast.Contains("BlockHashJoin") || ast.Contains("DqBlockHashJoin"), + TStringBuilder() << "AST should contain BlockHashJoin. Actual AST: " << ast); + UNIT_ASSERT_C(ast.Contains(R"('('"BuildSide" '"Left")"), + TStringBuilder() << "AST should contain the BuildSide=Left. Actual AST: " << ast); + } + } + Y_UNIT_TEST(BlockHashJoinLeftSemiJoin) { TKikimrSettings settings = TKikimrSettings().SetWithSampleTables(false); settings.AppConfig.MutableTableServiceConfig()->SetEnableOlapSink(true); |
