summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorFilitov Mikhail <[email protected]>2026-06-18 06:23:53 +0200
committerGitHub <[email protected]>2026-06-18 07:23:53 +0300
commiteb5c7aa4ab32bc0a30371fd39f63efb8a80130e4 (patch)
tree7e1d9538fc4aa9237848a1e56cdbe29dac9f9eec
parent1b9f600f8b921bb9bc462a7396defeb032801ee2 (diff)
[BHJ] fix side selection for left join with large right side (#43761)
-rw-r--r--ydb/core/kqp/opt/logical/kqp_opt_cbo.cpp4
-rw-r--r--ydb/core/kqp/ut/join/kqp_block_hash_join_ut.cpp92
2 files changed, 94 insertions, 2 deletions
diff --git a/ydb/core/kqp/opt/logical/kqp_opt_cbo.cpp b/ydb/core/kqp/opt/logical/kqp_opt_cbo.cpp
index 6b274d0a177..b4de5fdbcac 100644
--- a/ydb/core/kqp/opt/logical/kqp_opt_cbo.cpp
+++ b/ydb/core/kqp/opt/logical/kqp_opt_cbo.cpp
@@ -251,8 +251,8 @@ double TKqpProviderContext::ComputeJoinCost(
+ CONSTS_GRACEJOIN_OUTPUT_MULT * std::pow(estimatedOutputByteSize, CONSTS_GRACEJOIN_OUTPUT_POW));
}
case EJoinAlgoType::ReverseBlockJoin: {
- return 1.5 * (CONSTS_GRACEJOIN_LEFT_SIDE_MULT * std::pow(leftSideByteSize, CONSTS_GRACEJOIN_LEFT_SIDE_POW)
- + CONSTS_GRACEJOIN_RIGHT_SIDE_MULT * std::pow(rightSideByteSize, CONSTS_GRACEJOIN_RIGHT_SIDE_POW)
+ return 1.5 * (CONSTS_GRACEJOIN_LEFT_SIDE_MULT * std::pow(rightSideByteSize, CONSTS_GRACEJOIN_LEFT_SIDE_POW)
+ + CONSTS_GRACEJOIN_RIGHT_SIDE_MULT * std::pow(leftSideByteSize, CONSTS_GRACEJOIN_RIGHT_SIDE_POW)
+ CONSTS_GRACEJOIN_OUTPUT_MULT * std::pow(estimatedOutputByteSize, CONSTS_GRACEJOIN_OUTPUT_POW));
}
default:
diff --git a/ydb/core/kqp/ut/join/kqp_block_hash_join_ut.cpp b/ydb/core/kqp/ut/join/kqp_block_hash_join_ut.cpp
index f2efa760338..6335c9de5fb 100644
--- a/ydb/core/kqp/ut/join/kqp_block_hash_join_ut.cpp
+++ b/ydb/core/kqp/ut/join/kqp_block_hash_join_ut.cpp
@@ -332,6 +332,98 @@ Y_UNIT_TEST_SUITE(KqpBlockHashJoin) {
}
}
+ Y_UNIT_TEST(BlockHashJoinLeftJoinBuildLeftSide) {
+ TKikimrSettings settings = TKikimrSettings().SetWithSampleTables(false);
+ settings.AppConfig.MutableTableServiceConfig()->SetEnableOlapSink(true);
+ TKikimrRunner kikimr(settings);
+
+ auto queryClient = kikimr.GetQueryClient();
+ {
+ auto status = queryClient.ExecuteQuery(
+ R"(
+ CREATE TABLE `/Root/left_table` (
+ id Int32 NOT NULL,
+ data String NOT NULL,
+ PRIMARY KEY (id, data)
+ )
+ WITH (STORE = COLUMN);
+
+ CREATE TABLE `/Root/right_table` (
+ id Int32 NOT NULL,
+ data String NOT NULL,
+ PRIMARY KEY (id, data)
+ )
+ WITH (STORE = COLUMN);
+ )", NYdb::NQuery::TTxControl::NoTx()
+ ).GetValueSync();
+ UNIT_ASSERT_C(status.IsSuccess(), status.GetIssues().ToString());
+ }
+
+ {
+ auto status = queryClient.ExecuteQuery(
+ R"(
+ INSERT INTO `/Root/left_table` (id, data) VALUES
+ (1, "1"),
+ (2, "2"),
+ (3, "3"),
+ (4, "4");
+
+ INSERT INTO `/Root/right_table` (id, data) VALUES
+ (1, "1"),
+ (2, "2"),
+ (3, "3");
+ )", NYdb::NQuery::TTxControl::BeginTx().CommitTx()
+ ).GetValueSync();
+ UNIT_ASSERT_C(status.IsSuccess(), status.GetIssues().ToString());
+ }
+
+ {
+ TString hints = R"(
+ PRAGMA TablePathPrefix='/Root';
+ PRAGMA ydb.OptimizerHints=
+ '
+ Bytes(L # 10e9)
+ Bytes(R # 10e20)
+ Rows(L # 10e9)
+ Rows(R # 10e20)
+ ';
+ )";
+ TString blocks = "PRAGMA ydb.UseBlockHashJoin = \"true\";\n\n";
+ TString select = R"(
+ SELECT L.id AS left_id, L.data AS left_data, R.id AS right_id, R.data AS right_data
+ FROM `left_table` AS L
+ LEFT JOIN `right_table` AS R
+ ON L.id = R.id AND L.data = R.data
+ ORDER BY left_id;
+ )";
+
+ TString joinQuery = TStringBuilder() << hints << blocks << select;
+
+ auto status = queryClient.ExecuteQuery(joinQuery, NYdb::NQuery::TTxControl::BeginTx().CommitTx()).GetValueSync();
+ UNIT_ASSERT_C(status.IsSuccess(), status.GetIssues().ToString());
+
+ auto resultSet = status.GetResultSets()[0];
+ UNIT_ASSERT_VALUES_EQUAL(resultSet.RowsCount(), 4);
+
+ auto explainResult = queryClient.ExecuteQuery(
+ joinQuery,
+ NYdb::NQuery::TTxControl::NoTx(),
+ NYdb::NQuery::TExecuteQuerySettings().ExecMode(NYdb::NQuery::EExecMode::Explain)
+ ).GetValueSync();
+ UNIT_ASSERT_VALUES_EQUAL_C(explainResult.GetStatus(), EStatus::SUCCESS, explainResult.GetIssues().ToString());
+
+ auto astOpt = explainResult.GetStats()->GetAst();
+ UNIT_ASSERT(astOpt.has_value());
+ TString ast = TString(*astOpt);
+ Cout << "AST (LEFT JOIN, small left side -> build left): " << ast << Endl;
+
+ UNIT_ASSERT_C(ast.Contains("BlockHashJoin") || ast.Contains("DqBlockHashJoin"),
+ TStringBuilder() << "AST should contain BlockHashJoin. Actual AST: " << ast);
+ UNIT_ASSERT_C(ast.Contains(R"('('"BuildSide" '"Left")"),
+ TStringBuilder() << "AST should contain the BuildSide=Left. Actual AST: " << ast);
+ }
+ }
+
Y_UNIT_TEST(BlockHashJoinLeftSemiJoin) {
TKikimrSettings settings = TKikimrSettings().SetWithSampleTables(false);
settings.AppConfig.MutableTableServiceConfig()->SetEnableOlapSink(true);