1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
|
-- All PRs with information about failed tests in PR-check.
--
-- Logic:
-- 1. Take all PRs from github_data/pull_requests (deduplicate by latest exported_at).
-- 2. For each PR find the latest PR-check run within $test_lookback_days days.
-- 3. Determine if PR is blocked by tests:
-- - is_pr_blocked_by_tests_in_last_run_and_try = 1 if the latest run
-- has failures on the third attempt (attempt = 3).
-- - Third attempt is final; if a test failed there, PR is blocked.
-- 4. LEFT JOIN to detailed info for each failed test:
-- full_name, job_id, branch, logs, owners, etc.
--
-- Result: one row per (PR, failed test) pair. PRs with no failures are included
-- (with empty test fields).
--
-- Parameters:
-- $test_lookback_days — test selection window (days). Limits scanning of
-- test_runs_column. PRs with tests older than this period are excluded.
-- Default 65 ≈ 2 months (buffer for long-lived PRs).
PRAGMA AnsiInForEmptyOrNullableItemsCollections;
$test_lookback_days = 65;
SELECT
pr.pr_number AS pr_number,
pr.project_item_id AS pr_project_item_id,
pr.pr_id AS pr_pr_id,
pr.title AS pr_title,
pr.url AS pr_url,
pr.state AS pr_state,
pr.body AS pr_body,
pr.body_text AS pr_body_text,
pr.is_draft AS pr_is_draft,
pr.additions AS pr_additions,
pr.deletions AS pr_deletions,
pr.changed_files AS pr_changed_files,
pr.mergeable AS pr_mergeable,
pr.merged AS pr_merged,
pr.created_at AS pr_created_at,
pr.updated_at AS pr_updated_at,
pr.closed_at AS pr_closed_at,
pr.merged_at AS pr_merged_at,
pr.created_date AS pr_created_date,
pr.updated_date AS pr_updated_date,
pr.days_since_created AS pr_days_since_created,
pr.days_since_updated AS pr_days_since_updated,
pr.time_to_close_hours AS pr_time_to_close_hours,
pr.time_to_merge_hours AS pr_time_to_merge_hours,
pr.author_login AS pr_author_login,
pr.author_url AS pr_author_url,
pr.repository_name AS pr_repository_name,
pr.repository_url AS pr_repository_url,
pr.head_ref_name AS pr_head_ref_name,
pr.base_ref_name AS pr_base_ref_name,
pr.assignees AS pr_assignees,
pr.labels AS pr_labels,
pr.milestone AS pr_milestone,
pr.project_fields AS pr_project_fields,
pr.review_decision AS pr_review_decision,
pr.total_reviews_count AS pr_total_reviews_count,
pr.total_comments_count AS pr_total_comments_count,
pr.merged_by_login AS pr_merged_by_login,
pr.merged_by_url AS pr_merged_by_url,
pr.info AS pr_info,
pr.exported_at AS pr_exported_at,
CASE
WHEN pr.merged = 1 THEN 'merged'
WHEN pr.state = 'OPEN' THEN 'open'
WHEN pr.state = 'CLOSED' THEN 'closed'
ELSE COALESCE(pr.state, 'unknown')
END AS pr_status,
COALESCE(block.is_pr_blocked_by_tests_in_last_run_and_try, 0) AS is_pr_blocked_by_tests_in_last_run_and_try,
block.last_run_url AS last_run_url,
COALESCE(t.full_name, CAST("" AS Utf8)) AS full_name,
t.suite_folder AS suite_folder,
t.test_name AS test_name,
COALESCE(t.job_id, 0UL) AS job_id,
t.run_url AS run_url,
COALESCE(t.last_run_timestamp, Timestamp("1970-01-01T00:00:00.000000Z")) AS last_run_timestamp,
t.branch AS branch,
t.build_type AS build_type,
t.status_description AS status_description,
t.attempt_number AS attempt_number,
t.is_last_run_in_pr AS is_last_run_in_pr,
t.owners AS owners,
t.log AS log,
t.logsdir AS logsdir,
t.stderr AS stderr,
t.stdout AS stdout,
t.error_type AS error_type,
t.metadata AS metadata,
t.metrics AS metrics
FROM
(
SELECT
pr_number,
project_item_id,
pr_id,
title,
url,
state,
body,
body_text,
is_draft,
additions,
deletions,
changed_files,
mergeable,
merged,
created_at,
updated_at,
closed_at,
merged_at,
created_date,
updated_date,
days_since_created,
days_since_updated,
time_to_close_hours,
time_to_merge_hours,
author_login,
author_url,
repository_name,
repository_url,
head_ref_name,
base_ref_name,
assignees,
labels,
milestone,
project_fields,
review_decision,
total_reviews_count,
total_comments_count,
merged_by_login,
merged_by_url,
info,
exported_at
FROM (
SELECT
pr_number,
project_item_id,
pr_id,
title,
url,
state,
body,
body_text,
is_draft,
additions,
deletions,
changed_files,
mergeable,
merged,
created_at,
updated_at,
closed_at,
merged_at,
created_date,
updated_date,
days_since_created,
days_since_updated,
time_to_close_hours,
time_to_merge_hours,
author_login,
author_url,
repository_name,
repository_url,
head_ref_name,
base_ref_name,
assignees,
labels,
milestone,
project_fields,
review_decision,
total_reviews_count,
total_comments_count,
merged_by_login,
merged_by_url,
info,
exported_at,
ROW_NUMBER() OVER (PARTITION BY pr_number ORDER BY exported_at DESC, created_date DESC) AS rn
FROM
`github_data/pull_requests`
) AS ranked
WHERE
ranked.rn = 1
) AS pr
LEFT JOIN
(
SELECT
pr_number,
CASE WHEN attempt_number = 3 AND has_failure = 1 THEN 1 ELSE 0 END AS is_pr_blocked_by_tests_in_last_run_and_try,
CASE WHEN attempt_number = 3 AND has_failure = 1 THEN 'https://github.com/ydb-platform/ydb/actions/runs/' || CAST(job_id AS UTF8) ELSE NULL END AS last_run_url
FROM (
SELECT
pr_number,
job_id,
run_timestamp,
attempt_number,
has_failure,
ROW_NUMBER() OVER (PARTITION BY pr_number ORDER BY run_timestamp DESC) AS rn
FROM (
SELECT
r.job_id AS job_id,
r.pr_number AS pr_number,
MAX(r.run_timestamp) AS run_timestamp,
MAX_BY(r.attempt_number, r.run_timestamp) AS attempt_number,
MAX(CASE WHEN r.status = 'failure' AND r.attempt_number = la.last_attempt THEN 1 ELSE 0 END) AS has_failure
FROM (
SELECT
job_id,
run_timestamp,
ListHead(
Unicode::SplitToList(
CASE
WHEN String::Contains(
ListHead(ListSkip(Unicode::SplitToList(CAST(pull AS UTF8), 'PR_'), 1)),
'#'
) THEN ListHead(
ListSkip(
Unicode::SplitToList(
ListHead(ListSkip(Unicode::SplitToList(CAST(pull AS UTF8), 'PR_'), 1)),
'#'
),
1
)
)
ELSE ListHead(ListSkip(Unicode::SplitToList(CAST(pull AS UTF8), 'PR_'), 1))
END,
'_'
)
) AS pr_number,
CASE
WHEN String::Contains(pull, 'attempt_') THEN COALESCE(CAST(ListHead(Unicode::SplitToList(ListHead(ListSkip(Unicode::SplitToList(CAST(pull AS UTF8), 'attempt_'), 1)), '_')) AS Int32), 1)
WHEN String::Contains(pull, '_A') THEN COALESCE(CAST(ListHead(Unicode::SplitToList(ListHead(ListSkip(Unicode::SplitToList(CAST(pull AS UTF8), '_A'), 1)), '_')) AS Int32), 1)
ELSE 1
END AS attempt_number,
status
FROM
`test_results/test_runs_column`
WHERE
build_type = 'relwithdebinfo'
AND job_name = 'PR-check'
AND run_timestamp > CurrentUtcDate() - $test_lookback_days * Interval("P1D")
AND pull IS NOT NULL
AND pull != ''
AND String::Contains(pull, 'PR_')
AND job_id IS NOT NULL
) AS r
INNER JOIN (
SELECT
job_id,
pr_number,
MAX_BY(attempt_number, run_timestamp) AS last_attempt
FROM (
SELECT
job_id,
run_timestamp,
ListHead(
Unicode::SplitToList(
CASE
WHEN String::Contains(
ListHead(ListSkip(Unicode::SplitToList(CAST(pull AS UTF8), 'PR_'), 1)),
'#'
) THEN ListHead(
ListSkip(
Unicode::SplitToList(
ListHead(ListSkip(Unicode::SplitToList(CAST(pull AS UTF8), 'PR_'), 1)),
'#'
),
1
)
)
ELSE ListHead(ListSkip(Unicode::SplitToList(CAST(pull AS UTF8), 'PR_'), 1))
END,
'_'
)
) AS pr_number,
CASE
WHEN String::Contains(pull, 'attempt_') THEN COALESCE(CAST(ListHead(Unicode::SplitToList(ListHead(ListSkip(Unicode::SplitToList(CAST(pull AS UTF8), 'attempt_'), 1)), '_')) AS Int32), 1)
WHEN String::Contains(pull, '_A') THEN COALESCE(CAST(ListHead(Unicode::SplitToList(ListHead(ListSkip(Unicode::SplitToList(CAST(pull AS UTF8), '_A'), 1)), '_')) AS Int32), 1)
ELSE 1
END AS attempt_number
FROM
`test_results/test_runs_column`
WHERE
build_type = 'relwithdebinfo'
AND job_name = 'PR-check'
AND run_timestamp > CurrentUtcDate() - $test_lookback_days * Interval("P1D")
AND pull IS NOT NULL
AND pull != ''
AND String::Contains(pull, 'PR_')
AND job_id IS NOT NULL
) AS pr_check_runs
GROUP BY
job_id,
pr_number
) AS la
ON r.job_id = la.job_id AND r.pr_number = la.pr_number
GROUP BY
r.job_id,
r.pr_number
) AS job_agg
) AS last_run_per_pr
WHERE
rn = 1
) AS block
ON block.pr_number = CAST(pr.pr_number AS Utf8)
LEFT JOIN
(
-- All failed tests from last job_id per PR (attempt 3)
SELECT
tests.pr_number AS pr_number,
tests.full_name AS full_name,
tests.suite_folder AS suite_folder,
tests.test_name AS test_name,
tests.job_id AS job_id,
'https://github.com/ydb-platform/ydb/actions/runs/' || CAST(tests.job_id AS UTF8) AS run_url,
tests.run_timestamp AS last_run_timestamp,
tests.branch AS branch,
'relwithdebinfo' AS build_type,
COALESCE(tests.status_description, '') AS status_description,
tests.attempt_number AS attempt_number,
1 AS is_last_run_in_pr,
-- Prefer stable owners from testowners (derived from main), fallback to owners from test run row.
COALESCE(o.owners, tests.owners) AS owners,
tests.log AS log,
tests.logsdir AS logsdir,
tests.stderr AS stderr,
tests.stdout AS stdout,
tests.error_type AS error_type,
tests.metadata AS metadata,
tests.metrics AS metrics
FROM (
SELECT
job_id,
run_timestamp,
suite_folder,
test_name,
suite_folder || '/' || test_name AS full_name,
branch,
status_description,
owners,
log,
logsdir,
stderr,
stdout,
error_type,
metadata,
metrics,
ListHead(
Unicode::SplitToList(
CASE
WHEN String::Contains(
ListHead(ListSkip(Unicode::SplitToList(CAST(pull AS UTF8), 'PR_'), 1)),
'#'
) THEN ListHead(
ListSkip(
Unicode::SplitToList(
ListHead(ListSkip(Unicode::SplitToList(CAST(pull AS UTF8), 'PR_'), 1)),
'#'
),
1
)
)
ELSE ListHead(ListSkip(Unicode::SplitToList(CAST(pull AS UTF8), 'PR_'), 1))
END,
'_'
)
) AS pr_number,
CASE
WHEN String::Contains(pull, 'attempt_') THEN COALESCE(CAST(ListHead(Unicode::SplitToList(ListHead(ListSkip(Unicode::SplitToList(CAST(pull AS UTF8), 'attempt_'), 1)), '_')) AS Int32), 1)
WHEN String::Contains(pull, '_A') THEN COALESCE(CAST(ListHead(Unicode::SplitToList(ListHead(ListSkip(Unicode::SplitToList(CAST(pull AS UTF8), '_A'), 1)), '_')) AS Int32), 1)
ELSE 1
END AS attempt_number
FROM
`test_results/test_runs_column`
WHERE
build_type = 'relwithdebinfo'
AND job_name = 'PR-check'
AND status = 'failure'
AND run_timestamp > CurrentUtcDate() - $test_lookback_days * Interval("P1D")
AND pull IS NOT NULL
AND pull != ''
AND String::Contains(pull, 'PR_')
AND job_id IS NOT NULL
AND branch IS NOT NULL
AND suite_folder IS NOT NULL
AND test_name IS NOT NULL
) AS tests
LEFT JOIN
`test_results/analytics/testowners` AS o
ON o.suite_folder = tests.suite_folder
AND o.test_name = tests.test_name
INNER JOIN (
-- Last job_id for each PR (attempt 3)
SELECT
pr_number,
MAX_BY(job_id, run_timestamp) AS last_job_id
FROM (
SELECT
job_id,
run_timestamp,
ListHead(
Unicode::SplitToList(
CASE
WHEN String::Contains(
ListHead(ListSkip(Unicode::SplitToList(CAST(pull AS UTF8), 'PR_'), 1)),
'#'
) THEN ListHead(
ListSkip(
Unicode::SplitToList(
ListHead(ListSkip(Unicode::SplitToList(CAST(pull AS UTF8), 'PR_'), 1)),
'#'
),
1
)
)
ELSE ListHead(ListSkip(Unicode::SplitToList(CAST(pull AS UTF8), 'PR_'), 1))
END,
'_'
)
) AS pr_number,
CASE
WHEN String::Contains(pull, 'attempt_') THEN COALESCE(CAST(ListHead(Unicode::SplitToList(ListHead(ListSkip(Unicode::SplitToList(CAST(pull AS UTF8), 'attempt_'), 1)), '_')) AS Int32), 1)
WHEN String::Contains(pull, '_A') THEN COALESCE(CAST(ListHead(Unicode::SplitToList(ListHead(ListSkip(Unicode::SplitToList(CAST(pull AS UTF8), '_A'), 1)), '_')) AS Int32), 1)
ELSE 1
END AS attempt_number
FROM
`test_results/test_runs_column`
WHERE
build_type = 'relwithdebinfo'
AND job_name = 'PR-check'
AND run_timestamp > CurrentUtcDate() - $test_lookback_days * Interval("P1D")
AND pull IS NOT NULL
AND pull != ''
AND String::Contains(pull, 'PR_')
AND job_id IS NOT NULL
) AS all_runs
WHERE attempt_number = 3
GROUP BY pr_number
) AS last_job
ON tests.pr_number = last_job.pr_number
AND tests.job_id = last_job.last_job_id
WHERE tests.attempt_number = 3
) AS t
ON CAST(t.pr_number AS Uint64) = pr.pr_number
ORDER BY
pr.pr_number,
t.last_run_timestamp DESC,
t.full_name
|