summaryrefslogtreecommitdiffstats
path: root/.github/scripts/analytics/data_mart_queries/pr_with_test_failures.sql
blob: 6d0e182417d2443f908030b44ae00fc842300342 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
-- All PRs with information about failed tests in PR-check.
--
-- Logic:
--   1. Take all PRs from github_data/pull_requests (deduplicate by latest exported_at).
--   2. For each PR find the latest PR-check run within $test_lookback_days days.
--   3. Determine if PR is blocked by tests:
--      - is_pr_blocked_by_tests_in_last_run_and_try = 1 if the latest run
--        has failures on the third attempt (attempt = 3).
--      - Third attempt is final; if a test failed there, PR is blocked.
--   4. LEFT JOIN to detailed info for each failed test:
--      full_name, job_id, branch, logs, owners, etc.
--
-- Result: one row per (PR, failed test) pair. PRs with no failures are included
-- (with empty test fields).
--
-- Parameters:
--   $test_lookback_days — test selection window (days). Limits scanning of
--     test_runs_column. PRs with tests older than this period are excluded.
--     Default 65 ≈ 2 months (buffer for long-lived PRs).

PRAGMA AnsiInForEmptyOrNullableItemsCollections;

$test_lookback_days = 65;

SELECT 
    pr.pr_number AS pr_number,
    pr.project_item_id AS pr_project_item_id,
    pr.pr_id AS pr_pr_id,
    pr.title AS pr_title,
    pr.url AS pr_url,
    pr.state AS pr_state,
    pr.body AS pr_body,
    pr.body_text AS pr_body_text,
    pr.is_draft AS pr_is_draft,
    pr.additions AS pr_additions,
    pr.deletions AS pr_deletions,
    pr.changed_files AS pr_changed_files,
    pr.mergeable AS pr_mergeable,
    pr.merged AS pr_merged,
    pr.created_at AS pr_created_at,
    pr.updated_at AS pr_updated_at,
    pr.closed_at AS pr_closed_at,
    pr.merged_at AS pr_merged_at,
    pr.created_date AS pr_created_date,
    pr.updated_date AS pr_updated_date,
    pr.days_since_created AS pr_days_since_created,
    pr.days_since_updated AS pr_days_since_updated,
    pr.time_to_close_hours AS pr_time_to_close_hours,
    pr.time_to_merge_hours AS pr_time_to_merge_hours,
    pr.author_login AS pr_author_login,
    pr.author_url AS pr_author_url,
    pr.repository_name AS pr_repository_name,
    pr.repository_url AS pr_repository_url,
    pr.head_ref_name AS pr_head_ref_name,
    pr.base_ref_name AS pr_base_ref_name,
    pr.assignees AS pr_assignees,
    pr.labels AS pr_labels,
    pr.milestone AS pr_milestone,
    pr.project_fields AS pr_project_fields,
    pr.review_decision AS pr_review_decision,
    pr.total_reviews_count AS pr_total_reviews_count,
    pr.total_comments_count AS pr_total_comments_count,
    pr.merged_by_login AS pr_merged_by_login,
    pr.merged_by_url AS pr_merged_by_url,
    pr.info AS pr_info,
    pr.exported_at AS pr_exported_at,
    CASE 
        WHEN pr.merged = 1 THEN 'merged'
        WHEN pr.state = 'OPEN' THEN 'open'
        WHEN pr.state = 'CLOSED' THEN 'closed'
        ELSE COALESCE(pr.state, 'unknown')
    END AS pr_status,
    COALESCE(block.is_pr_blocked_by_tests_in_last_run_and_try, 0) AS is_pr_blocked_by_tests_in_last_run_and_try,
    block.last_run_url AS last_run_url,
    COALESCE(t.full_name, CAST("" AS Utf8)) AS full_name,
    t.suite_folder AS suite_folder,
    t.test_name AS test_name,
    COALESCE(t.job_id, 0UL) AS job_id,
    t.run_url AS run_url,
    COALESCE(t.last_run_timestamp, Timestamp("1970-01-01T00:00:00.000000Z")) AS last_run_timestamp,
    t.branch AS branch,
    t.build_type AS build_type,
    t.status_description AS status_description,
    t.attempt_number AS attempt_number,
    t.is_last_run_in_pr AS is_last_run_in_pr,
    t.owners AS owners,
    t.log AS log,
    t.logsdir AS logsdir,
    t.stderr AS stderr,
    t.stdout AS stdout,
    t.error_type AS error_type,
    t.metadata AS metadata,
    t.metrics AS metrics
FROM 
    (
        SELECT 
            pr_number,
            project_item_id,
            pr_id,
            title,
            url,
            state,
            body,
            body_text,
            is_draft,
            additions,
            deletions,
            changed_files,
            mergeable,
            merged,
            created_at,
            updated_at,
            closed_at,
            merged_at,
            created_date,
            updated_date,
            days_since_created,
            days_since_updated,
            time_to_close_hours,
            time_to_merge_hours,
            author_login,
            author_url,
            repository_name,
            repository_url,
            head_ref_name,
            base_ref_name,
            assignees,
            labels,
            milestone,
            project_fields,
            review_decision,
            total_reviews_count,
            total_comments_count,
            merged_by_login,
            merged_by_url,
            info,
            exported_at
        FROM (
            SELECT 
                pr_number,
                project_item_id,
                pr_id,
                title,
                url,
                state,
                body,
                body_text,
                is_draft,
                additions,
                deletions,
                changed_files,
                mergeable,
                merged,
                created_at,
                updated_at,
                closed_at,
                merged_at,
                created_date,
                updated_date,
                days_since_created,
                days_since_updated,
                time_to_close_hours,
                time_to_merge_hours,
                author_login,
                author_url,
                repository_name,
                repository_url,
                head_ref_name,
                base_ref_name,
                assignees,
                labels,
                milestone,
                project_fields,
                review_decision,
                total_reviews_count,
                total_comments_count,
                merged_by_login,
                merged_by_url,
                info,
                exported_at,
                ROW_NUMBER() OVER (PARTITION BY pr_number ORDER BY exported_at DESC, created_date DESC) AS rn
            FROM 
                `github_data/pull_requests`
        ) AS ranked
        WHERE 
            ranked.rn = 1
    ) AS pr
LEFT JOIN 
    (
        SELECT 
            pr_number,
            CASE WHEN attempt_number = 3 AND has_failure = 1 THEN 1 ELSE 0 END AS is_pr_blocked_by_tests_in_last_run_and_try,
            CASE WHEN attempt_number = 3 AND has_failure = 1 THEN 'https://github.com/ydb-platform/ydb/actions/runs/' || CAST(job_id AS UTF8) ELSE NULL END AS last_run_url
        FROM (
            SELECT 
                pr_number,
                job_id,
                run_timestamp,
                attempt_number,
                has_failure,
                ROW_NUMBER() OVER (PARTITION BY pr_number ORDER BY run_timestamp DESC) AS rn
            FROM (
                SELECT 
                    r.job_id AS job_id,
                    r.pr_number AS pr_number,
                    MAX(r.run_timestamp) AS run_timestamp,
                    MAX_BY(r.attempt_number, r.run_timestamp) AS attempt_number,
                    MAX(CASE WHEN r.status = 'failure' AND r.attempt_number = la.last_attempt THEN 1 ELSE 0 END) AS has_failure
                FROM (
                    SELECT 
                        job_id,
                        run_timestamp,
                        ListHead(
                            Unicode::SplitToList(
                                CASE 
                                    WHEN String::Contains(
                                        ListHead(ListSkip(Unicode::SplitToList(CAST(pull AS UTF8), 'PR_'), 1)),
                                        '#'
                                    ) THEN ListHead(
                                        ListSkip(
                                            Unicode::SplitToList(
                                                ListHead(ListSkip(Unicode::SplitToList(CAST(pull AS UTF8), 'PR_'), 1)),
                                                '#'
                                            ),
                                            1
                                        )
                                    )
                                    ELSE ListHead(ListSkip(Unicode::SplitToList(CAST(pull AS UTF8), 'PR_'), 1))
                                END,
                                '_'
                            )
                        ) AS pr_number,
                        CASE 
                            WHEN String::Contains(pull, 'attempt_') THEN COALESCE(CAST(ListHead(Unicode::SplitToList(ListHead(ListSkip(Unicode::SplitToList(CAST(pull AS UTF8), 'attempt_'), 1)), '_')) AS Int32), 1)
                            WHEN String::Contains(pull, '_A') THEN COALESCE(CAST(ListHead(Unicode::SplitToList(ListHead(ListSkip(Unicode::SplitToList(CAST(pull AS UTF8), '_A'), 1)), '_')) AS Int32), 1)
                            ELSE 1
                        END AS attempt_number,
                        status
                    FROM 
                        `test_results/test_runs_column`
                    WHERE 
                        build_type = 'relwithdebinfo'
                        AND job_name = 'PR-check'
                        AND run_timestamp > CurrentUtcDate() - $test_lookback_days * Interval("P1D")
                        AND pull IS NOT NULL
                        AND pull != ''
                        AND String::Contains(pull, 'PR_')
                        AND job_id IS NOT NULL
                ) AS r
                INNER JOIN (
                    SELECT 
                        job_id,
                        pr_number,
                        MAX_BY(attempt_number, run_timestamp) AS last_attempt
                    FROM (
                        SELECT 
                            job_id,
                            run_timestamp,
                            ListHead(
                                Unicode::SplitToList(
                                    CASE 
                                        WHEN String::Contains(
                                            ListHead(ListSkip(Unicode::SplitToList(CAST(pull AS UTF8), 'PR_'), 1)),
                                            '#'
                                        ) THEN ListHead(
                                            ListSkip(
                                                Unicode::SplitToList(
                                                    ListHead(ListSkip(Unicode::SplitToList(CAST(pull AS UTF8), 'PR_'), 1)),
                                                    '#'
                                                ),
                                                1
                                            )
                                        )
                                        ELSE ListHead(ListSkip(Unicode::SplitToList(CAST(pull AS UTF8), 'PR_'), 1))
                                    END,
                                    '_'
                                )
                            ) AS pr_number,
                            CASE 
                                WHEN String::Contains(pull, 'attempt_') THEN COALESCE(CAST(ListHead(Unicode::SplitToList(ListHead(ListSkip(Unicode::SplitToList(CAST(pull AS UTF8), 'attempt_'), 1)), '_')) AS Int32), 1)
                                WHEN String::Contains(pull, '_A') THEN COALESCE(CAST(ListHead(Unicode::SplitToList(ListHead(ListSkip(Unicode::SplitToList(CAST(pull AS UTF8), '_A'), 1)), '_')) AS Int32), 1)
                                ELSE 1
                            END AS attempt_number
                        FROM 
                            `test_results/test_runs_column`
                        WHERE 
                            build_type = 'relwithdebinfo'
                            AND job_name = 'PR-check'
                            AND run_timestamp > CurrentUtcDate() - $test_lookback_days * Interval("P1D")
                            AND pull IS NOT NULL
                            AND pull != ''
                            AND String::Contains(pull, 'PR_')
                            AND job_id IS NOT NULL
                    ) AS pr_check_runs
                    GROUP BY 
                        job_id,
                        pr_number
                ) AS la
                ON r.job_id = la.job_id AND r.pr_number = la.pr_number
                GROUP BY 
                    r.job_id,
                    r.pr_number
            ) AS job_agg
        ) AS last_run_per_pr
        WHERE 
            rn = 1
    ) AS block
    ON block.pr_number = CAST(pr.pr_number AS Utf8)
LEFT JOIN 
    (
        -- All failed tests from last job_id per PR (attempt 3)
        SELECT 
            tests.pr_number AS pr_number,
            tests.full_name AS full_name,
            tests.suite_folder AS suite_folder,
            tests.test_name AS test_name,
            tests.job_id AS job_id,
            'https://github.com/ydb-platform/ydb/actions/runs/' || CAST(tests.job_id AS UTF8) AS run_url,
            tests.run_timestamp AS last_run_timestamp,
            tests.branch AS branch,
            'relwithdebinfo' AS build_type,
            COALESCE(tests.status_description, '') AS status_description,
            tests.attempt_number AS attempt_number,
            1 AS is_last_run_in_pr,
            -- Prefer stable owners from testowners (derived from main), fallback to owners from test run row.
            COALESCE(o.owners, tests.owners) AS owners,
            tests.log AS log,
            tests.logsdir AS logsdir,
            tests.stderr AS stderr,
            tests.stdout AS stdout,
            tests.error_type AS error_type,
            tests.metadata AS metadata,
            tests.metrics AS metrics
        FROM (
            SELECT 
                job_id,
                run_timestamp,
                suite_folder,
                test_name,
                suite_folder || '/' || test_name AS full_name,
                branch,
                status_description,
                owners,
                log,
                logsdir,
                stderr,
                stdout,
                error_type,
                metadata,
                metrics,
                ListHead(
                    Unicode::SplitToList(
                        CASE 
                            WHEN String::Contains(
                                ListHead(ListSkip(Unicode::SplitToList(CAST(pull AS UTF8), 'PR_'), 1)),
                                '#'
                            ) THEN ListHead(
                                ListSkip(
                                    Unicode::SplitToList(
                                        ListHead(ListSkip(Unicode::SplitToList(CAST(pull AS UTF8), 'PR_'), 1)),
                                        '#'
                                    ),
                                    1
                                )
                            )
                            ELSE ListHead(ListSkip(Unicode::SplitToList(CAST(pull AS UTF8), 'PR_'), 1))
                        END,
                        '_'
                    )
                ) AS pr_number,
                CASE 
                    WHEN String::Contains(pull, 'attempt_') THEN COALESCE(CAST(ListHead(Unicode::SplitToList(ListHead(ListSkip(Unicode::SplitToList(CAST(pull AS UTF8), 'attempt_'), 1)), '_')) AS Int32), 1)
                    WHEN String::Contains(pull, '_A') THEN COALESCE(CAST(ListHead(Unicode::SplitToList(ListHead(ListSkip(Unicode::SplitToList(CAST(pull AS UTF8), '_A'), 1)), '_')) AS Int32), 1)
                    ELSE 1
                END AS attempt_number
            FROM 
                `test_results/test_runs_column`
            WHERE 
                build_type = 'relwithdebinfo'
                AND job_name = 'PR-check'
                AND status = 'failure'
                AND run_timestamp > CurrentUtcDate() - $test_lookback_days * Interval("P1D")
                AND pull IS NOT NULL
                AND pull != ''
                AND String::Contains(pull, 'PR_')
                AND job_id IS NOT NULL
                AND branch IS NOT NULL
                AND suite_folder IS NOT NULL
                AND test_name IS NOT NULL
        ) AS tests
        LEFT JOIN
            `test_results/analytics/testowners` AS o
            ON o.suite_folder = tests.suite_folder
            AND o.test_name = tests.test_name
        INNER JOIN (
            -- Last job_id for each PR (attempt 3)
            SELECT 
                pr_number,
                MAX_BY(job_id, run_timestamp) AS last_job_id
            FROM (
                SELECT 
                    job_id,
                    run_timestamp,
                    ListHead(
                        Unicode::SplitToList(
                            CASE 
                                WHEN String::Contains(
                                    ListHead(ListSkip(Unicode::SplitToList(CAST(pull AS UTF8), 'PR_'), 1)),
                                    '#'
                                ) THEN ListHead(
                                    ListSkip(
                                        Unicode::SplitToList(
                                            ListHead(ListSkip(Unicode::SplitToList(CAST(pull AS UTF8), 'PR_'), 1)),
                                            '#'
                                        ),
                                        1
                                    )
                                )
                                ELSE ListHead(ListSkip(Unicode::SplitToList(CAST(pull AS UTF8), 'PR_'), 1))
                            END,
                            '_'
                        )
                    ) AS pr_number,
                    CASE 
                        WHEN String::Contains(pull, 'attempt_') THEN COALESCE(CAST(ListHead(Unicode::SplitToList(ListHead(ListSkip(Unicode::SplitToList(CAST(pull AS UTF8), 'attempt_'), 1)), '_')) AS Int32), 1)
                        WHEN String::Contains(pull, '_A') THEN COALESCE(CAST(ListHead(Unicode::SplitToList(ListHead(ListSkip(Unicode::SplitToList(CAST(pull AS UTF8), '_A'), 1)), '_')) AS Int32), 1)
                        ELSE 1
                    END AS attempt_number
                FROM 
                    `test_results/test_runs_column`
                WHERE 
                    build_type = 'relwithdebinfo'
                    AND job_name = 'PR-check'
                    AND run_timestamp > CurrentUtcDate() - $test_lookback_days * Interval("P1D")
                    AND pull IS NOT NULL
                    AND pull != ''
                    AND String::Contains(pull, 'PR_')
                    AND job_id IS NOT NULL
            ) AS all_runs
            WHERE attempt_number = 3
            GROUP BY pr_number
        ) AS last_job
        ON tests.pr_number = last_job.pr_number 
        AND tests.job_id = last_job.last_job_id
        WHERE tests.attempt_number = 3
    ) AS t
    ON CAST(t.pr_number AS Uint64) = pr.pr_number
ORDER BY 
    pr.pr_number,
    t.last_run_timestamp DESC,
    t.full_name