summaryrefslogtreecommitdiffstats
path: root/.github/scripts/analytics/upload_testowners.py
blob: 6283b82f1257a02488389b6e48daf0069af8e039 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
#!/usr/bin/env python3

import ydb
from ydb_wrapper import YDBWrapper

from testowners_utils import get_testowners_for_tests


def create_tables(ydb_wrapper, table_path):
    """Create the testowners table at `table_path` if it does not exist yet.

    Prints a progress line, then hands the path and the CREATE TABLE
    statement to ``ydb_wrapper.create_table``.

    Args:
        ydb_wrapper: object exposing ``create_table(table_path, sql)``.
        table_path: full table path; it is interpolated into the DDL text.
    """
    print(f"> create table if not exists:'{table_path}'")

    # Column-store table keyed by (test_name, suite_folder, full_name);
    # `owners` is the only nullable column.
    ddl = f"""
        CREATE table IF NOT EXISTS `{table_path}` (
            `test_name` Utf8 NOT NULL,
            `suite_folder` Utf8 NOT NULL,
            `full_name` Utf8 NOT NULL,
            `run_timestamp_last` Timestamp NOT NULL,
            `owners` Utf8 ,
            PRIMARY KEY (`test_name`, `suite_folder`, `full_name`)
        )
            PARTITION BY HASH(suite_folder,`full_name`)
            WITH (STORE = COLUMN)
        """

    ydb_wrapper.create_table(table_path, ddl)


def _recent_tests_query(runs_table):
    """Return the query text listing tests recently run in `runs_table`.

    The query selects distinct (test_name, suite_folder) pairs with a derived
    ``full_name`` and the most recent ``run_timestamp`` per pair, restricted
    to the ``main`` branch, a fixed list of job names, runs from the previous
    UTC date onward, and rows whose ``pull`` is NULL or lacks 'manual'.
    """
    return f"""
   select
        DISTINCT test_name,
        suite_folder,
        suite_folder || '/' || test_name as full_name,
        FIRST_VALUE (run_timestamp) OVER w AS run_timestamp_last
        FROM
        `{runs_table}`
    WHERE
        run_timestamp >= CurrentUtcDate()- Interval("P1D")
        AND branch = 'main'
        and job_name in (
            'Nightly-run',
            'Regression-run',
            'Regression-run_Large',
            'Regression-run_Small_and_Medium',
            'Regression-run_compatibility',
            'Regression-whitelist-run',
            'Postcommit_relwithdebinfo',
            'Postcommit_asan'
        )
        and (pull IS NULL OR NOT String::Contains(pull, 'manual'))
        WINDOW w AS (
            PARTITION BY test_name,
            suite_folder
            ORDER BY
                run_timestamp DESC
        )
        order by
            run_timestamp_last desc
    """


def main():
    """Refresh the testowners table from the last day's test runs.

    Steps: check credentials, scan the test-results table for recently run
    tests, pass them through ``get_testowners_for_tests`` (presumably this
    attaches the ``owners`` value to each entry -- confirm against that
    helper), ensure the target table exists, and bulk-upsert the rows.

    Returns:
        1 when the credentials check fails; otherwise None.
    """
    with YDBWrapper() as ydb_wrapper:
        # Bail out early when credentials are unavailable.
        if not ydb_wrapper.check_credentials():
            return 1

        # Table paths are resolved from the wrapper's configuration.
        runs_table = ydb_wrapper.get_table_path("test_results")
        owners_table = ydb_wrapper.get_table_path("testowners")

        rows = ydb_wrapper.execute_scan_query(_recent_tests_query(runs_table))
        print(f'testowners data captured, {len(rows)} rows')

        # Copy only the fields needed downstream into plain dicts.
        wanted_fields = ('suite_folder', 'test_name', 'full_name', 'run_timestamp_last')
        tests = [{field: row[field] for field in wanted_fields} for row in rows]
        tests = get_testowners_for_tests(tests)

        print('upserting testowners')
        create_tables(ydb_wrapper, owners_table)

        # Describe the upserted columns; every column is sent as Optional.
        column_types = ydb.BulkUpsertColumns()
        for column_name, primitive in (
            ("test_name", ydb.PrimitiveType.Utf8),
            ("suite_folder", ydb.PrimitiveType.Utf8),
            ("full_name", ydb.PrimitiveType.Utf8),
            ("run_timestamp_last", ydb.PrimitiveType.Timestamp),
            ("owners", ydb.PrimitiveType.Utf8),
        ):
            column_types = column_types.add_column(column_name, ydb.OptionalType(primitive))

        ydb_wrapper.bulk_upsert_batches(owners_table, tests, column_types, batch_size=1000)

        print('testowners updated')


if __name__ == "__main__":
    # main() returns 1 when the credentials check fails and None otherwise.
    # Forward that value as the process exit status so callers (e.g. CI) can
    # see the failure; previously the return value was discarded and the
    # script always exited with status 0.
    raise SystemExit(main())