about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Kirill Rysin <35688753+naspirato@users.noreply.github.com> 2024-09-19 12:07:53 +0200
committer GitHub <noreply@github.com> 2024-09-19 13:07:53 +0300
commit 3c2b6817d3e668f9d4658c9d931f56a720ec7ef2 (patch)
tree 561ef1fc831b97dbace18c02bf6aa27453165aed
parent 05ce0a0c5e0c95a9fd41a255173b108be29f54f6 (diff)
download ydb-3c2b6817d3e668f9d4658c9d931f56a720ec7ef2.tar.gz
optimisation of flaky analytics scripts (#9469)
-rwxr-xr-x .github/scripts/analytics/flaky_tests_history.py 47
-rwxr-xr-x .github/scripts/analytics/flaky_tests_history_n_runs.py 9
-rw-r--r-- .github/workflows/collect_analytics.yml 10
3 files changed, 39 insertions, 27 deletions
diff --git a/.github/scripts/analytics/flaky_tests_history.py b/.github/scripts/analytics/flaky_tests_history.py
index cc5d314c5f..95524930ed 100755
--- a/.github/scripts/analytics/flaky_tests_history.py
+++ b/.github/scripts/analytics/flaky_tests_history.py
@@ -30,6 +30,8 @@ def create_tables(pool, table_path):
`suite_folder` Utf8 NOT NULL,
`full_name` Utf8 NOT NULL,
`date_window` Date NOT NULL,
+ `build_type` Utf8 NOT NULL,
+ `branch` Utf8 NOT NULL,
`days_ago_window` Uint64 NOT NULL,
`history` String,
`history_class` String,
@@ -52,6 +54,8 @@ def bulk_upsert(table_client, table_path, rows):
ydb.BulkUpsertColumns()
.add_column("test_name", ydb.OptionalType(ydb.PrimitiveType.Utf8))
.add_column("suite_folder", ydb.OptionalType(ydb.PrimitiveType.Utf8))
+ .add_column("build_type", ydb.OptionalType(ydb.PrimitiveType.Utf8))
+ .add_column("branch", ydb.OptionalType(ydb.PrimitiveType.Utf8))
.add_column("full_name", ydb.OptionalType(ydb.PrimitiveType.Utf8))
.add_column("date_window", ydb.OptionalType(ydb.PrimitiveType.Date))
.add_column("days_ago_window", ydb.OptionalType(ydb.PrimitiveType.Uint64))
@@ -68,9 +72,13 @@ def bulk_upsert(table_client, table_path, rows):
def main():
parser = argparse.ArgumentParser()
parser.add_argument('--days-window', default=5, type=int, help='how many days back we collecting history')
+ parser.add_argument('--build_type',choices=['relwithdebinfo', 'release-asan'], default='relwithdebinfo', type=str, help='build : relwithdebinfo or release-asan')
+ parser.add_argument('--branch', default='main',choices=['main'], type=str, help='branch')
args, unknown = parser.parse_known_args()
history_for_n_day = args.days_window
+ build_type = args.build_type
+ branch = args.branch
print(f'Getting hostory in window {history_for_n_day} days')
@@ -133,7 +141,9 @@ def main():
history_list,
dist_hist,
suite_folder,
- test_name
+ test_name,
+ '{build_type}' as build_type,
+ '{branch}' as branch
from (
select
full_name,
@@ -145,25 +155,19 @@ def main():
from (
select * from (
select * from (
- select
- DISTINCT suite_folder || '/' || test_name as full_name,
+ select DISTINCT
+ full_name,
suite_folder,
test_name
- from `test_results/test_runs_column`
- where
- status in ('failure','mute')
- and job_name in ('Nightly-run', 'Postcommit_relwithdebinfo','Postcommit_asan')
- and branch = 'main'
- and run_timestamp >= Date('{last_date}') -{history_for_n_day}*Interval("P1D")
- ) as tests_with_fails
+ from `test_results/analytics/testowners`
+ where run_timestamp_last >= Date('{last_date}') - 3*Interval("P1D")
+ ) as all_tests
cross join (
select
DISTINCT DateTime::MakeDate(run_timestamp) as date_base
from `test_results/test_runs_column`
where
- status in ('failure','mute')
- and job_name in ('Nightly-run', 'Postcommit_relwithdebinfo','Postcommit_asan')
- and branch = 'main'
+ (job_name ='Nightly-run' or job_name ='Postcommit_relwithdebinfo' or job_name ='Postcommit_asan')
and run_timestamp>= Date('{last_date}')
) as date_list
) as test_and_date
@@ -173,18 +177,19 @@ def main():
suite_folder || '/' || test_name as full_name,
run_timestamp,
status
- --ROW_NUMBER() OVER (PARTITION BY test_name ORDER BY run_timestamp DESC) AS rn
from `test_results/test_runs_column`
where
- run_timestamp >= Date('{last_date}') -{history_for_n_day}*Interval("P1D") and
- job_name in ('Nightly-run', 'Postcommit_relwithdebinfo')
- and build_type = 'relwithdebinfo'
+ run_timestamp >= Date('{last_date}') -{history_for_n_day}*Interval("P1D")
+ and (job_name ='Nightly-run' or job_name ='Postcommit_relwithdebinfo' or job_name ='Postcommit_asan')
+ and build_type = '{build_type}'
+ and branch = '{branch}'
+ order by full_name,run_timestamp desc
)
) as hist
ON test_and_date.full_name=hist.full_name
where
hist.run_timestamp >= test_and_date.date_base -{history_for_n_day}*Interval("P1D") AND
- hist.run_timestamp <= test_and_date.date_base
+ hist.run_timestamp < test_and_date.date_base + Interval("P1D")
)
GROUP BY full_name,suite_folder,test_name,date_base
@@ -211,13 +216,15 @@ def main():
print(f'history data captured, {len(results)} rows')
for row in results:
row['count'] = dict(zip(list(row['history_list']), [list(
- row['history_list']).count(i) for i in list(row['history_list'])]))
+ row['history_list']).count(i) for i in list(row['history_list'])]))
prepared_for_update_rows.append({
'suite_folder': row['suite_folder'],
'test_name': row['test_name'],
'full_name': row['full_name'],
'date_window': row['date_base'],
'days_ago_window': history_for_n_day,
+ 'build_type': row['build_type'],
+ 'branch': row['branch'],
'history': ','.join(row['history_list']).encode('utf8'),
'history_class': row['dist_hist'],
'pass_count': row['count'].get('passed', 0),
@@ -237,4 +244,4 @@ def main():
if __name__ == "__main__":
- main()
+ main()
\ No newline at end of file
diff --git a/.github/scripts/analytics/flaky_tests_history_n_runs.py b/.github/scripts/analytics/flaky_tests_history_n_runs.py
index d2940c791c..49ced5cb76 100755
--- a/.github/scripts/analytics/flaky_tests_history_n_runs.py
+++ b/.github/scripts/analytics/flaky_tests_history_n_runs.py
@@ -175,7 +175,7 @@ def main():
max(run_timestamp) as last_run
from (
select * from (
- select
+ select distinct
t1.suite_folder,
t1.test_name,
t1.full_name,
@@ -184,6 +184,7 @@ def main():
'{build_type}' as build_type,
'{branch}' as branch
from `test_results/analytics/testowners` as t1
+ where run_timestamp_last >= Date('{date}') - 3*Interval("P1D")
) as test_and_date
left JOIN (
select * from (
@@ -198,7 +199,7 @@ def main():
where
run_timestamp <= Date('{date}') + Interval("P1D")
and run_timestamp >= Date('{date}') -13*Interval("P1D")
- and job_name in ('Postcommit_relwithdebinfo','Postcommit_asan')
+ and (job_name ='Nightly-run' or job_name ='Postcommit_relwithdebinfo' or job_name ='Postcommit_asan')
and build_type = '{build_type}'
and status != 'skipped'
and branch = '{branch}'
@@ -216,7 +217,7 @@ def main():
where
run_timestamp <= Date('{date}') + Interval("P1D")
and run_timestamp >= Date('{date}') -13*Interval("P1D")
- and job_name in ('Postcommit_relwithdebinfo','Postcommit_asan')
+ and (job_name ='Nightly-run' or job_name ='Postcommit_relwithdebinfo' or job_name ='Postcommit_asan')
and build_type = '{build_type}'
and status = 'skipped'
and branch = '{branch}'
@@ -282,4 +283,4 @@ def main():
if __name__ == "__main__":
- main()
+ main()
\ No newline at end of file
diff --git a/.github/workflows/collect_analytics.yml b/.github/workflows/collect_analytics.yml
index ab8cbb2c25..c53f7aca7a 100644
--- a/.github/workflows/collect_analytics.yml
+++ b/.github/workflows/collect_analytics.yml
@@ -29,11 +29,15 @@ jobs:
python3 -m pip install ydb ydb[yc] codeowners
- name: Collect testowners
run: python3 .github/scripts/analytics/upload_testowners.py
- - name: Collect test history data with window 5 days
+ - name: Collect test history data with window 5 days relwithdebinfo for main
run: python3 .github/scripts/analytics/flaky_tests_history.py --days-window=5
- - name: Collect test history data with window 1 day
+ - name: Collect test history data with window 5 days release-asan for main
+ run: python3 .github/scripts/analytics/flaky_tests_history.py --days-window=5 --build_type=release-asan
+ - name: Collect test history data with window 1 days relwithdebinfo for main
run: python3 .github/scripts/analytics/flaky_tests_history.py --days-window=1
- - name: Collect test history data with window 10 run relwithdebinfo for main
+ - name: Collect test history data with window 1 days release-asan for main
+ run: python3 .github/scripts/analytics/flaky_tests_history.py --days-window=1 --build_type=release-asan
+ - name: Collect test history data with window 10 run relwithdebinfo for main
run: python3 .github/scripts/analytics/flaky_tests_history_n_runs.py --runs=10
- name: Collect test history data with window 10 run release-asan for main
run: python3 .github/scripts/analytics/flaky_tests_history_n_runs.py --runs=10 --build_type=release-asan