aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorKirill Rysin <35688753+naspirato@users.noreply.github.com>2024-08-09 19:41:34 +0200
committerGitHub <noreply@github.com>2024-08-09 17:41:34 +0000
commitc0b485623dd869177bc00c464bd7f07e41488aa6 (patch)
tree5297f002d793aea67b7e54186799ed0f8bf79ca2
parent49e62b47eec455deb9139643a0f6b56960352b9a (diff)
downloadydb-c0b485623dd869177bc00c464bd7f07e41488aa6.tar.gz
Flaky tests research (#7626)
-rwxr-xr-x.github/scripts/analytics/flaky_tests_history.py242
-rw-r--r--.github/workflows/collect_analytics.yml33
2 files changed, 275 insertions, 0 deletions
diff --git a/.github/scripts/analytics/flaky_tests_history.py b/.github/scripts/analytics/flaky_tests_history.py
new file mode 100755
index 0000000000..35764a3460
--- /dev/null
+++ b/.github/scripts/analytics/flaky_tests_history.py
@@ -0,0 +1,242 @@
+#!/usr/bin/env python3
+
+import argparse
+import configparser
+import datetime
+import os
+import posixpath
+import traceback
+import time
+import ydb
+from collections import Counter
+
+dir = os.path.dirname(__file__)
+config = configparser.ConfigParser()
+config_file_path = f"{dir}/../../config/ydb_qa_db.ini"
+config.read(config_file_path)
+
+build_preset = os.environ.get("build_preset")
+branch = os.environ.get("branch_to_compare")
+
+DATABASE_ENDPOINT = config["QA_DB"]["DATABASE_ENDPOINT"]
+DATABASE_PATH = config["QA_DB"]["DATABASE_PATH"]
+
+
+def create_tables(pool, table_path):
+ print(f"> create table: {table_path}")
+
+ def callee(session):
+ session.execute_scheme(f"""
+ CREATE table `{table_path}` (
+ `test_name` Utf8 NOT NULL,
+ `suite_folder` Utf8 NOT NULL,
+ `full_name` Utf8 NOT NULL,
+ `date_window` Date NOT NULL,
+ `days_ago_window` Uint64 NOT NULL,
+ `history` String,
+ `history_class` String,
+ `pass_count` Uint64,
+ `mute_count` Uint64,
+ `fail_count` Uint64,
+ `skip_count` Uint64,
+ PRIMARY KEY (`test_name`, `suite_folder`, `full_name`,date_window)
+ )
+ PARTITION BY HASH(`full_name`)
+ WITH (STORE = COLUMN)
+ """)
+
+ return pool.retry_operation_sync(callee)
+
+
+def bulk_upsert(table_client, table_path, rows):
+ print(f"> bulk upsert: {table_path}")
+ column_types = (
+ ydb.BulkUpsertColumns()
+ .add_column("test_name", ydb.OptionalType(ydb.PrimitiveType.Utf8))
+ .add_column("suite_folder", ydb.OptionalType(ydb.PrimitiveType.Utf8))
+ .add_column("full_name", ydb.OptionalType(ydb.PrimitiveType.Utf8))
+ .add_column("date_window", ydb.OptionalType(ydb.PrimitiveType.Date))
+ .add_column("days_ago_window", ydb.OptionalType(ydb.PrimitiveType.Uint64))
+ .add_column("history", ydb.OptionalType(ydb.PrimitiveType.String))
+ .add_column("history_class", ydb.OptionalType(ydb.PrimitiveType.String))
+ .add_column("pass_count", ydb.OptionalType(ydb.PrimitiveType.Uint64))
+ .add_column("mute_count", ydb.OptionalType(ydb.PrimitiveType.Uint64))
+ .add_column("fail_count", ydb.OptionalType(ydb.PrimitiveType.Uint64))
+ .add_column("skip_count", ydb.OptionalType(ydb.PrimitiveType.Uint64))
+ )
+ table_client.bulk_upsert(table_path, rows, column_types)
+
+
+def main():
+ parser = argparse.ArgumentParser()
+ parser.add_argument('--days-window', default=5, type=int, help='how many days back we collecting history')
+
+ args, unknown = parser.parse_known_args()
+ history_for_n_day = args.days_window
+
+ print(f'Getting hostory in window {history_for_n_day} days')
+
+
+ if "CI_YDB_SERVICE_ACCOUNT_KEY_FILE_CREDENTIALS" not in os.environ:
+ print(
+ "Error: Env variable CI_YDB_SERVICE_ACCOUNT_KEY_FILE_CREDENTIALS is missing, skipping"
+ )
+ return 1
+ else:
+ # Do not set up 'real' variable from gh workflows because it interfere with ydb tests
+ # So, set up it locally
+ os.environ["YDB_SERVICE_ACCOUNT_KEY_FILE_CREDENTIALS"] = os.environ[
+ "CI_YDB_SERVICE_ACCOUNT_KEY_FILE_CREDENTIALS"
+ ]
+ with ydb.Driver(
+ endpoint=DATABASE_ENDPOINT,
+ database=DATABASE_PATH,
+ credentials=ydb.credentials_from_env_variables(),
+ ) as driver:
+ driver.wait(timeout=10, fail_fast=True)
+ session = ydb.retry_operation_sync(
+ lambda: driver.table_client.session().create()
+ )
+
+ # settings, paths, consts
+ tc_settings = ydb.TableClientSettings().with_native_date_in_result_sets(enabled=True)
+ table_client = ydb.TableClient(driver, tc_settings)
+
+ table_path = f'test_results/analytics/flaky_tests_history_{history_for_n_day}_days'
+ default_start_date = datetime.date(2024, 7, 1)
+
+ with ydb.SessionPool(driver) as pool:
+ create_tables(pool, table_path)
+
+ # geting last date from history
+ last_date_query = f"select max(date_window) as max_date_window from `{table_path}`"
+ query = ydb.ScanQuery(last_date_query, {})
+ it = table_client.scan_query(query)
+ results = []
+ while True:
+ try:
+ result = next(it)
+ results = results + result.result_set.rows
+ except StopIteration:
+ break
+
+ if results[0] and results[0].get( 'max_date_window', default_start_date) is not None:
+ last_date = results[0].get(
+ 'max_date_window', default_start_date).strftime('%Y-%m-%d')
+ else:
+ last_date = default_start_date.strftime('%Y-%m-%d')
+
+ print(f'last hisotry date: {last_date}')
+ # getting history for dates >= last_date
+ query_get_history = f"""
+ select
+ full_name,
+ date_base,
+ history_list,
+ dist_hist,
+ suite_folder,
+ test_name
+ from (
+ select
+ full_name,
+ date_base,
+ AGG_LIST(status) as history_list ,
+ String::JoinFromList( AGG_LIST_DISTINCT(status) ,',') as dist_hist,
+ suite_folder,
+ test_name
+ from (
+ select * from (
+ select * from (
+ select
+ DISTINCT suite_folder || '/' || test_name as full_name,
+ suite_folder,
+ test_name
+ from `test_results/test_runs_results`
+ where
+ status in ('failure','mute')
+ and job_name in ('Nightly-run', 'Postcommit_relwithdebinfo')
+ and build_type = 'relwithdebinfo' and
+ run_timestamp >= Date('{last_date}') -{history_for_n_day}*Interval("P1D")
+ ) as tests_with_fails
+ cross join (
+ select
+ DISTINCT DateTime::MakeDate(run_timestamp) as date_base
+ from `test_results/test_runs_results`
+ where
+ status in ('failure','mute')
+ and job_name in ('Nightly-run', 'Postcommit_relwithdebinfo')
+ and build_type = 'relwithdebinfo'
+ and run_timestamp>= Date('{last_date}')
+ ) as date_list
+ ) as test_and_date
+ left JOIN (
+ select * from (
+ select
+ suite_folder || '/' || test_name as full_name,
+ run_timestamp,
+ status
+ --ROW_NUMBER() OVER (PARTITION BY test_name ORDER BY run_timestamp DESC) AS rn
+ from `test_results/test_runs_results`
+ where
+ run_timestamp >= Date('{last_date}') -{history_for_n_day}*Interval("P1D") and
+ job_name in ('Nightly-run', 'Postcommit_relwithdebinfo')
+ and build_type = 'relwithdebinfo'
+ )
+ ) as hist
+ ON test_and_date.full_name=hist.full_name
+ where
+ hist.run_timestamp >= test_and_date.date_base -{history_for_n_day}*Interval("P1D") AND
+ hist.run_timestamp <= test_and_date.date_base
+
+ )
+ GROUP BY full_name,suite_folder,test_name,date_base
+
+ )
+ """
+ query = ydb.ScanQuery(query_get_history, {})
+ # start transaction time
+ start_time = time.time()
+ it = driver.table_client.scan_query(query)
+ # end transaction time
+
+ results = []
+ prepared_for_update_rows = []
+ while True:
+ try:
+ result = next(it)
+ results = results + result.result_set.rows
+ except StopIteration:
+ break
+ end_time = time.time()
+ print(f'transaction duration: {end_time - start_time}')
+
+ print(f'history data captured, {len(results)} rows')
+ for row in results:
+ row['count'] = dict(zip(list(row['history_list']), [list(
+ row['history_list']).count(i) for i in list(row['history_list'])]))
+ prepared_for_update_rows.append({
+ 'suite_folder': row['suite_folder'],
+ 'test_name': row['test_name'],
+ 'full_name': row['full_name'],
+ 'date_window': row['date_base'],
+ 'days_ago_window': history_for_n_day,
+ 'history': ','.join(row['history_list']).encode('utf8'),
+ 'history_class': row['dist_hist'],
+ 'pass_count': row['count'].get('passed', 0),
+ 'mute_count': row['count'].get('mute', 0),
+ 'fail_count': row['count'].get('failure', 0),
+ 'skip_count': row['count'].get('skipped', 0),
+ })
+ print('upserting history')
+ with ydb.SessionPool(driver) as pool:
+
+ create_tables(pool, table_path)
+ full_path = posixpath.join(DATABASE_PATH, table_path)
+ bulk_upsert(driver.table_client, full_path,
+ prepared_for_update_rows)
+
+ print('history updated')
+
+
+if __name__ == "__main__":
+ main()
diff --git a/.github/workflows/collect_analytics.yml b/.github/workflows/collect_analytics.yml
new file mode 100644
index 0000000000..5473986512
--- /dev/null
+++ b/.github/workflows/collect_analytics.yml
@@ -0,0 +1,33 @@
+name: Collect-analytics-run
+on:
+ schedule:
+ - cron: "0 */4 * * *" # Every 4 h
+ workflow_dispatch:
+ inputs:
+ commit_sha:
+ type: string
+ default: ""
+
+defaults:
+ run:
+ shell: bash
+jobs:
+ main:
+ name: Checkout and setup
+ runs-on: [ self-hosted ]
+ steps:
+ - name: Checkout
+ uses: actions/checkout@v3
+ with:
+ ref: ${{ inputs.commit_sha }}
+ - name: Setup ydb access
+ uses: ./.github/actions/setup_ci_ydb_service_account_key_file_credentials
+ with:
+ ci_ydb_service_account_key_file_credentials: ${{ secrets.CI_YDB_SERVICE_ACCOUNT_KEY_FILE_CREDENTIALS }}
+ - name: Install dependencies
+ run: |
+ python3 -m pip install ydb ydb[yc] codeowners
+ - name: Collect test history data with window 5 days
+ run: python3 .github/scripts/analytics/flaky_tests_history.py --days-window=5
+ - name: Collect test history data with window 1 day
+ run: python3 .github/scripts/analytics/flaky_tests_history.py --days-window=1