aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorKirill Rysin <35688753+naspirato@users.noreply.github.com>2024-08-30 11:52:07 +0200
committerGitHub <noreply@github.com>2024-08-30 12:52:07 +0300
commitc859376d925410b99af317b42d2c2f7dc887ca3b (patch)
treecaa42727b64ce02d871c9bcc1c4527bd255b3fbd
parent9445a62996f6ec536fea0927abe084c6092f1933 (diff)
downloadydb-c859376d925410b99af317b42d2c2f7dc887ca3b.tar.gz
Upload testowners analytics in workflow (#8156)
-rw-r--r--.github/TESTOWNERS1
-rw-r--r--.github/actions/test_ya/action.yml2
-rwxr-xr-x.github/scripts/analytics/flaky_tests_history.py16
-rwxr-xr-x.github/scripts/analytics/flaky_tests_history_n_runs.py244
-rwxr-xr-x.github/scripts/analytics/upload_testowners.py147
-rwxr-xr-x.github/scripts/analytics/upload_tests_results.py (renamed from .github/scripts/upload_tests_results.py)6
-rw-r--r--.github/workflows/collect_analytics.yml21
7 files changed, 422 insertions, 15 deletions
diff --git a/.github/TESTOWNERS b/.github/TESTOWNERS
index f039e0d6c2..b228cdc58e 100644
--- a/.github/TESTOWNERS
+++ b/.github/TESTOWNERS
@@ -13,6 +13,7 @@
/ydb/core/tx/datashard @ydb-platform/datashard
/ydb/core/mon_alloc @ydb-platform/datashard
/ydb/core/tx/coordinator @ydb-platform/datashard
+/ydb/core/statistics @ydb-platform/datashard
#Column Tables Development Team @zverevgeny TEAM:@ydb-platform/cs
/ydb/core/tx/columnshard @ydb-platform/cs
diff --git a/.github/actions/test_ya/action.yml b/.github/actions/test_ya/action.yml
index 3af76b2fbe..83a70954cf 100644
--- a/.github/actions/test_ya/action.yml
+++ b/.github/actions/test_ya/action.yml
@@ -384,7 +384,7 @@ runs:
# upload tests results to YDB
ydb_upload_run_name="${TESTMO_RUN_NAME// /"_"}"
- result=`.github/scripts/upload_tests_results.py --test-results-file ${CURRENT_JUNIT_XML_PATH} --run-timestamp $(date +%s) --commit $(git rev-parse HEAD) --build-type ${BUILD_PRESET} --pull $ydb_upload_run_name --job-name "${{ github.workflow }}" --job-id "${{ github.run_id }}" --branch ${GITHUB_REF_NAME}`
+ result=`.github/scripts/analytics/upload_tests_results.py --test-results-file ${CURRENT_JUNIT_XML_PATH} --run-timestamp $(date +%s) --commit $(git rev-parse HEAD) --build-type ${BUILD_PRESET} --pull $ydb_upload_run_name --job-name "${{ github.workflow }}" --job-id "${{ github.run_id }}" --branch ${GITHUB_REF_NAME}`
if [ ${{ inputs.testman_token }} ]; then
# finish testme session
diff --git a/.github/scripts/analytics/flaky_tests_history.py b/.github/scripts/analytics/flaky_tests_history.py
index 60ed1afefc..cc5d314c5f 100755
--- a/.github/scripts/analytics/flaky_tests_history.py
+++ b/.github/scripts/analytics/flaky_tests_history.py
@@ -15,19 +15,17 @@ config = configparser.ConfigParser()
config_file_path = f"{dir}/../../config/ydb_qa_db.ini"
config.read(config_file_path)
-build_preset = os.environ.get("build_preset")
-branch = os.environ.get("branch_to_compare")
DATABASE_ENDPOINT = config["QA_DB"]["DATABASE_ENDPOINT"]
DATABASE_PATH = config["QA_DB"]["DATABASE_PATH"]
def create_tables(pool, table_path):
- print(f"> create table: {table_path}")
+ print(f"> create table if not exists:'{table_path}'")
def callee(session):
session.execute_scheme(f"""
- CREATE table `{table_path}` (
+ CREATE table IF NOT EXISTS `{table_path}` (
`test_name` Utf8 NOT NULL,
`suite_folder` Utf8 NOT NULL,
`full_name` Utf8 NOT NULL,
@@ -154,9 +152,9 @@ def main():
from `test_results/test_runs_column`
where
status in ('failure','mute')
- and job_name in ('Nightly-run', 'Postcommit_relwithdebinfo')
- and build_type = 'relwithdebinfo' and
- run_timestamp >= Date('{last_date}') -{history_for_n_day}*Interval("P1D")
+ and job_name in ('Nightly-run', 'Postcommit_relwithdebinfo','Postcommit_asan')
+ and branch = 'main'
+ and run_timestamp >= Date('{last_date}') -{history_for_n_day}*Interval("P1D")
) as tests_with_fails
cross join (
select
@@ -164,8 +162,8 @@ def main():
from `test_results/test_runs_column`
where
status in ('failure','mute')
- and job_name in ('Nightly-run', 'Postcommit_relwithdebinfo')
- and build_type = 'relwithdebinfo'
+ and job_name in ('Nightly-run', 'Postcommit_relwithdebinfo','Postcommit_asan')
+ and branch = 'main'
and run_timestamp>= Date('{last_date}')
) as date_list
) as test_and_date
diff --git a/.github/scripts/analytics/flaky_tests_history_n_runs.py b/.github/scripts/analytics/flaky_tests_history_n_runs.py
new file mode 100755
index 0000000000..e0179125c9
--- /dev/null
+++ b/.github/scripts/analytics/flaky_tests_history_n_runs.py
@@ -0,0 +1,244 @@
+#!/usr/bin/env python3
+
+import argparse
+import configparser
+import datetime
+import os
+import posixpath
+import traceback
+import time
+import ydb
+
+dir = os.path.dirname(__file__)
+config = configparser.ConfigParser()
+config_file_path = f"{dir}/../../config/ydb_qa_db.ini"
+config.read(config_file_path)
+
+build_preset = os.environ.get("build_preset")
+branch = os.environ.get("branch_to_compare")
+
+DATABASE_ENDPOINT = config["QA_DB"]["DATABASE_ENDPOINT"]
+DATABASE_PATH = config["QA_DB"]["DATABASE_PATH"]
+
+
+def create_tables(pool, table_path):
+ print(f"> create table: {table_path}")
+
+ def callee(session):
+ session.execute_scheme(f"""
+ CREATE table `{table_path}` (
+ `test_name` Utf8 NOT NULL,
+ `suite_folder` Utf8 NOT NULL,
+ `full_name` Utf8 NOT NULL,
+ `date_window` Date NOT NULL,
+ `build_type` Utf8 NOT NULL,
+ `branch` Utf8 NOT NULL,
+ `runs_window` Uint64 NOT NULL,
+ `history` String,
+ `history_class` String,
+ `pass_count` Uint64,
+ `mute_count` Uint64,
+ `fail_count` Uint64,
+ `skip_count` Uint64,
+ PRIMARY KEY (`test_name`, `suite_folder`, `full_name`,date_window,runs_window,build_type,branch)
+ )
+ PARTITION BY HASH(`full_name`,build_type,branch)
+ WITH (STORE = COLUMN)
+ """)
+
+ return pool.retry_operation_sync(callee)
+
+
+def bulk_upsert(table_client, table_path, rows):
+ print(f"> bulk upsert: {table_path}")
+ column_types = (
+ ydb.BulkUpsertColumns()
+ .add_column("test_name", ydb.OptionalType(ydb.PrimitiveType.Utf8))
+ .add_column("suite_folder", ydb.OptionalType(ydb.PrimitiveType.Utf8))
+ .add_column("build_type", ydb.OptionalType(ydb.PrimitiveType.Utf8))
+ .add_column("branch", ydb.OptionalType(ydb.PrimitiveType.Utf8))
+ .add_column("full_name", ydb.OptionalType(ydb.PrimitiveType.Utf8))
+ .add_column("date_window", ydb.OptionalType(ydb.PrimitiveType.Date))
+ .add_column("runs_window", ydb.OptionalType(ydb.PrimitiveType.Uint64))
+ .add_column("history", ydb.OptionalType(ydb.PrimitiveType.String))
+ .add_column("history_class", ydb.OptionalType(ydb.PrimitiveType.String))
+ .add_column("pass_count", ydb.OptionalType(ydb.PrimitiveType.Uint64))
+ .add_column("mute_count", ydb.OptionalType(ydb.PrimitiveType.Uint64))
+ .add_column("fail_count", ydb.OptionalType(ydb.PrimitiveType.Uint64))
+ .add_column("skip_count", ydb.OptionalType(ydb.PrimitiveType.Uint64))
+ )
+ table_client.bulk_upsert(table_path, rows, column_types)
+
+
+def main():
+ parser = argparse.ArgumentParser()
+ parser.add_argument('--runs', default=10,choices=[10, 25, 50], type=int, help='how many runs back we collecting history')
+ parser.add_argument('--build_type',choices=['relwithdebinfo', 'release-asan'], default='relwithdebinfo', type=str, help='build : relwithdebinfo or release-asan')
+ parser.add_argument('--branch', default='main',choices=['main'], type=str, help='branch')
+
+ args, unknown = parser.parse_known_args()
+ history_for_n_runs = args.runs
+ build_type = args.build_type
+ branch = args.branch
+
+    print(f'Getting history in window {history_for_n_runs} runs')
+
+
+ if "CI_YDB_SERVICE_ACCOUNT_KEY_FILE_CREDENTIALS" not in os.environ:
+ print(
+ "Error: Env variable CI_YDB_SERVICE_ACCOUNT_KEY_FILE_CREDENTIALS is missing, skipping"
+ )
+ return 1
+ else:
+        # Do not set up 'real' variable from gh workflows because it interferes with ydb tests
+        # So, set it up locally
+ os.environ["YDB_SERVICE_ACCOUNT_KEY_FILE_CREDENTIALS"] = os.environ[
+ "CI_YDB_SERVICE_ACCOUNT_KEY_FILE_CREDENTIALS"
+ ]
+ with ydb.Driver(
+ endpoint=DATABASE_ENDPOINT,
+ database=DATABASE_PATH,
+ credentials=ydb.credentials_from_env_variables(),
+ ) as driver:
+ driver.wait(timeout=10, fail_fast=True)
+ session = ydb.retry_operation_sync(
+ lambda: driver.table_client.session().create()
+ )
+
+ # settings, paths, consts
+ tc_settings = ydb.TableClientSettings().with_native_date_in_result_sets(enabled=True)
+ table_client = ydb.TableClient(driver, tc_settings)
+
+ table_path = f'test_results/analytics/flaky_tests_history_{history_for_n_runs}_runs'
+ default_start_date = datetime.date(2024, 7, 19)
+
+ with ydb.SessionPool(driver) as pool:
+ create_tables(pool, table_path)
+
+            # getting last date from history
+ last_date_query = f"""select max(date_window) as max_date_window from `{table_path}`
+ where build_type = '{build_type}' and branch = '{branch}'"""
+ query = ydb.ScanQuery(last_date_query, {})
+ it = table_client.scan_query(query)
+ results = []
+ while True:
+ try:
+ result = next(it)
+ results = results + result.result_set.rows
+ except StopIteration:
+ break
+
+ if results[0] and results[0].get( 'max_date_window', default_start_date) is not None:
+ last_date = results[0].get(
+ 'max_date_window', default_start_date).strftime('%Y-%m-%d')
+ last_datetime = results[0].get(
+ 'max_date_window', default_start_date)
+ else:
+ last_date = default_start_date.strftime('%Y-%m-%d')
+ last_datetime = default_start_date
+
+                print(f'last history date: {last_date}')
+ today = datetime.date.today()
+ date_list = [today - datetime.timedelta(days=x) for x in range((today - last_datetime).days+1)]
+ for date in sorted(date_list):
+ query_get_history = f"""
+ select
+ full_name,
+ date_base,
+ build_type,
+ branch,
+ history_list,
+ dist_hist,
+ suite_folder,
+ test_name
+ from (
+ select
+ full_name,
+ date_base,
+ build_type,
+ branch,
+ AGG_LIST(status) as history_list ,
+ String::JoinFromList( AGG_LIST_DISTINCT(status) ,',') as dist_hist,
+ suite_folder,
+ test_name
+ from (
+ select * from (
+ select t1.test_name, t1.suite_folder, t1.full_name,
+ Date('{date}') as date_base,
+ '{build_type}' as build_type,
+ '{branch}' as branch
+ from `test_results/analytics/testowners` as t1
+ ) as test_and_date
+ left JOIN (
+ select * from (
+ select
+ suite_folder || '/' || test_name as full_name,
+ run_timestamp,
+ status ,
+ ROW_NUMBER() OVER (PARTITION BY test_name ORDER BY run_timestamp DESC) AS run_number
+ from `test_results/test_runs_column`
+ where
+ run_timestamp <= Date('{date}')
+ and run_timestamp >= Date('{date}') -14*Interval("P1D")
+ and job_name in ('Postcommit_relwithdebinfo','Postcommit_asan')
+ and build_type = '{build_type}'
+ and status != 'skipped'
+ and branch = '{branch}'
+ )
+ where run_number <= {history_for_n_runs}
+ ) as hist
+ ON test_and_date.full_name=hist.full_name
+ )
+ GROUP BY full_name,suite_folder,test_name,date_base,build_type,branch
+
+ )
+ """
+ query = ydb.ScanQuery(query_get_history, {})
+ # start transaction time
+ start_time = time.time()
+ it = driver.table_client.scan_query(query)
+ # end transaction time
+
+ results = []
+ prepared_for_update_rows = []
+ while True:
+ try:
+ result = next(it)
+ results = results + result.result_set.rows
+ except StopIteration:
+ break
+ end_time = time.time()
+ print(f'transaction duration: {end_time - start_time}')
+
+ print(f'history data captured, {len(results)} rows')
+ for row in results:
+ row['count'] = dict(zip(list(row['history_list']), [list(
+ row['history_list']).count(i) for i in list(row['history_list'])]))
+ prepared_for_update_rows.append({
+ 'suite_folder': row['suite_folder'],
+ 'test_name': row['test_name'],
+ 'full_name': row['full_name'],
+ 'date_window': row['date_base'],
+ 'build_type': row['build_type'],
+ 'branch': row['branch'],
+ 'runs_window': history_for_n_runs,
+ 'history': ','.join(row['history_list']).encode('utf8'),
+ 'history_class': row['dist_hist'],
+ 'pass_count': row['count'].get('passed', 0),
+ 'mute_count': row['count'].get('mute', 0),
+ 'fail_count': row['count'].get('failure', 0),
+ 'skip_count': row['count'].get('skipped', 0),
+ })
+ print(f'upserting history for date {date}')
+ with ydb.SessionPool(driver) as pool:
+
+ full_path = posixpath.join(DATABASE_PATH, table_path)
+ bulk_upsert(driver.table_client, full_path,
+ prepared_for_update_rows)
+
+ print('flaky history updated')
+ print('finished')
+
+
+if __name__ == "__main__":
+ main()
diff --git a/.github/scripts/analytics/upload_testowners.py b/.github/scripts/analytics/upload_testowners.py
new file mode 100755
index 0000000000..1c6b785cfb
--- /dev/null
+++ b/.github/scripts/analytics/upload_testowners.py
@@ -0,0 +1,147 @@
+#!/usr/bin/env python3
+
+import argparse
+import configparser
+import datetime
+import os
+import posixpath
+import traceback
+import time
+import ydb
+from collections import Counter
+
+dir = os.path.dirname(__file__)
+config = configparser.ConfigParser()
+config_file_path = f"{dir}/../../config/ydb_qa_db.ini"
+config.read(config_file_path)
+
+DATABASE_ENDPOINT = config["QA_DB"]["DATABASE_ENDPOINT"]
+DATABASE_PATH = config["QA_DB"]["DATABASE_PATH"]
+
+
+def create_tables(pool, table_path):
+ print(f"> create table if not exists:'{table_path}'")
+
+ def callee(session):
+ session.execute_scheme(f"""
+ CREATE table IF NOT EXISTS `{table_path}` (
+ `test_name` Utf8 NOT NULL,
+ `suite_folder` Utf8 NOT NULL,
+ `full_name` Utf8 NOT NULL,
+ `run_timestamp_last` Timestamp NOT NULL,
+ `owners` Utf8 ,
+ PRIMARY KEY (`test_name`, `suite_folder`, `full_name`)
+ )
+ PARTITION BY HASH(suite_folder,`full_name`)
+ WITH (STORE = COLUMN)
+ """)
+
+ return pool.retry_operation_sync(callee)
+
+
+def bulk_upsert(table_client, table_path, rows):
+ print(f"> bulk upsert: {table_path}")
+ column_types = (
+ ydb.BulkUpsertColumns()
+ .add_column("test_name", ydb.OptionalType(ydb.PrimitiveType.Utf8))
+ .add_column("suite_folder", ydb.OptionalType(ydb.PrimitiveType.Utf8))
+ .add_column("full_name", ydb.OptionalType(ydb.PrimitiveType.Utf8))
+ .add_column("run_timestamp_last", ydb.OptionalType(ydb.PrimitiveType.Timestamp))
+ .add_column("owners", ydb.OptionalType(ydb.PrimitiveType.Utf8))
+ )
+ table_client.bulk_upsert(table_path, rows, column_types)
+
+
+def main():
+ if "CI_YDB_SERVICE_ACCOUNT_KEY_FILE_CREDENTIALS" not in os.environ:
+ print(
+ "Error: Env variable CI_YDB_SERVICE_ACCOUNT_KEY_FILE_CREDENTIALS is missing, skipping"
+ )
+ return 1
+ else:
+        # Do not set up 'real' variable from gh workflows because it interferes with ydb tests
+        # So, set it up locally
+ os.environ["YDB_SERVICE_ACCOUNT_KEY_FILE_CREDENTIALS"] = os.environ[
+ "CI_YDB_SERVICE_ACCOUNT_KEY_FILE_CREDENTIALS"
+ ]
+ with ydb.Driver(
+ endpoint=DATABASE_ENDPOINT,
+ database=DATABASE_PATH,
+ credentials=ydb.credentials_from_env_variables(),
+ ) as driver:
+ driver.wait(timeout=10, fail_fast=True)
+ session = ydb.retry_operation_sync(
+ lambda: driver.table_client.session().create()
+ )
+
+ # settings, paths, consts
+ tc_settings = ydb.TableClientSettings().with_native_date_in_result_sets(enabled=True)
+ table_client = ydb.TableClient(driver, tc_settings)
+
+ table_path = f'test_results/analytics/testowners'
+
+ query_get_owners = f"""
+ select
+ DISTINCT test_name,
+ suite_folder,
+ suite_folder || '/' || test_name as full_name,
+ FIRST_VALUE(owners) OVER w AS owners,
+ FIRST_VALUE (run_timestamp) OVER w AS run_timestamp_last
+ FROM
+ `test_results/test_runs_column`
+ WHERE
+ run_timestamp >= CurrentUtcDate()- Interval("P10D")
+ AND branch = 'main'
+ and job_name in (
+ 'Nightly-run', 'Postcommit_relwithdebinfo',
+ 'Postcommit_asan'
+ )
+ WINDOW w AS (
+ PARTITION BY test_name,
+ suite_folder
+ ORDER BY
+ run_timestamp DESC
+ )
+ order by
+ run_timestamp_last desc
+
+ """
+ query = ydb.ScanQuery(query_get_owners, {})
+ # start transaction time
+ start_time = time.time()
+ it = driver.table_client.scan_query(query)
+ # end transaction time
+
+ results = []
+ test_list = []
+ while True:
+ try:
+ result = next(it)
+ results = results + result.result_set.rows
+ except StopIteration:
+ break
+ end_time = time.time()
+ print(f'transaction duration: {end_time - start_time}')
+
+ print(f'testowners data captured, {len(results)} rows')
+ for row in results:
+ test_list.append({
+ 'suite_folder': row['suite_folder'],
+ 'test_name': row['test_name'],
+ 'full_name': row['full_name'],
+ 'owners': row['owners'],
+ 'run_timestamp_last': row['run_timestamp_last'],
+ })
+ print('upserting testowners')
+ with ydb.SessionPool(driver) as pool:
+
+ create_tables(pool, table_path)
+ full_path = posixpath.join(DATABASE_PATH, table_path)
+ bulk_upsert(driver.table_client, full_path,
+ test_list)
+
+ print('testowners updated')
+
+
+if __name__ == "__main__":
+ main()
diff --git a/.github/scripts/upload_tests_results.py b/.github/scripts/analytics/upload_tests_results.py
index 399a3f983c..b9a60f0577 100755
--- a/.github/scripts/upload_tests_results.py
+++ b/.github/scripts/analytics/upload_tests_results.py
@@ -20,11 +20,11 @@ from decimal import Decimal
def create_tables(pool, table_path):
- print(f"> create table: {table_path}")
+ print(f"> create table if not exists:'{table_path}'")
def callee(session):
session.execute_scheme(f"""
- CREATE table IF NOT EXISTS`{table_path}` (
+ CREATE table IF NOT EXISTS `{table_path}` (
build_type Utf8 NOT NULL,
job_name Utf8,
job_id Uint64,
@@ -180,7 +180,7 @@ def main():
path_in_database = "test_results"
dir = os.path.dirname(__file__)
- git_root = f"{dir}/../.."
+ git_root = f"{dir}/../../.."
codeowners = f"{git_root}/.github/TESTOWNERS"
config = configparser.ConfigParser()
config_file_path = f"{git_root}/.github/config/ydb_qa_db.ini"
diff --git a/.github/workflows/collect_analytics.yml b/.github/workflows/collect_analytics.yml
index eda7e83df4..c9ca6b1f05 100644
--- a/.github/workflows/collect_analytics.yml
+++ b/.github/workflows/collect_analytics.yml
@@ -1,7 +1,7 @@
name: Collect-analytics-run
on:
schedule:
- - cron: "0 */4 * * *" # Every 4 h
+ - cron: "0 * * * *" # Every 1 h
workflow_dispatch:
inputs:
commit_sha:
@@ -27,7 +27,24 @@ jobs:
- name: Install dependencies
run: |
python3 -m pip install ydb ydb[yc] codeowners
+
+ # Every 2 runs = every 1 hours
+ - name: Collect testowners
+ if: ${{ (github.event.schedule || '') == '' || (github.run_number % 2) == 1 }}
+ run: python3 .github/scripts/analytics/upload_testowners.py
+ # Every 4 runs = every 4 hours
- name: Collect test history data with window 5 days
+ if: ${{ (github.event.schedule || '') == '' || (github.run_number % 4) == 1 }}
run: python3 .github/scripts/analytics/flaky_tests_history.py --days-window=5
- - name: Collect test history data with window 1 day
+ - name: Collect test history data with window 1 day
+ if: ${{ (github.event.schedule || '') == '' || (github.run_number % 4) == 1 }}
run: python3 .github/scripts/analytics/flaky_tests_history.py --days-window=1
+ # Every 4 runs = every 4 hours
+ - name: Collect test history data with window 10 run relwithdebinfo for main
+ if: ${{ (github.event.schedule || '') == '' || (github.run_number % 4) == 1 }}
+ run: python3 .github/scripts/analytics/flaky_tests_history_n_runs.py --runs=10
+ - name: Collect test history data with window 10 run release-asan for main
+ if: ${{ (github.event.schedule || '') == '' || (github.run_number % 4) == 1 }}
+ run: python3 .github/scripts/analytics/flaky_tests_history_n_runs.py --runs=10 --build_type=release-asan
+
+