diff options
| author | Kirill Rysin <[email protected]> | 2026-04-22 14:26:25 +0200 |
|---|---|---|
| committer | GitHub <[email protected]> | 2026-04-22 14:26:25 +0200 |
| commit | 3998280eede013947cdcf934ff1ebbdaa09b04db (patch) | |
| tree | 10ced931c519cdcd04aa5041c56e168712229fa3 /.github/scripts | |
| parent | 7a7c0968705282c6bd1cad2f36b15b8511fbe238 (diff) | |
MUTE: Fast unmute (#38430)
Diffstat (limited to '.github/scripts')
16 files changed, 1954 insertions, 80 deletions
diff --git a/.github/scripts/analytics/data_mart_queries/muted_tests_with_issue_and_area.sql b/.github/scripts/analytics/data_mart_queries/muted_tests_with_issue_and_area.sql index f6cb1651a27..98b4cefd665 100644 --- a/.github/scripts/analytics/data_mart_queries/muted_tests_with_issue_and_area.sql +++ b/.github/scripts/analytics/data_mart_queries/muted_tests_with_issue_and_area.sql @@ -1,4 +1,12 @@ -$window_days = 365; +-- Mart slice: how far back ``tests_monitor.date_window`` is included (not ``mute_config.json``). +$mart_history_days = 365; + +-- Mart filter: branch/build_type slice for this dashboard (not necessarily all CI matrix branches). +$mart_branch = 'main'; +$mart_build_type = 'relwithdebinfo'; + +-- Must match ``manual_unmute_ttl_calendar_days`` in ``.github/config/mute_config.json`` (fast-unmute deadline). +$manual_unmute_ttl_calendar_days = 3; $normalize = ($raw_area) -> { $parts = String::SplitToList(Cast($raw_area AS String), '/'); @@ -44,6 +52,18 @@ $gim_latest = ( WHERE g_rnk.rn = 1 ); +$mfu = ( + SELECT + full_name AS full_name, + branch AS branch, + build_type AS build_type, + github_issue_number AS mfu_issue_number, + requested_at AS mfu_since, + window_days AS mfu_window_days, + requested_at + $manual_unmute_ttl_calendar_days * Interval("P1D") AS mfu_expires_at + FROM `test_mute/fast_unmute_active` +); + SELECT tm.state_filtered AS state_filtered, tm.test_name AS test_name, @@ -82,7 +102,12 @@ SELECT gim.github_issue_state AS github_issue_state, gim.github_issue_created_at AS github_issue_created_at, gim.area_override AS area_override, - gim.area_override_since AS area_override_since + gim.area_override_since AS area_override_since, + CAST(CASE WHEN mfu.full_name IS NOT NULL THEN 1 ELSE 0 END AS Uint8) AS is_manual_fast_unmute, + mfu.mfu_since AS manual_fast_unmute_since, + mfu.mfu_window_days AS manual_fast_unmute_window_days, + mfu.mfu_expires_at AS manual_fast_unmute_expires_at, + mfu.mfu_issue_number AS manual_fast_unmute_issue_number FROM `test_results/analytics/tests_monitor` AS tm LEFT JOIN $area_fallback AS af ON Unicode::ToLower(Cast(Coalesce(String::ReplaceAll(tm.owner, 'TEAM:@ydb-platform/', ''), '') AS Utf8)) = af.owner_team @@ -90,9 +115,13 @@ LEFT JOIN $gim_latest AS gim ON tm.full_name = gim.full_name AND tm.branch = gim.branch AND tm.build_type = gim.build_type -WHERE tm.date_window >= CurrentUtcDate() - $window_days * Interval("P1D") - AND tm.branch = 'main' - AND tm.build_type = 'relwithdebinfo' +LEFT JOIN $mfu AS mfu + ON tm.full_name = mfu.full_name + AND tm.branch = mfu.branch + AND tm.build_type = mfu.build_type +WHERE tm.date_window >= CurrentUtcDate() - $mart_history_days * Interval("P1D") + AND tm.branch = $mart_branch + AND tm.build_type = $mart_build_type AND tm.is_test_chunk = 0 AND tm.is_muted = 1 AND tm.state != 'Skipped'; diff --git a/.github/scripts/analytics/data_mart_queries/test_muted_monitor_mart_with_issue.sql b/.github/scripts/analytics/data_mart_queries/test_muted_monitor_mart_with_issue.sql index 382392c2340..0c539268286 100644 --- a/.github/scripts/analytics/data_mart_queries/test_muted_monitor_mart_with_issue.sql +++ b/.github/scripts/analytics/data_mart_queries/test_muted_monitor_mart_with_issue.sql @@ -1,6 +1,20 @@ -- GitHub issue fields from github_issue_mapping; analytics area/owner + owner hand-off from tests_monitor. -- COALESCE fallback: when effective_* columns are NULL (not yet populated), fall back to owner string + area_to_owner_mapping. +-- Mart slice: last N calendar days of ``tests_monitor`` (not ``mute_config.json``). +$mart_monitor_date_span_days = 1; + +-- Mart branch filter: ``main`` plus release branches matching this prefix pattern. +$mart_main_branch = 'main'; +$mart_stable_branch_like = 'stable-%'; + +-- ``resolution`` / ``is_muted_or_skipped``: dashboard SLA thresholds (not ``mute_config.json`` windows). +$resolution_skipped_days_threshold = 14; +$resolution_muted_delete_candidate_days = 30; + +-- Must match ``manual_unmute_ttl_calendar_days`` in ``.github/config/mute_config.json`` (fast-unmute deadline). +$manual_unmute_ttl_calendar_days = 3; + $normalize = ($raw_area) -> { $parts = String::SplitToList(Cast($raw_area AS String), '/'); RETURN Cast( @@ -45,6 +59,18 @@ $gim_latest = ( WHERE g_rnk.rn = 1 ); +$mfu = ( + SELECT + full_name AS full_name, + branch AS branch, + build_type AS build_type, + github_issue_number AS mfu_issue_number, + requested_at AS mfu_since, + window_days AS mfu_window_days, + requested_at + $manual_unmute_ttl_calendar_days * Interval("P1D") AS mfu_expires_at + FROM `test_mute/fast_unmute_active` +); + SELECT tm.state_filtered AS state_filtered, tm.test_name AS test_name, @@ -72,8 +98,8 @@ SELECT tm.state_change_date_filtered AS state_change_date_filtered, tm.days_in_state_filtered AS days_in_state_filtered, CAST(CASE - WHEN (tm.state = 'Skipped' AND tm.days_in_state > 14) THEN 'Skipped' - WHEN tm.days_in_mute_state > 30 THEN 'MUTED: delete candidate' + WHEN (tm.state = 'Skipped' AND tm.days_in_state > $resolution_skipped_days_threshold) THEN 'Skipped' + WHEN tm.days_in_mute_state > $resolution_muted_delete_candidate_days THEN 'MUTED: delete candidate' ELSE 'MUTED: in sla' END as String) AS resolution, @@ -86,7 +112,7 @@ SELECT tm.effective_owner_team_changed_date AS effective_owner_team_changed_date, CAST( CASE - WHEN tm.is_muted = 1 OR (tm.state = 'Skipped' AND tm.days_in_state > 14) THEN TRUE + WHEN tm.is_muted = 1 OR (tm.state = 'Skipped' AND tm.days_in_state > $resolution_skipped_days_threshold) THEN TRUE ELSE FALSE END AS Uint8 ) AS is_muted_or_skipped, @@ -95,7 +121,12 @@ SELECT gim.github_issue_state AS github_issue_state, gim.github_issue_created_at AS github_issue_created_at, gim.area_override AS area_override, - gim.area_override_since AS area_override_since + gim.area_override_since AS area_override_since, + CAST(CASE WHEN mfu.full_name IS NOT NULL THEN 1 ELSE 0 END AS Uint8) AS is_manual_fast_unmute, + mfu.mfu_since AS manual_fast_unmute_since, + mfu.mfu_window_days AS manual_fast_unmute_window_days, + mfu.mfu_expires_at AS manual_fast_unmute_expires_at, + mfu.mfu_issue_number AS manual_fast_unmute_issue_number FROM `test_results/analytics/tests_monitor` AS tm LEFT JOIN $area_fallback AS af ON Unicode::ToLower(Cast(Coalesce(String::ReplaceAll(tm.owner, 'TEAM:@ydb-platform/', ''), '') AS Utf8)) = af.owner_team @@ -103,6 +134,10 @@ LEFT JOIN $gim_latest AS gim ON tm.full_name = gim.full_name AND tm.branch = gim.branch AND tm.build_type = gim.build_type -WHERE tm.date_window >= CurrentUtcDate() - 1 * Interval("P1D") - AND (tm.branch = 'main' OR tm.branch LIKE 'stable-%') +LEFT JOIN $mfu AS mfu + ON tm.full_name = mfu.full_name + AND tm.branch = mfu.branch + AND tm.build_type = mfu.build_type +WHERE tm.date_window >= CurrentUtcDate() - $mart_monitor_date_span_days * Interval("P1D") + AND (tm.branch = $mart_main_branch OR tm.branch LIKE $mart_stable_branch_like) AND tm.is_test_chunk = 0; diff --git a/.github/scripts/analytics/export_issues_to_ydb.py b/.github/scripts/analytics/export_issues_to_ydb.py index 5d790a99cd1..15d0d4c49d1 100755 --- a/.github/scripts/analytics/export_issues_to_ydb.py +++ b/.github/scripts/analytics/export_issues_to_ydb.py @@ -125,6 +125,28 @@ def fetch_single_issue(org_name: str, repo_name: str, issue_number: int) -> Opti issueType { name } + timelineItems(last: 20, itemTypes: [CLOSED_EVENT]) { + nodes { + ... on ClosedEvent { + createdAt + actor { + __typename + login + } + } + } + } + projectItems(first: 30) { + nodes { + id + project { + id + number + title + url + } + } + } } } } @@ -225,6 +247,28 @@ def fetch_repository_issues(org_name: str = ORG_NAME, repo_name: str = REPO_NAME issueType { name } + timelineItems(last: 20, itemTypes: [CLOSED_EVENT]) { + nodes { + ... on ClosedEvent { + createdAt + actor { + __typename + login + } + } + } + } + projectItems(first: 30) { + nodes { + id + project { + id + number + title + url + } + } + } } pageInfo { hasNextPage @@ -415,6 +459,45 @@ def parse_datetime(dt_str: Optional[str]) -> Optional[datetime]: except (ValueError, TypeError): return None + +def extract_last_close_actor(issue: Dict[str, Any]) -> Dict[str, Any]: + """Who closed the issue — same timeline rule as ``mute.fast_unmute_github.fetch_issue_closers``.""" + login = '' + actor_type = '' + event_at = None + nodes = (issue.get('timelineItems') or {}).get('nodes') or [] + for event in reversed(nodes): + if not event: + continue + actor = event.get('actor') or {} + cand_login = actor.get('login') or '' + if cand_login: + login = cand_login + actor_type = actor.get('__typename') or '' + event_at = parse_datetime(event.get('createdAt')) + break + return {'login': login, 'actor_type': actor_type, 'event_at': event_at} + + +def projects_for_info_json(issue: Dict[str, Any]) -> List[Dict[str, Any]]: + """Projects (v2) that contain this issue — id/title from GraphQL ``projectItems``.""" + out = [] + for node in (issue.get('projectItems') or {}).get('nodes') or []: + proj = node.get('project') or {} + pid = proj.get('id') + if not pid: + continue + row = { + 'project_id': pid, + 'project_number': proj.get('number'), + 'title': proj.get('title'), + 'url': proj.get('url'), + 'project_item_id': node.get('id'), + } + out.append(row) + return out + + # --- branch version helpers --- def parse_branch(label): if label == 'main': @@ -497,6 +580,9 @@ def transform_issues_for_ydb(issues: List[Dict[str, Any]], project_fields: Optio branch = ';'.join(branch_labels) if branch_labels else None max_branch = get_max_branch(branch_labels) if branch_labels else None info = {'branch': branch, 'max_branch': max_branch, 'env': env, 'priority': priority, 'area': area} + proj_list = projects_for_info_json(issue) + if proj_list: + info['projects'] = proj_list # Issue type: GraphQL issueType.name (Bug/Feature/Task), then project field, then label "bug" issue_type = (issue.get('issueType') or {}).get('name') if issue_type is None: @@ -539,6 +625,17 @@ def transform_issues_for_ydb(issues: List[Dict[str, Any]], project_fields: Optio created_at = parse_datetime(issue.get('createdAt')) updated_at = parse_datetime(issue.get('updatedAt')) closed_at = parse_datetime(issue.get('closedAt')) + + closer = extract_last_close_actor(issue) + if closer['login']: + info['closed_by_login'] = closer['login'] + if closer['actor_type']: + info['closed_by_typename'] = closer['actor_type'] + if closer['event_at'] is not None: + info['closed_event_at_iso'] = closer['event_at'].isoformat() + if closed_at: + info['closed_at_iso'] = closed_at.isoformat() + now = datetime.now(timezone.utc) is_in_project = bool(issue_project_fields) diff --git a/.github/scripts/telegram/README.md b/.github/scripts/telegram/README.md index 3de165a37a7..788eb0567cd 100644 --- a/.github/scripts/telegram/README.md +++ b/.github/scripts/telegram/README.md @@ -32,7 +32,7 @@ Specialized script for parsing GitHub issues and sending team-specific notificat **Documentation:** [README_parse_and_send.md](README_parse_and_send.md) ### 🚨 `alert_queued_jobs.py` -Monitors the GitHub Actions queue and sends Telegram alerts when jobs are stuck (PR-check, Postcommit, etc.). Runs on a schedule and manually. +Monitors the GitHub Actions queue and sends Telegram alerts when jobs are stuck (PR-check, Postcommit, etc.). Runs in **`.github/workflows/telegram_scheduled_notifications.yml`** (job **CI queue Telegram alerts**; same file also runs **Mute digest to Telegram** via `send_digest.py`). **Documentation:** [README_alert_queued_jobs.md](README_alert_queued_jobs.md) — vars/secrets, thresholds, local testing (dry-run), CLI. diff --git a/.github/scripts/telegram/README_alert_queued_jobs.md b/.github/scripts/telegram/README_alert_queued_jobs.md index 3e799be2849..9bd26ced8c7 100644 --- a/.github/scripts/telegram/README_alert_queued_jobs.md +++ b/.github/scripts/telegram/README_alert_queued_jobs.md @@ -1,6 +1,6 @@ -# Alert Queued Jobs +# CI queue Telegram alerts (`alert_queued_jobs.py`) -`alert_queued_jobs.py` monitors the GitHub Actions queue and sends Telegram alerts when runs are stuck longer than configured thresholds. Used by [alert_queued_jobs.yml](../../workflows/alert_queued_jobs.yml) (every 30 min + manual `workflow_dispatch`). +`alert_queued_jobs.py` monitors the GitHub Actions queue and sends Telegram alerts when runs are stuck longer than configured thresholds. It runs in the **CI queue Telegram alerts** job of [telegram_scheduled_notifications.yml](../../workflows/telegram_scheduled_notifications.yml) (every 30 min + manual `workflow_dispatch`). That workflow file also contains the **Mute digest to Telegram** job (`send_digest.py`). **Flow:** Fetch `queued` runs from GitHub API → filter by blacklist → compare wait time to thresholds per workflow type → send 1–2 messages to Telegram (optionally with a call string at the start for a duty bot). Can also send an “all good” message when the queue is empty or no jobs are stuck. diff --git a/.github/scripts/telegram/send_digest.py b/.github/scripts/telegram/send_digest.py index 35eb24a3db1..03e74f0ab93 100644 --- a/.github/scripts/telegram/send_digest.py +++ b/.github/scripts/telegram/send_digest.py @@ -5,13 +5,15 @@ Send batched Telegram digests for new muted-test GitHub issues. How it works ------------ Issues are placed into ``digest_queue`` at the moment they are created -(by create_new_muted_ya.py). This script reads unsent rows +(by mute/create_new_muted_ya.py). This script reads unsent rows (sent_at IS NULL), sends per-team Telegram messages, then marks rows as sent by writing sent_at = NOW(). There are no timing assumptions, no cursors, no historical-data floods. The queue is the single source of truth for "what still needs to be sent". +Scheduled by ``.github/workflows/telegram_scheduled_notifications.yml`` (job **Mute digest to Telegram**). + Reads profiles from .github/config/mute_issue_and_digest_config.json and runs only those whose ``schedule_utc_hours`` contains the current UTC hour and whose ``schedule_weekdays`` contains the current ISO weekday (1=Mon … 7=Sun). diff --git a/.github/scripts/tests/mute/__init__.py b/.github/scripts/tests/mute/__init__.py new file mode 100644 index 00000000000..3cf85397632 --- /dev/null +++ b/.github/scripts/tests/mute/__init__.py @@ -0,0 +1,5 @@ +"""Mute automation: ``muted_ya`` generation, GitHub mute issues, manual fast-unmute. + +Fast-unmute logic lives in ``fast_unmute_ydb``, ``fast_unmute_github``, +``fast_unmute_comments``, and ``fast_unmute_pipeline``; ``manual_unmute`` is the CLI. +""" diff --git a/.github/scripts/tests/mute/constants.py b/.github/scripts/tests/mute/constants.py new file mode 100644 index 00000000000..1bf059abf8e --- /dev/null +++ b/.github/scripts/tests/mute/constants.py @@ -0,0 +1,88 @@ +"""Mute pipeline settings from ``.github/config/mute_config.json`` (no cross-imports between scripts).""" + +import json +import os + +_MUTE_CONFIG_PATH = os.path.normpath( + os.path.join(os.path.dirname(__file__), '..', '..', '..', 'config', 'mute_config.json') +) + +_REQUIRED_KEYS = ( + 'manual_unmute_window_days', + 'manual_unmute_min_runs', + 'manual_unmute_ttl_calendar_days', + 'mute_window_days', + 'unmute_window_days', + 'delete_window_days', + 'manual_unmute_issue_closed_lookback_days', + 'manual_unmute_currently_muted_lookback_days', +) + +_CACHE = None + + +def _payload(): + global _CACHE + if _CACHE is not None: + return _CACHE + try: + with open(_MUTE_CONFIG_PATH, 'r', encoding='utf-8') as fp: + raw = json.load(fp) + except OSError as exc: + raise RuntimeError(f'Cannot read mute config {_MUTE_CONFIG_PATH}') from exc + except json.JSONDecodeError as exc: + raise RuntimeError(f'Invalid JSON in mute config {_MUTE_CONFIG_PATH}') from exc + if not isinstance(raw, dict): + raise RuntimeError(f'Mute config must be a JSON object: {_MUTE_CONFIG_PATH}') + missing = [k for k in _REQUIRED_KEYS if k not in raw] + if missing: + raise RuntimeError( + f'{_MUTE_CONFIG_PATH}: missing required key(s): {", ".join(sorted(missing))}' + ) + _CACHE = raw + return _CACHE + + +def _positive_int(key): + v = _payload()[key] + try: + n = int(v) + except (ValueError, TypeError) as exc: + raise RuntimeError( + f'{_MUTE_CONFIG_PATH}: key {key!r} must be a positive integer, got {v!r}' + ) from exc + if n <= 0: + raise RuntimeError(f'{_MUTE_CONFIG_PATH}: key {key!r} must be positive, got {n}') + return n + + +def get_mute_window_days(): + return _positive_int('mute_window_days') + + +def get_unmute_window_days(): + return _positive_int('unmute_window_days') + + +def get_delete_window_days(): + return _positive_int('delete_window_days') + + +def get_manual_unmute_issue_closed_lookback_days(): + return _positive_int('manual_unmute_issue_closed_lookback_days') + + +def get_manual_unmute_currently_muted_lookback_days(): + return _positive_int('manual_unmute_currently_muted_lookback_days') + + +def get_manual_unmute_window_days(): + return _positive_int('manual_unmute_window_days') + + +def get_manual_unmute_min_runs(): + return _positive_int('manual_unmute_min_runs') + + +def get_manual_unmute_ttl_calendar_days(): + return _positive_int('manual_unmute_ttl_calendar_days') diff --git a/.github/scripts/tests/create_new_muted_ya.py b/.github/scripts/tests/mute/create_new_muted_ya.py index 23efab4e0e6..79b56b21cad 100755 --- a/.github/scripts/tests/create_new_muted_ya.py +++ b/.github/scripts/tests/mute/create_new_muted_ya.py @@ -9,40 +9,218 @@ import logging import sys from collections import defaultdict -# Add the parent directory to the path to import update_mute_issues -sys.path.append(os.path.dirname(os.path.abspath(__file__))) +# Runnable as ``python3 .github/scripts/tests/mute/create_new_muted_ya.py``: expose package ``mute``. +_mutedir = os.path.dirname(os.path.abspath(__file__)) +_tests_dir = os.path.dirname(_mutedir) +_scripts_dir = os.path.dirname(_tests_dir) +for _p in (_tests_dir, _scripts_dir, os.path.join(_scripts_dir, 'analytics')): + if _p not in sys.path: + sys.path.insert(0, _p) from mute_check import YaMuteCheck -from update_mute_issues import ( +from mute.update_mute_issues import ( + ORG_NAME, + PROJECT_ID, + close_unmuted_issues, create_and_add_issue_to_project, generate_github_issue_title_and_body, + get_issues_and_tests_from_project, get_muted_tests_from_issues, - close_unmuted_issues, + map_tests_to_manual_fast_unmute_issue_url, ) - -# Add analytics directory to path for ydb_wrapper import -sys.path.append(os.path.join(os.path.dirname(__file__), '..', 'analytics')) +from mute.constants import ( + get_delete_window_days, + get_manual_unmute_min_runs, + get_manual_unmute_window_days, + get_mute_window_days, + get_unmute_window_days, +) +from mute.naming import mute_file_line_to_tests_monitor_full_name from ydb_wrapper import YDBWrapper - -sys.path.append(os.path.join(os.path.dirname(__file__), '..')) from github_issue_utils import DEFAULT_BUILD_TYPE, canonical_team_slug, make_profile_id -# Configure logging -logging.basicConfig(level=logging.DEBUG, format='%(asctime)s - %(levelname)s - %(message)s') +# Configure logging — root INFO so ydb/grpc don't spam DEBUG (channel options, etc.). +logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s') +for _noisy in ('grpc', 'grpc._cython.cygrpc', 'ydb'): + logging.getLogger(_noisy).setLevel(logging.WARNING) dir = os.path.dirname(__file__) -repo_path = f"{dir}/../../../" +repo_path = os.path.normpath(os.path.join(dir, '..', '..', '..', '..')) + os.sep muted_ya_path = '.github/config/muted_ya.txt' -# Constants for mute logic time windows -MUTE_DAYS = 4 -UNMUTE_DAYS = 7 -DELETE_DAYS = 7 - _DIGEST_NOTIFICATION_CONFIG = os.path.normpath( - os.path.join(dir, '..', '..', 'config', 'mute_issue_and_digest_config.json') + os.path.join(dir, '..', '..', '..', 'config', 'mute_issue_and_digest_config.json') ) +def load_manual_unmute_config(): + """Manual fast-unmute window — required keys in ``mute_config.json`` via ``mute.constants``.""" + return get_manual_unmute_window_days(), get_manual_unmute_min_runs() + + +def tests_monitor_query_days_window(): + """How many calendar days of ``tests_monitor`` history we must load for mute/unmute/delete/fast-unmute.""" + return max( + get_mute_window_days(), + get_unmute_window_days(), + get_delete_window_days(), + get_manual_unmute_window_days(), + ) + + +def grace_started_at_to_utc_date(value): + """Normalize ``grace_started_at`` from scan_query (datetime, date, or int microseconds).""" + if value is None: + return None + if isinstance(value, datetime.datetime): + return value.astimezone(datetime.timezone.utc).date() + if isinstance(value, datetime.date): + return value + if isinstance(value, int): + return datetime.datetime.fromtimestamp( + value / 1_000_000, tz=datetime.timezone.utc + ).date() + return None + + +def merge_mute_aggregate_with_fast_unmute_grace( + all_data, + aggregated_for_mute_default, + grace_map, + manual_window_days, + mute_window_days, +): + """Rebuild mute aggregation list so tests in post–fast-unmute grace use a ladder window. + + Effective window = ``min(mute_window_days, manual_window_days + days_since_grace_started)`` calendar days. + """ + if not grace_map: + return aggregated_for_mute_default + + today = datetime.datetime.now(datetime.timezone.utc).date() + # Which ladder window lengths (eff) actually occur for grace rows. We aggregate only + # those periods — not every integer from manual_window_days..mute_window_days — so we + # skip redundant aggregate_test_data calls when few distinct eff values appear. + needed_effs = set() + for meta in grace_map.values(): + gs_date = grace_started_at_to_utc_date(meta.get('grace_started_at')) + if gs_date is None: + continue + days_since = max(0, (today - gs_date).days) + needed_effs.add(min(mute_window_days, manual_window_days + days_since)) + + # Per eff: full_name -> aggregated row. Lets the loop below do dict lookups instead of + # rebuilding {full_name: row} from the agg list for every test (same result, less work). + maps_by_eff = {} + for d in sorted(needed_effs): + agg_list = aggregate_test_data(all_data, d) + maps_by_eff[d] = {t['full_name']: t for t in agg_list} + + by_name = {t['full_name']: t for t in aggregated_for_mute_default} + merged = [] + for fn, test in by_name.items(): + meta = grace_map.get(fn) + if meta: + gs_date = grace_started_at_to_utc_date(meta.get('grace_started_at')) + if gs_date is not None: + days_since = max(0, (today - gs_date).days) + eff = min(mute_window_days, manual_window_days + days_since) + alt_map = maps_by_eff.get(eff) + if alt_map is not None: + test = alt_map.get(fn, test) + merged.append(test) + return merged + + +def load_fast_unmute_grace_map(ydb_wrapper, branch, build_type): + """Rows in ``fast_unmute_grace``: widening mute threshold after a test left fast-unmute.""" + try: + table_path = ydb_wrapper.get_table_path('fast_unmute_grace') + except KeyError: + logging.info('fast_unmute_grace not registered in ydb_qa_config — ladder disabled') + return {} + + branch_esc = str(branch).replace("'", "''") + bt_esc = str(build_type).replace("'", "''") + query = f""" + SELECT full_name, grace_started_at + FROM `{table_path}` + WHERE branch = '{branch_esc}' + AND build_type = '{bt_esc}' + """ + try: + rows = ydb_wrapper.execute_scan_query(query, query_name='fast_unmute_grace_load') + except Exception as exc: + logging.warning('Failed to load fast_unmute_grace: %s', exc) + return {} + + out = {} + for row in rows: + fn = row.get('full_name') + if fn: + out[fn] = row + return out + + +def delete_fast_unmute_grace_rows(ydb_wrapper, branch, build_type, test_strings): + """Remove grace rows when tests are mute candidates again (wildcards skipped).""" + if not test_strings: + return + try: + table_path = ydb_wrapper.get_table_path('fast_unmute_grace') + except KeyError: + return + + for line in test_strings: + if '*' in line or '?' in line: + continue + full_name = mute_file_line_to_tests_monitor_full_name(line) + query = f""" + DECLARE $full_name AS Utf8; + DECLARE $branch AS Utf8; + DECLARE $build_type AS Utf8; + DELETE FROM `{table_path}` + WHERE full_name = $full_name + AND branch = $branch + AND build_type = $build_type; + """ + try: + ydb_wrapper.execute_dml( + query, + {'$full_name': full_name, '$branch': branch, '$build_type': build_type}, + query_name='fast_unmute_grace_delete_on_remute', + ) + except Exception as exc: + logging.warning( + 'Failed to delete grace row for mute line %r (monitor key %r): %s', + line, + full_name, + exc, + ) + + +def load_manual_unmute_full_names(ydb_wrapper, branch, build_type): + """Return the set of full_name registered for manual fast-unmute on this (branch, build_type).""" + try: + table_path = ydb_wrapper.get_table_path('fast_unmute_active') + except KeyError: + logging.info('fast_unmute_active not registered in ydb_qa_config — manual fast-unmute disabled') + return set() + + branch_escaped = str(branch).replace("'", "''") + build_type_escaped = str(build_type).replace("'", "''") + query = f""" + SELECT full_name + FROM `{table_path}` + WHERE branch = '{branch_escaped}' + AND build_type = '{build_type_escaped}' + """ + try: + rows = ydb_wrapper.execute_scan_query(query, query_name='manual_unmute_load_full_names') + except Exception as exc: + logging.warning('Failed to load fast_unmute_active: %s — manual fast-unmute disabled', exc) + return set() + return {row['full_name'] for row in rows if row.get('full_name')} + def is_chunk_test(test): # First, check the is_test_chunk field if it exists. @@ -107,7 +285,9 @@ def get_wildcard_delete_candidates(aggregated_for_delete, mute_check, is_delete_ return result -def execute_query(branch='main', build_type=DEFAULT_BUILD_TYPE, days_window=7, ydb_wrapper=None): +def execute_query(branch='main', build_type=DEFAULT_BUILD_TYPE, days_window=None, ydb_wrapper=None): + if days_window is None: + days_window = tests_monitor_query_days_window() logging.info(f"Executing query for branch='{branch}', build_type='{build_type}', days_window={days_window}") def _run(w): @@ -356,7 +536,15 @@ def is_mute_candidate(test): fail_count = test.get('fail_count', 0) result = (fail_count >= 3 and total_runs > 10) or (fail_count >= 2 and total_runs <= 10) - logging.debug(f"MUTE_CHECK: {test.get('full_name')} - runs:{total_runs}, fails:{fail_count}, state:{test.get('state')}, muted:{test.get('is_muted')}, result:{result}") + logging.info( + 'MUTE_CHECK: %s - runs:%s, fails:%s, state:%s, muted:%s, result:%s', + test.get('full_name'), + total_runs, + fail_count, + test.get('state'), + test.get('is_muted'), + result, + ) return result @@ -368,7 +556,16 @@ def is_unmute_candidate(test): result = total_runs >= 4 and total_fails == 0 if test.get('is_muted', False): - logging.debug(f"UNMUTE_CHECK: {test.get('full_name')} - runs:{total_runs}, fails:{total_fails}, mute_count:{test.get('mute_count')}, state:{test.get('state')}, muted:{test.get('is_muted')}, result:{result}") + logging.info( + 'UNMUTE_CHECK: %s - runs:%s, fails:%s, mute_count:%s, state:%s, muted:%s, result:%s', + test.get('full_name'), + total_runs, + total_fails, + test.get('mute_count'), + test.get('state'), + test.get('is_muted'), + result, + ) return result @@ -390,15 +587,30 @@ def is_delete_candidate(test): result = total_runs == 0 or only_skipped_while_muted if test.get('is_muted', False): - logging.debug( - f"DELETE_CHECK: {test.get('full_name')} - runs:{total_runs}, " - f"p:{pass_count}, f:{fail_count}, m:{mute_count}, s:{skip_count}, " - f"muted:{test.get('is_muted')}, only_skipped_while_muted:{only_skipped_while_muted}, result:{result}" + logging.info( + 'DELETE_CHECK: %s - runs:%s, p:%s, f:%s, m:%s, s:%s, muted:%s, ' + 'only_skipped_while_muted:%s, result:%s', + test.get('full_name'), + total_runs, + pass_count, + fail_count, + mute_count, + skip_count, + test.get('is_muted'), + only_skipped_while_muted, + result, ) return result -def create_file_set(aggregated_for_mute, filter_func, mute_check=None, use_wildcards=False, resolution=None): +def create_file_set( + aggregated_for_mute, + filter_func, + mute_check=None, + use_wildcards=False, + resolution=None, + debug_suffix='', +): """Create a set of tests for output file based on a filter.""" result_set = set() debug_list = [] @@ -428,8 +640,10 @@ def create_file_set(aggregated_for_mute, filter_func, mute_check=None, use_wildc debug_string = create_debug_string( test, period_days=test.get('period_days'), - date_window=test.get('date_window') + date_window=test.get('date_window'), ) + if debug_suffix: + debug_string += debug_suffix debug_list.append(debug_string) # Force 100% output if it was not printed yet. @@ -459,7 +673,21 @@ def write_file_set(file_path, test_set, debug_list=None, sort_without_prefixes=F add_lines_to_file(debug_path, [line + '\n' for line in sorted_debug_list]) logging.info(f"Created {os.path.basename(file_path)} with {len(sorted_test_set)} tests") -def apply_and_add_mutes(all_data, output_path, mute_check, aggregated_for_mute, aggregated_for_unmute, aggregated_for_delete): +def apply_and_add_mutes( + all_data, + output_path, + mute_check, + aggregated_for_mute, + aggregated_for_unmute, + aggregated_for_delete, + manual_unmute_full_names=None, + aggregated_for_manual_unmute=None, + manual_unmute_min_runs=None, + manual_unmute_window_days=None, + ydb_wrapper=None, + branch=None, + build_type=None, +): output_path = os.path.join(output_path, 'mute_update') logging.info(f"Creating mute files in directory: {output_path}") @@ -476,8 +704,7 @@ def apply_and_add_mutes(all_data, output_path, mute_check, aggregated_for_mute, to_mute, to_mute_debug = create_file_set( aggregated_for_mute, is_mute_candidate, use_wildcards=True, resolution='to_mute' ) - write_file_set(os.path.join(output_path, 'to_mute.txt'), to_mute, to_mute_debug) - + # 2. Unmute candidates. def is_unmute_non_chunk(test): if is_chunk_test(test): @@ -498,9 +725,53 @@ def apply_and_add_mutes(all_data, output_path, mute_check, aggregated_for_mute, # Merge per-test and wildcard results. to_unmute = sorted(list(set(to_unmute) | set(wildcard_unmute_patterns))) to_unmute_debug = sorted(list(set(to_unmute_debug) | set(wildcard_unmute_debugs))) - + + # 2a. Manual fast-unmute candidates. + # A test is considered under manual fast-unmute when its full_name is + # registered in `fast_unmute_active` (populated when a + # user manually closes the mute issue). Such tests are evaluated on a + # shorter window and smaller min_runs threshold, so they get unmuted + # sooner when stable. + manual_unmute_full_names = set(manual_unmute_full_names or []) + if manual_unmute_full_names and aggregated_for_manual_unmute and manual_unmute_min_runs: + def is_manual_unmute_candidate(test): + if is_chunk_test(test): + return False + fn = test.get('full_name') + if fn not in manual_unmute_full_names: + return False + total_runs = test.get('pass_count', 0) + test.get('fail_count', 0) + test.get('mute_count', 0) + total_fails = test.get('fail_count', 0) + test.get('mute_count', 0) + result = total_runs >= manual_unmute_min_runs and total_fails == 0 + logging.info( + 'FAST_UNMUTE_CHECK: %s - runs:%s, fails:%s, min_runs:%s, window_days=%s, result:%s', + fn, + total_runs, + total_fails, + manual_unmute_min_runs, + manual_unmute_window_days if manual_unmute_window_days is not None else '?', + result, + ) + return result + + to_unmute_manual, to_unmute_manual_debug = create_file_set( + aggregated_for_manual_unmute, + is_manual_unmute_candidate, + mute_check, + resolution='to_unmute', + debug_suffix=' [fast-unmute]', + ) + if to_unmute_manual: + logging.info(f"Manual fast-unmute added {len(to_unmute_manual)} test(s) to to_unmute") + to_unmute = sorted(list(set(to_unmute) | set(to_unmute_manual))) + to_unmute_debug = sorted(list(set(to_unmute_debug) | set(to_unmute_manual_debug))) + + write_file_set(os.path.join(output_path, 'to_mute.txt'), to_mute, to_mute_debug) write_file_set(os.path.join(output_path, 'to_unmute.txt'), to_unmute, to_unmute_debug) - + + if ydb_wrapper is not None and branch is not None and build_type is not None: + delete_fast_unmute_grace_rows(ydb_wrapper, branch, build_type, to_mute) + # 3. Delete-from-mute candidates (to_delete). def is_delete_non_chunk(test): if is_chunk_test(test): @@ -668,7 +939,9 @@ def read_tests_from_file(file_path): def create_mute_issues(all_tests, file_path, close_issues=True, branch='main', build_type=DEFAULT_BUILD_TYPE): tests_from_file = read_tests_from_file(file_path) - muted_tests_in_issues = get_muted_tests_from_issues() + issues_index = get_issues_and_tests_from_project(ORG_NAME, PROJECT_ID) + muted_tests_in_issues = get_muted_tests_from_issues(issues_index) + manual_fast_unmute_issue_by_test = map_tests_to_manual_fast_unmute_issue_url(issues_index) prepared_tests_by_suite = {} temp_tests_by_suite = {} @@ -701,6 +974,14 @@ def create_mute_issues(all_tests, file_path, close_issues=True, branch='main', b f"test {full_name} ({build_type}) already have issue, {muted_tests_in_issues[issue_key][0]['url']}" ) continue + if issue_key in manual_fast_unmute_issue_by_test: + logging.info( + 'test %s (%s) skipped: existing issue with manual-fast-unmute label: %s', + full_name, + build_type, + manual_fast_unmute_issue_by_test[issue_key], + ) + continue monitor = monitor_by_name.get((full_name, build_type)) if monitor and is_chunk_test(monitor): @@ -1017,23 +1298,68 @@ def mute_worker(args): mute_check.load(input_muted_ya_path) logging.info(f"Loaded muted_ya.txt with {len(mute_check.regexps)} test patterns") - all_data = execute_query( - args.branch, build_type=build_type, days_window=7, ydb_wrapper=ydb_wrapper - ) + mute_window_days = get_mute_window_days() + unmute_window_days = get_unmute_window_days() + delete_window_days = get_delete_window_days() + + all_data = execute_query(args.branch, build_type=build_type, ydb_wrapper=ydb_wrapper) logging.info(f"Query returned {len(all_data)} test records") # Use unified aggregation for different periods. - aggregated_for_mute = aggregate_test_data(all_data, MUTE_DAYS) # MUTE_DAYS for mute - aggregated_for_unmute = aggregate_test_data(all_data, UNMUTE_DAYS) # UNMUTE_DAYS for unmute - aggregated_for_delete = aggregate_test_data(all_data, DELETE_DAYS) # DELETE_DAYS for delete - + aggregated_for_mute = aggregate_test_data(all_data, mute_window_days) + aggregated_for_unmute = aggregate_test_data(all_data, unmute_window_days) + aggregated_for_delete = aggregate_test_data(all_data, delete_window_days) + + manual_unmute_window_days, manual_unmute_min_runs = load_manual_unmute_config() + manual_unmute_full_names = set() + aggregated_for_manual_unmute = None + if manual_unmute_window_days and manual_unmute_min_runs: + manual_unmute_full_names = load_manual_unmute_full_names(ydb_wrapper, args.branch, build_type) + if manual_unmute_full_names: + aggregated_for_manual_unmute = aggregate_test_data(all_data, manual_unmute_window_days) + logging.info( + f"Manual fast-unmute: window={manual_unmute_window_days}d, min_runs={manual_unmute_min_runs}, " + f"tests={len(manual_unmute_full_names)}" + ) + logging.info(f"Aggregated data: mute={len(aggregated_for_mute)}, unmute={len(aggregated_for_unmute)}, delete={len(aggregated_for_delete)}") - + + grace_map = load_fast_unmute_grace_map(ydb_wrapper, args.branch, build_type) + if grace_map: + ladder_base = manual_unmute_window_days or 2 + aggregated_for_mute = merge_mute_aggregate_with_fast_unmute_grace( + all_data, + aggregated_for_mute, + grace_map, + ladder_base, + mute_window_days, + ) + logging.info( + 'Fast-unmute grace ladder: %d test(s), effective mute window min(%d, %d + days_since_grace)', + len(grace_map), + mute_window_days, + ladder_base, + ) + if args.mode == 'update_muted_ya': output_path = args.output_folder os.makedirs(output_path, exist_ok=True) logging.info(f"Creating mute files in: {output_path}") - apply_and_add_mutes(all_data, output_path, mute_check, aggregated_for_mute, aggregated_for_unmute, aggregated_for_delete) + apply_and_add_mutes( + all_data, + output_path, + mute_check, + aggregated_for_mute, + aggregated_for_unmute, + aggregated_for_delete, + manual_unmute_full_names=manual_unmute_full_names, + aggregated_for_manual_unmute=aggregated_for_manual_unmute, + manual_unmute_min_runs=manual_unmute_min_runs, + manual_unmute_window_days=manual_unmute_window_days, + ydb_wrapper=ydb_wrapper, + branch=args.branch, + build_type=build_type, + ) elif args.mode == 'create_issues': file_path = args.file_path diff --git a/.github/scripts/tests/mute/fast_unmute_comments.py b/.github/scripts/tests/mute/fast_unmute_comments.py new file mode 100644 index 00000000000..1d2791fc772 --- /dev/null +++ b/.github/scripts/tests/mute/fast_unmute_comments.py @@ -0,0 +1,70 @@ +"""User-visible GitHub comments for the manual fast-unmute flow.""" + + +def format_bullet_list(tests): + return '\n'.join(f"- `{name}`" for name in sorted(set(tests))) + + +COMMENT_ENTER = """🚀 **Fast-unmute started** + +{closer_mention_line} +**Status** → **Observation** + +These tests are still listed in `muted_ya`, but CI will try to unmute them sooner ({window_days} days window, at least {min_runs} clean runs): + +{tests_bullet_list} + +**What to expect** + +- If tests go green in CI before the limit → board **Unmuted**, label removed. +- If the limit passes and something is still red → this issue **reopens**, board **Muted**, label removed. + +You do not need to do anything. Please do not edit `muted_ya.txt` by hand. + +🔗 Workflow run: {workflow_run_url} +""" + + +COMMENT_SUCCESS = """✅ **Fast-unmute completed** + +Every test on this issue is green in CI before the time limit. The mute list in the repo will catch up on the next routine update. + +**Status** → **Unmuted**. Label `manual-fast-unmute` removed. + +🔗 Workflow run: {workflow_run_url} +""" + + +COMMENT_PROGRESS = """⏱️ **Fast-unmute: progress** + +These already show as unmuted in CI. Other tests on this issue are still in the shorter window: + +{unmuted_bullets} + +🔗 Workflow run: {workflow_run_url} +""" + + +COMMENT_ABANDON_NOT_COMPLETED = """🛑 **Fast-unmute stopped** + +Tracking for fast-unmute on this issue was stopped: **Status** → **Muted**. + +🔗 Workflow run: {workflow_run_url} +""" + + +COMMENT_TTL_INCOMPLETE = """❌ **Fast-unmute: deadline passed** + +At least one **(test, branch, build)** on this issue stayed muted past **its** **{ttl_days}**-day window. This issue is **reopened**, **Status** → **Muted** + +**Green in CI** (already unmuted there, or will be shortly once the mute list in the repo updates): +{graduated_bullets} + +**Still on the CI mute list** when the window closed — at least one of these stayed muted and led to the reopen: +{stuck_bullets} + +**Other rows cleared in the same shutdown:** +{cleared_other_bullets} + +🔗 Workflow run: {workflow_run_url} +""" diff --git a/.github/scripts/tests/mute/fast_unmute_github.py b/.github/scripts/tests/mute/fast_unmute_github.py new file mode 100644 index 00000000000..e26306f81e1 --- /dev/null +++ b/.github/scripts/tests/mute/fast_unmute_github.py @@ -0,0 +1,336 @@ +"""GitHub GraphQL for manual fast-unmute (timeline, labels, org project, reopen).""" + +import logging + +from mute.update_mute_issues import ( + MANUAL_FAST_UNMUTE_GITHUB_LABEL, + ORG_NAME, + PROJECT_ID, + REPO_NAME, + get_project_v2_fields, + run_query, +) + +LABEL_NAME = MANUAL_FAST_UNMUTE_GITHUB_LABEL +_LABEL_ID_CACHE = {} + +PROJECT_STATUS_ON_FAST_UNMUTE_REOPEN = 'Observation' +PROJECT_STATUS_ON_FAST_UNMUTE_FAIL = 'Muted' +PROJECT_STATUS_ON_FAST_UNMUTE_SUCCESS = 'Unmuted' + + +def fetch_issue_closers(issue_numbers): + """Return {issue_number: {'login': str, 'type': 'User'|'Bot'|''}}. + + We need this because the exported `issues` table does not carry "closed by", + so for the short list of candidates we query GitHub directly. + """ + result = {} + numbers = sorted({int(n) for n in (issue_numbers or []) if n is not None}) + if not numbers: + return result + chunk_size = 50 + for i in range(0, len(numbers), chunk_size): + chunk = numbers[i : i + chunk_size] + subqueries = [] + for number in chunk: + subqueries.append( + f""" + n{number}: issue(number: {number}) {{ + timelineItems(last: 1, itemTypes: [CLOSED_EVENT]) {{ + nodes {{ + ... on ClosedEvent {{ + actor {{ __typename login }} + }} + }} + }} + }} + """ + ) + query = f""" + query {{ + repository(owner: "{ORG_NAME}", name: "{REPO_NAME}") {{ + {' '.join(subqueries)} + }} + }} + """ + response = run_query(query) + repo_data = (response.get('data') or {}).get('repository') or {} + for number in chunk: + node = repo_data.get(f'n{number}') + login = '' + actor_type = '' + if node: + events = (node.get('timelineItems') or {}).get('nodes') or [] + event = events[0] if events else {} + actor = event.get('actor') or {} + login = actor.get('login') or '' + actor_type = actor.get('__typename') or '' + result[number] = {'login': login, 'type': actor_type} + return result + + +def reopen_issue(issue_id): + """Reopen a closed issue. No-op if already open.""" + state_query = """ + query ($issueId: ID!) { + node(id: $issueId) { + ... on Issue { state } + } + } + """ + state_result = run_query(state_query, {'issueId': issue_id}) + state = ((state_result.get('data') or {}).get('node') or {}).get('state') + if state != 'CLOSED': + return + mutation = """ + mutation ($issueId: ID!) { + reopenIssue(input: {issueId: $issueId}) { issue { id } } + } + """ + run_query(mutation, {'issueId': issue_id}) + + +def _issue_project_board_item_id(issue_node_id, project_number): + """Return Project v2 **item** id for ``issue_node_id`` on board ``project_number``, or ``None``.""" + query = """ + query ($issueId: ID!) { + node(id: $issueId) { + ... on Issue { + projectItems(first: 40) { + nodes { + id + project { number } + } + } + } + } + } + """ + try: + result = run_query(query, {'issueId': issue_node_id}) + except Exception as exc: + logging.warning('manual_unmute: projectItems query failed: %s', exc) + return None + node = (result.get('data') or {}).get('node') or {} + want = int(project_number) + for it in ((node.get('projectItems') or {}).get('nodes')) or []: + num = (it.get('project') or {}).get('number') + if num is not None and int(num) == want: + return it.get('id') + return None + + +def _add_issue_to_org_project(project_global_id, issue_node_id): + """Add issue to org project; return new project **item** id.""" + mutation = """ + mutation ($projectId: ID!, $contentId: ID!) { + addProjectV2ItemById(input: {projectId: $projectId, contentId: $contentId}) { + item { id } + } + } + """ + try: + result = run_query( + mutation, {'projectId': project_global_id, 'contentId': issue_node_id} + ) + except Exception as exc: + logging.warning('manual_unmute: addProjectV2ItemById failed: %s', exc) + return None + item = (((result.get('data') or {}).get('addProjectV2ItemById') or {}).get('item') or {}) + return item.get('id') + + +def set_manual_unmute_project_board_status(issue_node_id, status_label): + """Set org project ``Status`` (single select) by option name (case-insensitive). + + Issues not yet on the board are added to the project (same behaviour as mute tooling). + Requires token scope that can read/update org projects. + """ + label = (status_label or '').strip() + if not label: + return + try: + project_global_id, project_fields = get_project_v2_fields(ORG_NAME, PROJECT_ID) + except Exception as exc: + logging.warning('manual_unmute: could not load project %s fields: %s', PROJECT_ID, exc) + return + status_field_id = None + option_id = None + want = label.lower() + for field in project_fields: + if (field.get('name') or '').lower() != 'status': + continue + status_field_id = field.get('id') + for opt in field.get('options') or []: + if (opt.get('name') or '').lower() == want: + option_id = opt.get('id') + break + break + if not status_field_id or not option_id: + logging.warning( + 'manual_unmute: project %s: Status field or %r option not found; skip board update', + PROJECT_ID, + label, + ) + return + + item_id = _issue_project_board_item_id(issue_node_id, PROJECT_ID) + if not item_id: + item_id = _add_issue_to_org_project(project_global_id, issue_node_id) + if not item_id: + logging.warning( + 'manual_unmute: could not resolve or create project item for issue (project %s)', + PROJECT_ID, + ) + return + + mutation = """ + mutation ($projectId: ID!, $itemId: ID!, $fieldId: ID!, $optionId: String) { + updateProjectV2ItemFieldValue(input: { + projectId: $projectId, + itemId: $itemId, + fieldId: $fieldId, + value: { singleSelectOptionId: $optionId } + }) { + projectV2Item { id } + } + } + """ + try: + run_query( + mutation, + { + 'projectId': project_global_id, + 'itemId': item_id, + 'fieldId': status_field_id, + 'optionId': option_id, + }, + ) + logging.info( + 'manual_unmute: set project %s Status to %r', + PROJECT_ID, + label, + ) + except Exception as exc: + logging.warning('manual_unmute: updateProjectV2ItemFieldValue failed: %s', exc) + + +def set_fast_unmute_reopen_project_status(issue_node_id): + """Set org project Status → Observation when entering fast-unmute.""" + set_manual_unmute_project_board_status( + issue_node_id, PROJECT_STATUS_ON_FAST_UNMUTE_REOPEN + ) + + +def _get_label_id(): + """Resolve the pre-created label node id (cached). Returns None if missing.""" + if LABEL_NAME in _LABEL_ID_CACHE: + return _LABEL_ID_CACHE[LABEL_NAME] + + query = """ + query ($owner: String!, $name: String!, $labelName: String!) { + repository(owner: $owner, name: $name) { + label(name: $labelName) { id } + } + } + """ + result = run_query( + query, + {'owner': ORG_NAME, 'name': REPO_NAME, 'labelName': LABEL_NAME}, + ) + label = (((result.get('data') or {}).get('repository') or {}).get('label') or {}) + label_id = label.get('id') + if not label_id: + logging.warning( + "Label %r not found in %s/%s — create it manually in the repository labels page", + LABEL_NAME, + ORG_NAME, + REPO_NAME, + ) + return None + _LABEL_ID_CACHE[LABEL_NAME] = label_id + return label_id + + +def add_label_to_issue(issue_id): + """Attach the fast-unmute label. Idempotent — GitHub ignores duplicates.""" + label_id = _get_label_id() + if not label_id: + return + mutation = """ + mutation ($labelableId: ID!, $labelIds: [ID!]!) { + addLabelsToLabelable(input: {labelableId: $labelableId, labelIds: $labelIds}) { + labelable { __typename } + } + } + """ + try: + run_query(mutation, {'labelableId': issue_id, 'labelIds': [label_id]}) + except Exception as exc: + logging.warning('Failed to add label to issue %s: %s', issue_id, exc) + + +def remove_label_from_issue(issue_id): + """Detach the fast-unmute label. No-op if label is not present.""" + label_id = _get_label_id() + if not label_id: + return + mutation = """ + mutation ($labelableId: ID!, $labelIds: [ID!]!) { + removeLabelsFromLabelable(input: {labelableId: $labelableId, labelIds: $labelIds}) { + labelable { __typename } + } + } + """ + try: + run_query(mutation, {'labelableId': issue_id, 'labelIds': [label_id]}) + except Exception as exc: + logging.warning('Failed to remove label from issue %s: %s', issue_id, exc) + + +def fetch_issue_states(issue_numbers): + """Return ``{issue_number: {'id', 'state', 'state_reason'}}}`` from GitHub GraphQL. + + ``state_reason`` mirrors GitHub's ``stateReason`` (e.g. ``COMPLETED`` when closed). + Issues missing from the response are omitted. + """ + result = {} + numbers = sorted({int(n) for n in (issue_numbers or []) if n is not None}) + if not numbers: + return result + chunk_size = 50 + for i in range(0, len(numbers), chunk_size): + chunk = numbers[i : i + chunk_size] + subqueries = [ + f"n{n}: issue(number: {n}) {{ id state stateReason }}" for n in chunk + ] + query = f""" + query {{ + repository(owner: "{ORG_NAME}", name: "{REPO_NAME}") {{ + {' '.join(subqueries)} + }} + }} + """ + response = run_query(query) + repo_data = (response.get('data') or {}).get('repository') or {} + for number in chunk: + node = repo_data.get(f'n{number}') + if not node or not node.get('id'): + continue + result[number] = { + 'id': node['id'], + 'state': str(node.get('state') or ''), + 'state_reason': str(node.get('stateReason') or ''), + } + return result + + +def fetch_issue_node_ids(issue_numbers): + """Return ``{issue_number: issue_node_id}`` (same GraphQL batching as ``fetch_issue_states``).""" + return {n: data['id'] for n, data in fetch_issue_states(issue_numbers).items()} + + +def issue_eligible_for_manual_fast_unmute_entry(state, state_reason): + """Same gate as YDB candidate issues: closed by human as completed.""" + return (state or '').strip().upper() == 'CLOSED' and (state_reason or '').strip().upper() == 'COMPLETED' diff --git a/.github/scripts/tests/mute/fast_unmute_pipeline.py b/.github/scripts/tests/mute/fast_unmute_pipeline.py new file mode 100644 index 00000000000..9e523974ac4 --- /dev/null +++ b/.github/scripts/tests/mute/fast_unmute_pipeline.py @@ -0,0 +1,512 @@ +"""Orchestration: enter / cleanup / sync for manual fast-unmute.""" + +import datetime +import logging +import os +from collections import defaultdict + +from github_issue_utils import DEFAULT_BUILD_TYPE + +from mute.constants import ( + get_manual_unmute_issue_closed_lookback_days, + get_manual_unmute_min_runs, + get_manual_unmute_ttl_calendar_days, + get_manual_unmute_window_days, + get_mute_window_days, +) +from mute.fast_unmute_comments import ( + COMMENT_ABANDON_NOT_COMPLETED, + COMMENT_ENTER, + COMMENT_PROGRESS, + COMMENT_SUCCESS, + COMMENT_TTL_INCOMPLETE, + format_bullet_list, +) +from mute.fast_unmute_github import ( + PROJECT_STATUS_ON_FAST_UNMUTE_FAIL, + PROJECT_STATUS_ON_FAST_UNMUTE_SUCCESS, + add_label_to_issue, + fetch_issue_closers, + fetch_issue_node_ids, + fetch_issue_states, + issue_eligible_for_manual_fast_unmute_entry, + remove_label_from_issue, + reopen_issue, + set_fast_unmute_reopen_project_status, + set_manual_unmute_project_board_status, +) +from mute.fast_unmute_ydb import ( + _coerce_dt, + count_rows_per_issue, + create_fast_unmute_grace_table, + create_manual_unmute_table, + delete_grace_row, + delete_row, + expire_fast_unmute_grace, + fetch_all_rows, + fetch_candidate_issues, + fetch_currently_muted, + upsert_fast_unmute_grace_row, + upsert_rows, +) +from mute.update_mute_issues import add_issue_comment, parse_body + +# GitHub ``__typename`` is ``User`` for PAT-based bot accounts; skip known bot logins (M2). +BOT_LOGINS = frozenset({'ydbot', 'github-actions'}) + +_LOG = logging.getLogger('manual_unmute') + + +def grace_ttl_calendar_days(mute_window_days, manual_unmute_window_days): + """Calendar days a ``fast_unmute_grace`` row is kept (same rule as ``expire_fast_unmute_grace``). + + Stored on insert so dashboards and TTL stay interpretable even if ``mute_config.json`` changes later. + """ + return max(1, int(mute_window_days) - int(manual_unmute_window_days)) + + +def load_config(): + """Fast-track window/min-runs — same keys as ``mute.constants`` / ``mute_config.json``.""" + return { + 'window_days': get_manual_unmute_window_days(), + 'min_runs': get_manual_unmute_min_runs(), + } + + +def _delete_fast_unmute_row_and_grace( + ydb_wrapper, table_path, grace_table_path, full_name, branch, build_type, *, log_prefix +): + """Remove one ``fast_unmute_active`` row and best-effort matching ``fast_unmute_grace`` row.""" + delete_row(ydb_wrapper, table_path, full_name, branch, build_type) + if not grace_table_path: + return + try: + delete_grace_row(ydb_wrapper, grace_table_path, full_name, branch, build_type) + except Exception as exc: + logging.warning( + '%s: grace delete %s %s %s: %s', + log_prefix, + full_name, + branch, + build_type, + exc, + ) + + +def abandon_fast_unmute_if_issue_not_completed(ydb_wrapper, table_path, grace_table_path): + """Drop fast-unmute rows when the issue is no longer CLOSED+COMPLETED on GitHub (e.g. reopened). + + Clears ``fast_unmute_active`` (and matching ``fast_unmute_grace`` rows when configured), + removes the label, sets project Status → Muted, and posts ``COMMENT_ABANDON_NOT_COMPLETED``. + """ + rows = fetch_all_rows(ydb_wrapper, table_path) + if not rows: + return + + by_issue = defaultdict(list) + for row in rows: + inn = row.get('github_issue_number') + if inn is None: + continue + by_issue[int(inn)].append(row) + + if not by_issue: + return + + states = fetch_issue_states(list(by_issue.keys())) + run_url = workflow_run_url() + abandoned = 0 + rows_deleted = 0 + + for issue_number in sorted(by_issue.keys()): + meta = states.get(issue_number) or {} + issue_id = meta.get('id') + state = meta.get('state') or '' + state_reason = meta.get('state_reason') or '' + if issue_eligible_for_manual_fast_unmute_entry(state, state_reason): + continue + if not issue_id: + logging.warning( + 'manual_unmute_abandon: issue #%s has fast-unmute rows but no GitHub node id; skip', + issue_number, + ) + continue + + for row in by_issue[issue_number]: + fn, br, bt = row.get('full_name'), row.get('branch'), row.get('build_type') + if not fn or not br or not bt: + continue + _delete_fast_unmute_row_and_grace( + ydb_wrapper, table_path, grace_table_path, fn, br, bt, log_prefix='manual_unmute_abandon' + ) + rows_deleted += 1 + + remove_label_from_issue(issue_id) + set_manual_unmute_project_board_status(issue_id, PROJECT_STATUS_ON_FAST_UNMUTE_FAIL) + add_issue_comment( + issue_id, + COMMENT_ABANDON_NOT_COMPLETED.format(workflow_run_url=run_url), + ) + abandoned += 1 + + if abandoned: + logging.info( + 'manual_unmute_abandon: cleared %d issue(s), %d fast_unmute row(s) (issue not CLOSED+COMPLETED)', + abandoned, + rows_deleted, + ) + + +def workflow_run_url(): + server = os.environ.get('GITHUB_SERVER_URL', 'https://github.com') + repo = os.environ.get('GITHUB_REPOSITORY', '') + run_id = os.environ.get('GITHUB_RUN_ID', '') + if repo and run_id: + return f"{server}/{repo}/actions/runs/{run_id}" + return 'N/A' + + +def enter_manual_unmute(ydb_wrapper, table_path, issues_table_path, tests_monitor_path, window_days, min_runs): + """Discover newly-closed-by-human issues and register their still-muted tests.""" + existing = { + (r['full_name'], r['branch'], r['build_type']): r + for r in fetch_all_rows(ydb_wrapper, table_path) + if r.get('full_name') and r.get('branch') and r.get('build_type') + } + + raw_candidates = fetch_candidate_issues( + ydb_wrapper, issues_table_path, get_manual_unmute_issue_closed_lookback_days() + ) + candidates = list( + {int(c['issue_number']): c for c in raw_candidates if c.get('issue_number') is not None}.values() + ) + if not candidates: + logging.info('manual_unmute_enter: no CLOSED+COMPLETED candidates in lookback window') + return + + issue_numbers = {int(c['issue_number']) for c in candidates if c.get('issue_number') is not None} + closers = fetch_issue_closers(issue_numbers) + + muted_cache = {} + now = datetime.datetime.now(tz=datetime.timezone.utc) + run_url = workflow_run_url() + new_rows = [] + + for issue in candidates: + issue_number_raw = issue.get('issue_number') + issue_id = issue.get('issue_id') + if issue_number_raw is None or not issue_id: + _LOG.debug( + 'enter: skip candidate without issue_number/issue_id: number=%r id=%r', + issue_number_raw, + issue_id, + ) + continue + issue_number = int(issue_number_raw) + + closer = closers.get(issue_number) or {} + if closer.get('type') != 'User': + _LOG.debug( + 'enter: skip #%s: closer is not User (login=%r type=%r)', + issue_number, + closer.get('login'), + closer.get('type'), + ) + continue + login = (closer.get('login') or '').lower() + if login in BOT_LOGINS: + _LOG.debug( + 'enter: skip #%s: closer login %r is bot-denylisted', + issue_number, + login, + ) + continue + + parsed = parse_body(issue.get('body') or '') + tests = parsed.tests + branches = parsed.branches or ['main'] + build_type = parsed.build_type or DEFAULT_BUILD_TYPE + if not tests: + _LOG.debug('enter: skip #%s: no tests parsed from issue body', issue_number) + continue + + issue_rows = [] + for full_name in tests: + for branch in branches: + cache_key = (branch, build_type) + if cache_key not in muted_cache: + muted_cache[cache_key] = fetch_currently_muted( + ydb_wrapper, tests_monitor_path, branch, build_type + ) + if full_name not in muted_cache[cache_key]: + _LOG.debug( + 'enter: skip #%s test %r: not currently muted on branch=%r build_type=%r', + issue_number, + full_name, + branch, + build_type, + ) + continue + row_key = (full_name, branch, build_type) + if row_key in existing: + _LOG.debug( + 'enter: skip #%s test %r: row already in fast_unmute_active %s', + issue_number, + full_name, + row_key, + ) + continue + issue_rows.append({ + 'full_name': full_name, + 'branch': branch, + 'build_type': build_type, + 'github_issue_number': issue_number, + 'requested_at': now, + 'window_days': window_days, + }) + + if not issue_rows: + _LOG.debug( + 'enter: skip #%s: zero rows after filtering (parsed tests=%s branches=%s build_type=%s)', + issue_number, + sorted(tests), + branches, + build_type, + ) + continue + + upsert_rows(ydb_wrapper, table_path, issue_rows) + + set_fast_unmute_reopen_project_status(issue_id) + raw_login = (closer.get('login') or '').strip() + closer_mention_line = f'@{raw_login}\n\n' if raw_login else '' + add_issue_comment( + issue_id, + COMMENT_ENTER.format( + closer_mention_line=closer_mention_line, + closer_login=raw_login or 'unknown', + window_days=window_days, + min_runs=min_runs, + tests_bullet_list=format_bullet_list(r['full_name'] for r in issue_rows), + workflow_run_url=run_url, + ), + ) + add_label_to_issue(issue_id) + + new_rows.extend(issue_rows) + for row in issue_rows: + existing[(row['full_name'], row['branch'], row['build_type'])] = row + + logging.info( + 'manual_unmute_enter: inserted %d row(s) from %d candidate issue(s)', + len(new_rows), + len(candidates), + ) + + +def cleanup_manual_unmute(ydb_wrapper, table_path, tests_monitor_path): + """Drop rows: unmuted in CI, or whole issue on TTL miss.""" + rows = fetch_all_rows(ydb_wrapper, table_path) + if not rows: + return + + now = datetime.datetime.now(tz=datetime.timezone.utc) + run_url = workflow_run_url() + ttl_days = get_manual_unmute_ttl_calendar_days() + ttl_delta = datetime.timedelta(days=ttl_days) + + grouped = {} + for row in rows: + key = (row.get('branch'), row.get('build_type')) + if not key[0] or not key[1]: + continue + grouped.setdefault(key, []).append(row) + + affected_issues = set() + issues_cleared_via_unmute = set() + unmuted_tests_by_issue = defaultdict(list) + delete_count = 0 + grace_table_path = None + try: + grace_table_path = ydb_wrapper.get_table_path('fast_unmute_grace') + create_fast_unmute_grace_table(ydb_wrapper, grace_table_path) + except KeyError: + pass + + grace_ttl_snapshot = grace_ttl_calendar_days( + get_mute_window_days(), get_manual_unmute_window_days() + ) + + issues_ttl_shutdown = set() + ttl_stuck_tests_by_issue = defaultdict(list) + + for (branch, build_type), group_rows in grouped.items(): + currently_muted = fetch_currently_muted(ydb_wrapper, tests_monitor_path, branch, build_type) + for row in group_rows: + full_name = row.get('full_name') + if not full_name: + continue + requested_at = _coerce_dt(row.get('requested_at')) + issue_number = row.get('github_issue_number') + + if full_name not in currently_muted: + if grace_table_path: + try: + ft_at = requested_at or now + upsert_fast_unmute_grace_row( + ydb_wrapper, + grace_table_path, + full_name, + branch, + build_type, + int(issue_number or 0), + ft_at, + now, + grace_ttl_snapshot, + ) + except Exception as exc: + logging.warning('Failed to record fast-unmute grace for %s: %s', full_name, exc) + delete_row(ydb_wrapper, table_path, full_name, branch, build_type) + delete_count += 1 + if issue_number: + inum = int(issue_number) + affected_issues.add(inum) + issues_cleared_via_unmute.add(inum) + unmuted_tests_by_issue[inum].append(full_name) + logging.info('manual_unmute_cleanup: %s (already unmuted)', full_name) + continue + + if requested_at and (now - requested_at) > ttl_delta: + if issue_number: + inum = int(issue_number) + issues_ttl_shutdown.add(inum) + ttl_stuck_tests_by_issue[inum].append(full_name) + affected_issues.add(inum) + logging.info( + 'manual_unmute_cleanup: %s (ttl %s calendar days exceeded, still muted)', + full_name, + ttl_days, + ) + continue + + ttl_bulk_removed_by_issue = defaultdict(set) + if issues_ttl_shutdown: + for row in fetch_all_rows(ydb_wrapper, table_path): + inn = row.get('github_issue_number') + if inn is None or int(inn) not in issues_ttl_shutdown: + continue + fn, br, bt = row.get('full_name'), row.get('branch'), row.get('build_type') + if not fn or not br or not bt: + continue + delete_row(ydb_wrapper, table_path, fn, br, bt) + delete_count += 1 + ttl_bulk_removed_by_issue[int(inn)].add(fn) + logging.info('manual_unmute_cleanup: %s (bulk clear issue #%s after ttl)', fn, int(inn)) + + if affected_issues: + remaining = count_rows_per_issue(ydb_wrapper, table_path, affected_issues) + issues_to_delabel = {num for num in affected_issues if remaining.get(num, 0) == 0} + issue_ids = fetch_issue_node_ids(affected_issues) + + for issue_number in sorted(issues_ttl_shutdown): + issue_id = issue_ids.get(issue_number) + if not issue_id: + logging.warning( + 'manual_unmute: ttl shutdown for issue #%s: YDB cleared but no GitHub node id', + issue_number, + ) + continue + stuck = sorted(set(ttl_stuck_tests_by_issue.get(issue_number, []))) + graduated = sorted(set(unmuted_tests_by_issue.get(issue_number, []))) + bulk_all = sorted(ttl_bulk_removed_by_issue.get(issue_number, set())) + cleared_other = sorted(set(bulk_all) - set(stuck)) + reopen_issue(issue_id) + add_issue_comment( + issue_id, + COMMENT_TTL_INCOMPLETE.format( + ttl_days=ttl_days, + graduated_bullets=format_bullet_list(graduated) + if graduated + else '- _(none)_', + stuck_bullets=format_bullet_list(stuck) if stuck else '- _(none)_', + cleared_other_bullets=format_bullet_list(cleared_other) + if cleared_other + else '- _(none)_', + workflow_run_url=run_url, + ), + ) + set_manual_unmute_project_board_status( + issue_id, PROJECT_STATUS_ON_FAST_UNMUTE_FAIL + ) + + for issue_number in sorted(issues_cleared_via_unmute): + if remaining.get(issue_number, 0) == 0: + continue + if issue_number in issues_ttl_shutdown: + continue + issue_id = issue_ids.get(issue_number) + if not issue_id: + continue + names = sorted(set(unmuted_tests_by_issue.get(issue_number, []))) + if not names: + continue + add_issue_comment( + issue_id, + COMMENT_PROGRESS.format( + unmuted_bullets=format_bullet_list(names), + workflow_run_url=run_url, + ), + ) + + success_comment_issues = ( + issues_to_delabel + & issues_cleared_via_unmute + - issues_ttl_shutdown + ) + for issue_number in sorted(success_comment_issues): + issue_id = issue_ids.get(issue_number) + if not issue_id: + continue + add_issue_comment( + issue_id, + COMMENT_SUCCESS.format(workflow_run_url=run_url), + ) + set_manual_unmute_project_board_status( + issue_id, PROJECT_STATUS_ON_FAST_UNMUTE_SUCCESS + ) + + for issue_number in issues_to_delabel: + issue_id = issue_ids.get(issue_number) + if issue_id: + remove_label_from_issue(issue_id) + + logging.info('manual_unmute_cleanup: removed %d row(s)', delete_count) + + +def sync(ydb_wrapper): + config = load_config() + + table_path = ydb_wrapper.get_table_path('fast_unmute_active') + issues_table_path = ydb_wrapper.get_table_path('issues') + tests_monitor_path = ydb_wrapper.get_table_path('tests_monitor') + + create_manual_unmute_table(ydb_wrapper, table_path) + try: + grace_table_path = ydb_wrapper.get_table_path('fast_unmute_grace') + create_fast_unmute_grace_table(ydb_wrapper, grace_table_path) + except KeyError: + grace_table_path = None + + abandon_fast_unmute_if_issue_not_completed(ydb_wrapper, table_path, grace_table_path) + + enter_manual_unmute( + ydb_wrapper, + table_path, + issues_table_path, + tests_monitor_path, + config['window_days'], + config['min_runs'], + ) + cleanup_manual_unmute(ydb_wrapper, table_path, tests_monitor_path) + if grace_table_path: + expire_fast_unmute_grace(ydb_wrapper, grace_table_path) diff --git a/.github/scripts/tests/mute/fast_unmute_ydb.py b/.github/scripts/tests/mute/fast_unmute_ydb.py new file mode 100644 index 00000000000..84ff0d97cce --- /dev/null +++ b/.github/scripts/tests/mute/fast_unmute_ydb.py @@ -0,0 +1,255 @@ +"""YDB access for ``fast_unmute_active`` and ``fast_unmute_grace``.""" + +import datetime +import logging +import os + +import ydb + +from mute.constants import get_manual_unmute_currently_muted_lookback_days + +_SIMULATE_UNMUTED_ENV = 'MANUAL_UNMUTE_SIMULATE_UNMUTED' + + +def _simulate_unmuted_full_names(): + raw = os.environ.get(_SIMULATE_UNMUTED_ENV, '').strip() + if not raw: + return frozenset() + return frozenset(x.strip() for x in raw.split(',') if x.strip()) + + +def _escape(value): + return str(value).replace("'", "''") + + +def _coerce_dt(value): + if value is None: + return None + if isinstance(value, datetime.datetime): + if value.tzinfo is None: + return value.replace(tzinfo=datetime.timezone.utc) + return value.astimezone(datetime.timezone.utc) + if isinstance(value, datetime.date): + return datetime.datetime.combine(value, datetime.time.min, tzinfo=datetime.timezone.utc) + if isinstance(value, int): + return datetime.datetime.fromtimestamp(value / 1_000_000, tz=datetime.timezone.utc) + if isinstance(value, float): + return datetime.datetime.fromtimestamp(value, tz=datetime.timezone.utc) + return None + + +def create_manual_unmute_table(ydb_wrapper, table_path): + create_sql = f""" + CREATE TABLE IF NOT EXISTS `{table_path}` ( + `full_name` Utf8 NOT NULL, + `branch` Utf8 NOT NULL, + `build_type` Utf8 NOT NULL, + `github_issue_number` Uint64 NOT NULL, + `requested_at` Timestamp NOT NULL, + `window_days` Uint32 NOT NULL, + PRIMARY KEY (full_name, branch, build_type) + ) + WITH (STORE = COLUMN) + """ + ydb_wrapper.create_table(table_path, create_sql) + + +def fetch_all_rows(ydb_wrapper, table_path): + query = f""" + SELECT full_name, branch, build_type, github_issue_number, requested_at, window_days + FROM `{table_path}` + """ + return ydb_wrapper.execute_scan_query(query, query_name='manual_unmute_fetch_all') + + +def count_rows_per_issue(ydb_wrapper, table_path, issue_numbers): + """Return {issue_number: remaining_row_count} for the given issues.""" + numbers = sorted({int(n) for n in (issue_numbers or []) if n is not None}) + if not numbers: + return {} + in_list = ','.join(str(n) for n in numbers) + query = f""" + SELECT github_issue_number AS n, COUNT(*) AS c + FROM `{table_path}` + WHERE github_issue_number IN ({in_list}) + GROUP BY github_issue_number + """ + rows = ydb_wrapper.execute_scan_query(query, query_name='manual_unmute_count_remaining') + return {int(r['n']): int(r['c']) for r in rows if r.get('n') is not None} + + +def fetch_candidate_issues(ydb_wrapper, issues_table_path, lookback_days): + query = f""" + SELECT issue_id, issue_number, body + FROM `{issues_table_path}` + WHERE state = 'CLOSED' + AND state_reason = 'COMPLETED' + AND closed_at >= CurrentUtcTimestamp() - {int(lookback_days)} * Interval("P1D") + """ + return ydb_wrapper.execute_scan_query(query, query_name='manual_unmute_candidate_issues') + + +def fetch_currently_muted(ydb_wrapper, tests_monitor_path, branch, build_type): + lb = int(get_manual_unmute_currently_muted_lookback_days()) + br = _escape(branch) + bt = _escape(build_type) + query = f""" + SELECT t.full_name AS full_name + FROM `{tests_monitor_path}` AS t + INNER JOIN ( + SELECT full_name AS fn, MAX(date_window) AS max_date_window + FROM `{tests_monitor_path}` + WHERE branch = '{br}' + AND build_type = '{bt}' + AND date_window >= CurrentUtcDate() - {lb} * Interval("P1D") + GROUP BY full_name + ) AS last_row + ON t.full_name = last_row.fn AND t.date_window = last_row.max_date_window + WHERE t.branch = '{br}' + AND t.build_type = '{bt}' + AND t.is_muted = 1 + """ + rows = ydb_wrapper.execute_scan_query(query, query_name='manual_unmute_currently_muted') + result = {row['full_name'] for row in rows if row.get('full_name')} + pretend_unmuted = _simulate_unmuted_full_names() + if pretend_unmuted: + logging.warning( + '%s active — excluding from currently-muted (simulate is_muted=0): %s', + _SIMULATE_UNMUTED_ENV, + ', '.join(sorted(pretend_unmuted)), + ) + result -= pretend_unmuted + return result + + +def create_fast_unmute_grace_table(ydb_wrapper, table_path): + create_sql = f""" + CREATE TABLE IF NOT EXISTS `{table_path}` ( + `full_name` Utf8 NOT NULL, + `branch` Utf8 NOT NULL, + `build_type` Utf8 NOT NULL, + `github_issue_number` Uint64 NOT NULL, + `fast_track_requested_at` Timestamp NOT NULL, + `grace_started_at` Timestamp NOT NULL, + `grace_ttl_days` Uint32 NOT NULL, + PRIMARY KEY (full_name, branch, build_type) + ) + WITH (STORE = COLUMN) + """ + ydb_wrapper.create_table(table_path, create_sql) + + +def upsert_fast_unmute_grace_row( + ydb_wrapper, + table_path, + full_name, + branch, + build_type, + github_issue_number, + fast_track_requested_at, + grace_started_at, + grace_ttl_days, +): + column_types = ( + ydb.BulkUpsertColumns() + .add_column('full_name', ydb.PrimitiveType.Utf8) + .add_column('branch', ydb.PrimitiveType.Utf8) + .add_column('build_type', ydb.PrimitiveType.Utf8) + .add_column('github_issue_number', ydb.PrimitiveType.Uint64) + .add_column('fast_track_requested_at', ydb.PrimitiveType.Timestamp) + .add_column('grace_started_at', ydb.PrimitiveType.Timestamp) + .add_column('grace_ttl_days', ydb.PrimitiveType.Uint32) + ) + rows = [ + { + 'full_name': full_name, + 'branch': branch, + 'build_type': build_type, + 'github_issue_number': int(github_issue_number), + 'fast_track_requested_at': fast_track_requested_at, + 'grace_started_at': grace_started_at, + 'grace_ttl_days': int(grace_ttl_days), + } + ] + ydb_wrapper.bulk_upsert(table_path, rows, column_types) + + +def expire_fast_unmute_grace(ydb_wrapper, table_path): + """Remove grace rows after ``grace_ttl_days`` calendar days since ``grace_started_at``.""" + query = f""" + SELECT full_name, branch, build_type, grace_started_at, grace_ttl_days + FROM `{table_path}` + """ + try: + rows = ydb_wrapper.execute_scan_query(query, query_name='fast_unmute_grace_expire_scan') + except Exception as exc: + logging.warning('expire_fast_unmute_grace: scan failed: %s', exc) + return + + today = datetime.datetime.now(tz=datetime.timezone.utc).date() + + for row in rows: + gs = _coerce_dt(row.get('grace_started_at')) + if not gs: + continue + gs_date = gs.astimezone(datetime.timezone.utc).date() + threshold = max(1, int(row['grace_ttl_days'])) + if (today - gs_date).days >= threshold: + delete_grace_row( + ydb_wrapper, + table_path, + row.get('full_name'), + row.get('branch'), + row.get('build_type'), + ) + + +def delete_grace_row(ydb_wrapper, table_path, full_name, branch, build_type): + if not full_name or not branch or not build_type: + return + query = f""" + DECLARE $full_name AS Utf8; + DECLARE $branch AS Utf8; + DECLARE $build_type AS Utf8; + DELETE FROM `{table_path}` + WHERE full_name = $full_name + AND branch = $branch + AND build_type = $build_type; + """ + ydb_wrapper.execute_dml( + query, + {'$full_name': full_name, '$branch': branch, '$build_type': build_type}, + query_name='fast_unmute_grace_delete', + ) + + +def upsert_rows(ydb_wrapper, table_path, rows): + if not rows: + return + column_types = ( + ydb.BulkUpsertColumns() + .add_column('full_name', ydb.PrimitiveType.Utf8) + .add_column('branch', ydb.PrimitiveType.Utf8) + .add_column('build_type', ydb.PrimitiveType.Utf8) + .add_column('github_issue_number', ydb.PrimitiveType.Uint64) + .add_column('requested_at', ydb.PrimitiveType.Timestamp) + .add_column('window_days', ydb.PrimitiveType.Uint32) + ) + ydb_wrapper.bulk_upsert(table_path, rows, column_types) + + +def delete_row(ydb_wrapper, table_path, full_name, branch, build_type): + query = f""" + DECLARE $full_name AS Utf8; + DECLARE $branch AS Utf8; + DECLARE $build_type AS Utf8; + DELETE FROM `{table_path}` + WHERE full_name = $full_name + AND branch = $branch + AND build_type = $build_type; + """ + ydb_wrapper.execute_dml( + query, + {'$full_name': full_name, '$branch': branch, '$build_type': build_type}, + query_name='manual_unmute_delete_row', + ) diff --git a/.github/scripts/tests/mute/manual_unmute.py b/.github/scripts/tests/mute/manual_unmute.py new file mode 100644 index 00000000000..1816c23c337 --- /dev/null +++ b/.github/scripts/tests/mute/manual_unmute.py @@ -0,0 +1,63 @@ +#!/usr/bin/env python3 + +"""CLI: ``sync`` manual fast-unmute (YDB ``fast_unmute_active``, GitHub label/comments, project status). + +Logic lives in ``mute.fast_unmute_*``; needs ``GITHUB_TOKEN``. Optional test env +``MANUAL_UNMUTE_SIMULATE_UNMUTED`` (comma-separated ``full_name``) — see ``mute.fast_unmute_ydb``. + +Run: ``python3 .../manual_unmute.py sync`` (``-v`` = log enter skip reasons). +""" + +import argparse +import logging +import os +import sys + +_mutedir = os.path.dirname(os.path.abspath(__file__)) +_tests_dir = os.path.dirname(_mutedir) +_scripts_dir = os.path.dirname(_tests_dir) +for _p in (_scripts_dir, os.path.join(_scripts_dir, 'analytics'), _tests_dir): + if _p not in sys.path: + sys.path.insert(0, _p) + +from mute.fast_unmute_pipeline import sync +from ydb_wrapper import YDBWrapper + +_LOG = logging.getLogger('manual_unmute') + + +def main(): + logging.basicConfig(level=logging.INFO, format='%(asctime)s %(levelname)s %(message)s') + + if not os.environ.get('GITHUB_TOKEN'): + logging.error( + 'GITHUB_TOKEN is required for GitHub GraphQL (issue close, labels, timeline). ' + 'Set it in workflow env or export it when running locally.' + ) + return 1 + + parser = argparse.ArgumentParser(description='Manual fast-unmute state machine') + subparsers = parser.add_subparsers(dest='mode', required=True) + sync_parser = subparsers.add_parser( + 'sync', + help='Enter new rows and clean up stale/failed/unmuted rows', + ) + sync_parser.add_argument( + '-v', + '--verbose', + action='store_true', + help='Log enter-phase skip reasons (does not enable ydb/grpc DEBUG)', + ) + args = parser.parse_args() + if getattr(args, 'verbose', False): + _LOG.setLevel(logging.DEBUG) + + with YDBWrapper() as ydb_wrapper: + if not ydb_wrapper.check_credentials(): + return 1 + sync(ydb_wrapper) + return 0 + + +if __name__ == '__main__': + raise SystemExit(main()) diff --git a/.github/scripts/tests/mute/naming.py b/.github/scripts/tests/mute/naming.py new file mode 100644 index 00000000000..531ee6e3d31 --- /dev/null +++ b/.github/scripts/tests/mute/naming.py @@ -0,0 +1,17 @@ +"""Map ``muted_ya`` / ``to_mute`` line format to ``tests_monitor.full_name``.""" + + +def mute_file_line_to_tests_monitor_full_name(line: str) -> str: + """Convert ``suite_folder test_name`` to monitor ``suite_folder/test_name``. + + Wildcard patterns are returned unchanged (callers usually skip them for YDB key match). + Lines without a separating space are returned unchanged (already ``full_name`` or opaque). + """ + if not line: + return line + if '*' in line or '?' in line: + return line + if ' ' not in line: + return line + suite_folder, test_name = line.rsplit(' ', 1) + return f'{suite_folder}/{test_name}' diff --git a/.github/scripts/tests/update_mute_issues.py b/.github/scripts/tests/mute/update_mute_issues.py index 93af746c39c..b3006f573ee 100755 --- a/.github/scripts/tests/update_mute_issues.py +++ b/.github/scripts/tests/mute/update_mute_issues.py @@ -3,14 +3,18 @@ import sys import requests from urllib.parse import quote_plus -# Import shared GitHub issue utilities -sys.path.append(os.path.join(os.path.dirname(__file__), '..')) +# Import shared GitHub issue utilities (``mute/`` → ``tests/`` → ``.github/scripts/``). +_scripts_dir = os.path.normpath(os.path.join(os.path.dirname(os.path.abspath(__file__)), '..', '..')) +if _scripts_dir not in sys.path: + sys.path.insert(0, _scripts_dir) from github_issue_utils import parse_body, DEFAULT_BUILD_TYPE ORG_NAME = 'ydb-platform' REPO_NAME = 'ydb' PROJECT_ID = '45' +# GitHub issue label set by ``mute/fast_unmute_github.py`` while fast-unmute rows exist. +MANUAL_FAST_UNMUTE_GITHUB_LABEL = 'manual-fast-unmute' TEST_HISTORY_DASHBOARD = "https://datalens.yandex/4un3zdm0zcnyr" CURRENT_TEST_HISTORY_DASHBOARD = "https://datalens.yandex/34xnbsom67hcq?" @@ -271,6 +275,11 @@ def fetch_all_issues(org_name=ORG_NAME, project_id=PROJECT_ID): state body createdAt + labels(first: 40) { + nodes { + name + } + } } } fieldValues(first: 20) { @@ -347,7 +356,6 @@ def generate_github_issue_title_and_body(test_data): branch = test_data[0]['branch'] build_type = test_data[0].get('build_type', DEFAULT_BUILD_TYPE) test_full_names = [f"{d['full_name']}" for d in test_data] - test_mute_strings = [f"{d['mute_string']}" for d in test_data] summary = [ f"{d['test_name']}: {d['state']} last {d['days_in_state']} days, at {d['date_window']}: success_rate {d['success_rate']}%, {d['summary']}" for d in test_data @@ -363,8 +371,6 @@ def generate_github_issue_title_and_body(test_data): # Преобразование списка тестов в строку и кодирование test_string = "\n".join(test_full_names) - test_mute_strings_string = "\n".join(test_mute_strings) - summary_string = "\n".join(summary) # Создаем ссылку на историю тестов, кодируя параметры @@ -388,12 +394,12 @@ def generate_github_issue_title_and_body(test_data): f"Build type:<!--build_type_list_start-->\n" f"{build_type}\n" f"<!--build_type_list_end-->\n\n" - f"**Add line to [muted_ya.txt](https://github.com/ydb-platform/ydb/blob/main/.github/config/muted_ya.txt):**\n" - "```\n" - f"{test_mute_strings_string}\n" - "```\n\n" f"Owner: {owner}\n\n" - "**Read more in [mute_rules.md](https://github.com/ydb-platform/ydb/blob/main/.github/config/mute_rules.md)**\n\n" + "**About this issue**\n" + "- Auto-created by the mute workflow.\n" + "- Bot closes when all listed tests are unmuted or no longer exist.\n" + "- Close as **Completed** to trigger fast-unmute.\n" + "- [mute_rules.md](https://github.com/ydb-platform/ydb/blob/main/.github/config/mute_rules.md) — full details.\n\n" f"**Summary history:** \n {summary_string}\n" "\n\n" f"**Test run history:** [link]({test_run_history_link})\n\n" @@ -408,11 +414,23 @@ def generate_github_issue_title_and_body(test_data): def get_issues_and_tests_from_project(ORG_NAME, PROJECT_ID): + """Project items → issue index. + + Includes **open** issues and **closed** issues only when the GitHub label + ``manual-fast-unmute`` is present (fast-unmute without reopening the issue). + Other closed cards are skipped so downstream logic matches project 45 only. + """ issues = fetch_all_issues(ORG_NAME, PROJECT_ID) all_issues_with_contet = {} for issue in issues: content = issue['content'] if content: + state = content.get('state') + label_nodes = (content.get('labels') or {}).get('nodes') or [] + label_names = [n['name'] for n in label_nodes if n and n.get('name')] + if state == 'CLOSED' and MANUAL_FAST_UNMUTE_GITHUB_LABEL not in label_names: + continue + body = content['body'] parsed = parse_body(body) @@ -451,6 +469,7 @@ def get_issues_and_tests_from_project(ORG_NAME, PROJECT_ID): all_issues_with_contet[content['id']]['tests'] = [] all_issues_with_contet[content['id']]['branches'] = parsed.branches all_issues_with_contet[content['id']]['build_type'] = parsed.build_type + all_issues_with_contet[content['id']]['labels'] = label_names for test in parsed.tests: all_issues_with_contet[content['id']]['tests'].append(test) @@ -460,26 +479,46 @@ def get_issues_and_tests_from_project(ORG_NAME, PROJECT_ID): return all_issues_with_contet -def get_muted_tests_from_issues(): - issues = get_issues_and_tests_from_project(ORG_NAME, PROJECT_ID) +def map_tests_to_manual_fast_unmute_issue_url(issues_dict): + """``(full_name, build_type)`` → issue URL for issues whose ``labels`` include fast-unmute. + + Expects the filtered project index from ``get_issues_and_tests_from_project``. + """ + out = {} + for _issue_id, info in (issues_dict or {}).items(): + labels = info.get('labels') or [] + if MANUAL_FAST_UNMUTE_GITHUB_LABEL not in labels: + continue + bt = info.get('build_type') or DEFAULT_BUILD_TYPE + url = info.get('url') + for test in info.get('tests') or []: + key = (test, bt) + if key not in out: + out[key] = url + return out + + +def get_muted_tests_from_issues(issues_dict=None): + if issues_dict is None: + issues_dict = get_issues_and_tests_from_project(ORG_NAME, PROJECT_ID) muted_tests = {} - + # First, collect all issues for each (test, build_type) key - for issue in issues: - if issues[issue]["state"] != 'CLOSED': - bt = issues[issue].get('build_type') or DEFAULT_BUILD_TYPE - for test in issues[issue]['tests']: + for issue in issues_dict: + if issues_dict[issue]["state"] != 'CLOSED': + bt = issues_dict[issue].get('build_type') or DEFAULT_BUILD_TYPE + for test in issues_dict[issue]['tests']: key = (test, bt) if key not in muted_tests: muted_tests[key] = [] muted_tests[key].append( { - 'url': issues[issue]['url'], - 'createdAt': issues[issue]['createdAt'], - 'status_updated': issues[issue]['status_updated'], - 'status': issues[issue]['status'], - 'state': issues[issue]['state'], - 'branches': issues[issue]['branches'], + 'url': issues_dict[issue]['url'], + 'createdAt': issues_dict[issue]['createdAt'], + 'status_updated': issues_dict[issue]['status_updated'], + 'status': issues_dict[issue]['status'], + 'state': issues_dict[issue]['state'], + 'branches': issues_dict[issue]['branches'], 'build_type': bt, 'id': issue, } |
