summary | refs | log | tree | commit | diff | stats
path: root/.github/scripts/github_issue_utils.py
diff options
context:
space:
mode:
author: Kirill Rysin <[email protected]> 2026-04-08 14:17:46 +0200
committer: GitHub <[email protected]> 2026-04-08 15:17:46 +0300
commit: c7d57dfe0fbf696215040469558aeb2a2fb81d52 (patch)
tree: 832d7aa6dd49716ea50e26daaf45df238ce7e16c /.github/scripts/github_issue_utils.py
parent: 1864747253932624d9201f3c2fd904cd257e29a4 (diff)
Dev new automerge mute (#37417)
Co-authored-by: Cursor Agent <[email protected]> Co-authored-by: Kirill Rysin <[email protected]>
Diffstat (limited to '.github/scripts/github_issue_utils.py')
-rw-r--r-- .github/scripts/github_issue_utils.py 242
1 file changed, 203 insertions, 39 deletions
diff --git a/.github/scripts/github_issue_utils.py b/.github/scripts/github_issue_utils.py
index e21107d29be..a0052935362 100644
--- a/.github/scripts/github_issue_utils.py
+++ b/.github/scripts/github_issue_utils.py
@@ -5,32 +5,182 @@ Shared utilities for working with GitHub issues and parsing test names from issu
Used by both the muted test analytics and issue management scripts.
"""
+import datetime as dt
import re
+from collections import defaultdict
+from dataclasses import dataclass, field
+from typing import Dict, List, Optional, Tuple
-def parse_body(body):
- """Parse GitHub issue body to extract test names and branches
-
+DEFAULT_BUILD_TYPE = 'relwithdebinfo'
+DEFAULT_BRANCH = 'main'
+
+
def scan_to_utc_date(val) -> Optional[dt.date]:
    """Convert a value read from a YDB scan into a UTC calendar date.

    Accepted inputs:
      - ``None`` or any unsupported type -> ``None``.
      - ``dt.datetime`` -> its date; timezone-aware values are converted to
        UTC first, naive values are used as they are.
      - ``dt.date`` -> returned unchanged.
      - ``int`` (result sets without native dates), interpreted by magnitude:
          * below 100_000            -> days since 1970-01-01 (YDB Date)
          * below 10_000_000_000     -> seconds since 1970-01-01 (YDB Datetime)
          * anything larger          -> microseconds since 1970-01-01 (YDB Timestamp)
    """
    day_count_limit = 100_000
    second_count_limit = 10_000_000_000

    # datetime is a subclass of date, so it has to be checked first.
    if isinstance(val, dt.datetime):
        moment = val if val.tzinfo is None else val.astimezone(dt.timezone.utc)
        return moment.date()
    if isinstance(val, dt.date):
        return val
    if not isinstance(val, int):
        # Covers None as well as every other unsupported type.
        return None

    if val < day_count_limit:
        return dt.date(1970, 1, 1) + dt.timedelta(days=val)

    # Seconds are used directly; microseconds are scaled down to seconds.
    epoch_seconds = val if val < second_count_limit else val / 1_000_000
    return dt.datetime.fromtimestamp(epoch_seconds, tz=dt.timezone.utc).date()
+
+
def normalize_analytics_area(raw) -> str:
    """Reduce an area label to its first two ``/``-separated segments.

    The value is stringified and stripped first. ``None`` and blank input
    yield the placeholder ``area/-``; a value without any ``/`` is returned
    (stripped) as is. Per the original note, this mirrors the YQL
    ``$normalize`` helper.
    """
    text = "" if raw is None else str(raw).strip()
    if not text:
        return "area/-"
    segments = text.split("/")
    # A single segment means there was no "/" at all: keep the whole string.
    return text if len(segments) < 2 else "/".join(segments[:2])
+
+
def monitor_owner_to_team_key(owner) -> str:
    """Turn a monitor ``owner`` value into a lowercase team key.

    Every occurrence of ``TEAM:@ydb-platform/`` is removed, surrounding
    whitespace is stripped and the result is lowercased. ``None`` maps to
    the empty string. Per the original note, this mirrors the SQL expression
    ``Unicode::ToLower(ReplaceAll(owner, 'TEAM:@ydb-platform/', ''))``.
    """
    if owner is None:
        return ""
    team_prefix = "TEAM:@ydb-platform/"
    without_prefix = str(owner).replace(team_prefix, "")
    return without_prefix.strip().lower()
+
+
def resolve_team_by_longest_area_prefix(normalized_area: str, area_to_owner: Dict[str, str]) -> Optional[str]:
    """Return the team mapped to the longest key that covers ``normalized_area``.

    A key covers the area when the area equals the key or starts with the
    key followed by ``/``. Empty keys are ignored. Returns ``None`` when no
    key covers the area.
    """
    covering_keys = [
        key
        for key in area_to_owner
        if key and (normalized_area == key or normalized_area.startswith(key + "/"))
    ]
    if not covering_keys:
        return None
    # Two different keys of equal length cannot both cover the same area,
    # so the longest covering key is unique.
    return area_to_owner[max(covering_keys, key=len)]
+
+
def area_to_owner_map_from_rows(rows: List[dict]) -> Dict[str, str]:
    """Build a mapping from normalized area to owner team.

    Rows whose ``area`` or ``owner_team`` is missing or falsy are skipped.
    The area is normalized with ``normalize_analytics_area`` and the owner
    team is stringified and stripped. When several rows normalize to the
    same area, the last one wins.
    """
    mapping: Dict[str, str] = {}
    for row in rows:
        area = row.get("area")
        owner_team = row.get("owner_team")
        if area and owner_team:
            mapping[normalize_analytics_area(str(area))] = str(owner_team).strip()
    return mapping
+
+
def min_area_by_owner_team_from_rows(rows: List[dict]) -> Dict[str, str]:
    """Map each owner team to its lexicographically smallest normalized area.

    Keys are the stripped, lowercased ``owner_team`` values. Rows whose
    ``area`` or ``owner_team`` is missing or falsy are skipped. Per the
    original note, this matches the ``MIN(normalize(area))`` fallback used
    by the SQL mart.
    """
    smallest: Dict[str, str] = {}
    for row in rows:
        area, owner_team = row.get("area"), row.get("owner_team")
        if not (area and owner_team):
            continue
        team_key = str(owner_team).strip().lower()
        candidate = normalize_analytics_area(str(area))
        # Keep a running minimum instead of collecting every area first.
        if team_key not in smallest or candidate < smallest[team_key]:
            smallest[team_key] = candidate
    return smallest
+
+
def pick_effective_analytics_area(
    area_override,
    area_override_since,
    date_window: dt.date,
    owner_team_key: str,
    min_area_by_owner: Dict[str, str],
) -> str:
    """Choose the analytics area that applies to one row.

    A non-blank ``area_override`` (normalized) is used when it has no start
    date or when ``date_window`` is on or after that start date. Otherwise
    the area registered for ``owner_team_key`` in ``min_area_by_owner`` is
    returned, or ``area/-`` when there is none.
    """
    override_text = "" if area_override is None else str(area_override).strip()
    if override_text:
        override_area = normalize_analytics_area(area_override)
        if override_area:
            active_from = scan_to_utc_date(area_override_since)
            override_applies = active_from is None or date_window >= active_from
            if override_applies:
                return override_area
    # No usable override: fall back to the owner's default area.
    return min_area_by_owner.get(owner_team_key) or "area/-"
+
+
def effective_owner_team_for_area(
    effective_area: str, area_to_owner: Dict[str, str], owner_team_key: str
) -> str:
    """Return the owner team for ``effective_area``.

    The team is looked up with ``resolve_team_by_longest_area_prefix`` and
    returned stripped and lowercased. When the lookup yields nothing (or a
    falsy value), ``owner_team_key`` is returned unchanged.
    """
    team = resolve_team_by_longest_area_prefix(effective_area, area_to_owner)
    if not team:
        return owner_team_key
    return str(team).strip().lower()
+
+
def compute_effective_analytics_row(
    row: dict,
    gim_by_key: Dict[Tuple[str, str, str], dict],
    area_to_owner: Dict[str, str],
    min_area_by_owner: Dict[str, str],
) -> Tuple[str, str]:
    """Compute ``(effective_area, effective_owner_team)`` for one row.

    Args:
        row: Mapping that must contain ``full_name``, ``branch``,
            ``build_type`` and ``date_window``; ``owner`` is optional.
        gim_by_key: Lookup keyed by ``(full_name, branch, build_type)``
            (all stringified). Entries may carry ``area_override`` and
            ``area_override_since``.
        area_to_owner: Normalized area -> owner team mapping.
        min_area_by_owner: Owner team key -> fallback area mapping.

    Returns:
        The effective area and the owner team resolved for that area.

    Raises:
        KeyError: If ``row`` lacks one of the required keys.
    """
    otk = monitor_owner_to_team_key(row.get("owner"))
    key = (str(row["full_name"]), str(row["branch"]), str(row["build_type"]))
    g = gim_by_key.get(key, {})

    dw = row["date_window"]
    if isinstance(dw, dt.datetime):
        dw = dw.date()
    elif isinstance(dw, int):
        # Result sets without native dates deliver raw integers. Normalize
        # them like ``area_override_since`` so the date comparison inside
        # pick_effective_analytics_area does not raise TypeError.
        dw = scan_to_utc_date(dw)

    eff_area = pick_effective_analytics_area(
        g.get("area_override"),
        g.get("area_override_since"),
        dw,
        otk,
        min_area_by_owner,
    )
    eff_ot = effective_owner_team_for_area(eff_area, area_to_owner, otk)
    return eff_area, eff_ot
+
+
@dataclass
class ParsedIssueBody:
    """Structured result of parsing a GitHub mute-issue body."""

    # Test names found in the issue body (lines starting with ``ydb/``).
    tests: List[str] = field(default_factory=list)
    # Branches the issue applies to; defaults to the module-wide default
    # branch instead of repeating the literal.
    branches: List[str] = field(default_factory=lambda: [DEFAULT_BRANCH])
    # Build type the issue applies to.
    build_type: str = DEFAULT_BUILD_TYPE
+
+
+def _extract_between_markers(body: str, start_marker: str, end_marker: str) -> Optional[str]:
+ """Return text between two HTML comment markers, or None if markers are absent."""
+ if start_marker not in body or end_marker not in body:
+ return None
+ idx1 = body.find(start_marker)
+ idx2 = body.find(end_marker)
+ return body[idx1 + len(start_marker) + 1 : idx2]
+
+
+def parse_body(body: str) -> ParsedIssueBody:
+ """Parse GitHub issue body to extract test names, branches and build_type.
+
Args:
- body (str): The GitHub issue body text
-
+ body: The GitHub issue body text
+
Returns:
- tuple: (tests, branches) - lists of test names and branch names
+ ParsedIssueBody with extracted fields (all have sensible defaults).
"""
- tests = []
- branches = []
- prepared_body = ''
- start_mute_list = "<!--mute_list_start-->"
- end_mute_list = "<!--mute_list_end-->"
- start_branch_list = "<!--branch_list_start-->"
- end_branch_list = "<!--branch_list_end-->"
+ result = ParsedIssueBody()
- # Extract tests
- if all(x in body for x in [start_mute_list, end_mute_list]):
- idx1 = body.find(start_mute_list)
- idx2 = body.find(end_mute_list)
- lines = body[idx1 + len(start_mute_list) + 1 : idx2].split('\n')
+ # --- tests ---
+ mute_block = _extract_between_markers(body, "<!--mute_list_start-->", "<!--mute_list_end-->")
+ if mute_block is not None:
+ lines = mute_block.split('\n')
else:
+ prepared_body = ''
if body.startswith('Mute:'):
prepared_body = body.split('Mute:', 1)[1].strip()
elif body.startswith('Mute'):
@@ -38,42 +188,55 @@ def parse_body(body):
elif body.startswith('ydb'):
prepared_body = body
lines = prepared_body.split('**Add line to')[0].split('\n')
- tests = [line.strip() for line in lines if line.strip().startswith('ydb/')]
+ result.tests = [line.strip() for line in lines if line.strip().startswith('ydb/')]
- # Extract branches
- if all(x in body for x in [start_branch_list, end_branch_list]):
- idx1 = body.find(start_branch_list)
- idx2 = body.find(end_branch_list)
- branches = [branch.strip() for branch in body[idx1 + len(start_branch_list) + 1 : idx2].split('\n') if branch.strip()]
- else:
- branches = ['main']
+ # --- branches ---
+ branch_block = _extract_between_markers(body, "<!--branch_list_start-->", "<!--branch_list_end-->")
+ if branch_block is not None:
+ result.branches = [b.strip() for b in branch_block.split('\n') if b.strip()]
+
+ # --- build_type ---
+ bt_block = _extract_between_markers(body, "<!--build_type_list_start-->", "<!--build_type_list_end-->")
+ if bt_block is not None:
+ val = bt_block.strip()
+ if val:
+ result.build_type = val
- return tests, branches
+ return result
+
+
def make_profile_id(branch: str, build_type: str) -> str:
    """Build the canonical ``profile_id`` string ``branch:build_type``.

    A colon is the separator so that build presets containing a dash (for
    example ``release-asan``) remain unambiguous. Per the original note,
    the id is used by digest_queue and the notification config, and legacy
    rows may still use ``branch-build_type``; those need migrating in YDB
    to be picked up by code using this format.
    """
    return "{}:{}".format(branch, build_type)
def create_test_issue_mapping(issues_data):
"""Create a mapping from test names to GitHub issue information
-
+
Args:
issues_data (list): List of issue dictionaries with 'body', 'url', 'title', 'issue_number' fields
-
+
Returns:
dict: Mapping from test name to list of issue information
"""
test_to_issue = {}
-
+
for issue in issues_data:
body = issue.get('body', '')
url = issue.get('url', '')
-
+
if not body or not url:
continue
-
+
try:
- # Use the parse_body function to extract tests and branches
- tests, branches = parse_body(body)
-
- for test in tests:
+ parsed = parse_body(body)
+
+ for test in parsed.tests:
if test not in test_to_issue:
test_to_issue[test] = []
test_to_issue[test].append({
@@ -82,10 +245,11 @@ def create_test_issue_mapping(issues_data):
'issue_number': issue.get('issue_number', 0),
'state': issue.get('state', ''),
'created_at': issue.get('created_at', 0),
- 'branches': branches
+ 'branches': parsed.branches,
+ 'build_type': parsed.build_type,
})
except Exception as e:
print(f"Warning: Could not parse issue body for issue {url}: {e}")
continue
-
- return test_to_issue \ No newline at end of file
+
+ return test_to_issue