#!/usr/bin/env python3
"""
Collect step summaries from all matrix jobs and generate a combined markdown table.

This script:
1. Collects GITHUB_STEP_SUMMARY files from all matrix jobs (via artifacts)
2. Parses test statistics from each summary
3. Generates a combined markdown table with branches as rows and build_presets as columns
4. Writes the combined summary to GITHUB_STEP_SUMMARY
"""

import argparse
import json
import os
import re
import sys
from pathlib import Path
from typing import Dict, List, Optional, Tuple

# Header row of a per-attempt statistics table:
# | TESTS | PASSED | ERRORS | FAILED | SKIPPED | MUTED |
_TABLE_HEADER_RE = re.compile(
    r'\|\s*TESTS\s*\|\s*PASSED\s*\|\s*ERRORS\s*\|\s*FAILED\s*\|\s*SKIPPED\s*\|\s*MUTED',
    re.IGNORECASE,
)

# Test report links: storage.yandexcloud.net URLs that point at ya-test.html.
_REPORT_URL_RE = re.compile(
    r'https://storage\.yandexcloud\.net/[^\s\)]+ya-test\.html[^\s\)]*'
)

# Statistic names, in the column order of the table header.
_STAT_KEYS = ('tests', 'passed', 'errors', 'failed', 'skipped', 'muted')

# Characters a markdown header/body separator row (e.g. "| ---: | --- |") may contain.
_SEPARATOR_CHARS = frozenset('|-: \t')


def _extract_number(cell: str) -> int:
    """Return the first integer in a cell ("624" or "[624](url)"), or 0 if none."""
    match = re.search(r'\d+', cell)
    return int(match.group(0)) if match else 0


def _find_table_headers(lines: List[str]) -> List[int]:
    """Return the indices of all lines that look like a statistics table header."""
    return [idx for idx, line in enumerate(lines) if _TABLE_HEADER_RE.search(line)]


def _is_separator_row(line: str) -> bool:
    """Tell whether a line is a markdown separator row rather than data."""
    # Checking only for '---' would also discard data rows whose report URL
    # happens to contain '---', so the character set is restricted as well.
    return '---' in line and _SEPARATOR_CHARS.issuperset(line)


def _find_data_row(lines: List[str], header_idx: int) -> Optional[List[str]]:
    """
    Find the first data row after the table header at header_idx.

    Returns:
        list of non-empty, stripped cells (at least 6 of them), or None when
        no data row is found before the next table header or the end of text
    """
    for idx in range(header_idx + 1, len(lines)):
        line = lines[idx].strip()
        if _is_separator_row(line):
            continue
        if line.startswith('|') and line.count('|') >= 6:
            cells = [cell for cell in (part.strip() for part in line.split('|')) if cell]
            if len(cells) >= 6:
                return cells
        # Never read a row that belongs to the following table.
        if idx + 1 < len(lines) and _TABLE_HEADER_RE.search(lines[idx + 1]):
            return None
    return None


def _stats_from_cells(cells: List[str]) -> Dict[str, int]:
    """Map the first six cells of a data row onto the statistic names."""
    return {key: _extract_number(cell) for key, cell in zip(_STAT_KEYS, cells)}


def _url_from_cells(cells: List[str]) -> Optional[str]:
    """Return the report URL from the FAILED cell, else from the TESTS cell."""
    for cell in (cells[3], cells[0]):
        match = _REPORT_URL_RE.search(cell)
        if match:
            return match.group(0)
    return None


def _has_results(stats: Dict[str, int]) -> bool:
    """Tell whether the statistics carry any tests, failures or errors."""
    return stats['tests'] > 0 or stats['failed'] > 0 or stats['errors'] > 0


def parse_all_attempts(summary_text: str) -> List[Dict]:
    """
    Parse all attempt tables from markdown summary.

    Attempts are numbered by table position, starting at 1. A table is
    omitted from the result when it has no data row, or when its row has
    no tests, failures, errors or report URL.

    Returns:
        list of dicts with keys 'attempt', 'stats' and 'url', one per
        attempt (try_1, try_2, try_3, etc.)
    """
    if not summary_text:
        return []

    lines = summary_text.split('\n')
    attempts = []
    for attempt_num, header_idx in enumerate(_find_table_headers(lines), start=1):
        cells = _find_data_row(lines, header_idx)
        if cells is None:
            continue
        stats = _stats_from_cells(cells)
        attempt_url = _url_from_cells(cells)
        if _has_results(stats) or attempt_url:
            attempts.append({
                'attempt': attempt_num,
                'stats': stats,
                'url': attempt_url,
            })
    return attempts


def parse_summary_markdown(summary_text: str) -> Optional[Dict]:
    """
    Parse test statistics from markdown summary.

    Looks for markdown table format:
    | TESTS | PASSED | ERRORS | FAILED | SKIPPED | MUTED |

    There can be multiple tables (for retries: try_1, try_2, try_3).
    Only the LAST table (last attempt) is used; values are not summed.

    Returns:
        dict with test statistics from last attempt, or None if no table
        was found or its row has no tests, failures or errors
    """
    if not summary_text:
        return None

    lines = summary_text.split('\n')
    headers = _find_table_headers(lines)
    if not headers:
        return None

    cells = _find_data_row(lines, headers[-1])
    if cells is None:
        return None

    stats = _stats_from_cells(cells)
    return stats if _has_results(stats) else None


def extract_test_report_url(summary_text: str) -> Optional[str]:
    """
    Extract test report URL from summary.

    Takes URL from the LAST attempt (last table) - from FAILED column if
    available, otherwise from TESTS column. If that row has no URL, falls
    back to the last matching URL anywhere in the summary.

    Returns:
        the report URL, or None if there is no statistics table or no URL
    """
    if not summary_text:
        return None

    lines = summary_text.split('\n')
    headers = _find_table_headers(lines)
    if not headers:
        return None

    cells = _find_data_row(lines, headers[-1])
    if cells is not None:
        url = _url_from_cells(cells)
        if url:
            return url

    # Fallback: the last URL in the text is the closest to the last attempt.
    all_urls = _REPORT_URL_RE.findall(summary_text)
    return all_urls[-1] if all_urls else None


def extract_job_info_from_name(job_name: str) -> Tuple[Optional[str], Optional[str]]:
    """
    Extract branch and build_preset from job name.

    Accepted formats:
        "branch:build_preset"
        "Regression-run_... (build_preset) / branch:build_preset"

    Returns:
        (branch, build_preset), or (None, None) if the name contains no ':'
    """
    # Prefer the part after the last " / "; otherwise the job prefix would
    # end up inside the branch name.
    candidate = job_name.rsplit(' / ', 1)[-1]
    if ':' not in candidate:
        candidate = job_name
    if ':' not in candidate:
        return None, None

    # Only the first two ':'-separated fields are used.
    parts = candidate.split(':')
    return parts[0].strip(), parts[1].strip()


def collect_summaries_from_artifacts(artifacts_dir: str) -> Dict[str, Dict]:
    """
    Collect summaries from artifact files.

    Searches artifacts_dir recursively for files named
    "summary-{branch}:{build_preset}.md" or "{branch}:{build_preset} summary".
    Files that cannot be read or whose name yields no branch/build_preset
    are reported on stderr and skipped.

    Returns:
        dict mapping "branch:build_preset" to summary data with keys
        'branch', 'build_preset', 'job_name', 'stats', 'test_report_url'
        and 'all_attempts'
    """
    summaries = {}
    artifacts_path = Path(artifacts_dir)

    if not artifacts_path.exists():
        print(f"Warning: Artifacts directory {artifacts_dir} does not exist", file=sys.stderr)
        return summaries

    # Files may sit in subdirectories (one per artifact). Sorting makes the
    # result independent of directory traversal order when keys collide.
    summary_files = sorted(artifacts_path.rglob("summary-*.md"))
    summary_files.extend(sorted(artifacts_path.rglob("* summary")))

    for summary_file in summary_files:
        try:
            with open(summary_file, 'r', encoding='utf-8') as f:
                summary_text = f.read()

            filename = summary_file.name.strip()
            if filename.startswith('summary-') and filename.endswith('.md'):
                job_name = filename[len('summary-'):-len('.md')].strip()
            elif filename.endswith(' summary'):
                job_name = filename[:-len(' summary')].strip()
            else:
                job_name = summary_file.stem.replace('summary-', '').strip()

            branch, build_preset = extract_job_info_from_name(job_name)

            if (not branch or not build_preset) and ':' in job_name:
                # Retry with a single split so extra ':' stay in build_preset.
                head, tail = job_name.split(':', 1)
                branch = head.strip()
                build_preset = tail.strip()

            if not branch or not build_preset:
                print(
                    f"Warning: Could not extract branch/build_preset from {job_name} (file: {summary_file})",
                    file=sys.stderr,
                )
                continue

            key = f"{branch}:{build_preset}"
            summaries[key] = {
                'branch': branch,
                'build_preset': build_preset,
                'job_name': job_name,
                # Statistics and URL of the last attempt only.
                'stats': parse_summary_markdown(summary_text) or {},
                'test_report_url': extract_test_report_url(summary_text),
                # Every attempt, for the detailed table.
                'all_attempts': parse_all_attempts(summary_text),
            }
            print(f"Collected summary for {key}", file=sys.stderr)

        except Exception as e:
            # Best effort: one unreadable artifact must not hide the others.
            print(f"Warning: Error processing {summary_file}: {e}", file=sys.stderr)
            continue

    return summaries


def _format_result_cell(
    failed: int,
    errors: int,
    url: Optional[str],
    prefix: str = '',
    short: bool = False,
) -> str:
    """
    Render one result as markdown.

    Shows the failed+errors total with a breakdown, or a check mark when the
    total is zero; the leading item is a link when url is set. short selects
    the "2f, 1e" breakdown instead of "2 failed, 1 errors".
    """
    total_errors = failed + errors
    if total_errors <= 0:
        return prefix + (f"[✓]({url})" if url else "✓")

    count = f"[{total_errors}]({url})" if url else str(total_errors)
    failed_label = f"{failed}f" if short else f"{failed} failed"
    errors_label = f"{errors}e" if short else f"{errors} errors"
    if failed > 0 and errors > 0:
        breakdown = f"{failed_label}, {errors_label}"
    elif failed > 0:
        breakdown = failed_label
    else:
        breakdown = errors_label
    return f"{prefix}{count} ({breakdown})"


def _summary_cell(summary: Dict) -> str:
    """Render the last-attempt cell of the main table."""
    stats = summary.get('stats', {})
    return _format_result_cell(
        stats.get('failed', 0),
        stats.get('errors', 0),
        summary.get('test_report_url', ''),
    )


def _detailed_cell(summary: Dict) -> str:
    """Render the all-attempts cell of the detailed table."""
    all_attempts = summary.get('all_attempts', [])
    if not all_attempts:
        # No attempt was parsed: show the last-attempt data instead.
        stats = summary.get('stats', {})
        return _format_result_cell(
            stats.get('failed', 0),
            stats.get('errors', 0),
            summary.get('test_report_url', ''),
            prefix="last: ",
            short=True,
        )

    attempt_parts = []
    for attempt in all_attempts:
        attempt_stats = attempt.get('stats', {})
        attempt_parts.append(_format_result_cell(
            attempt_stats.get('failed', 0),
            attempt_stats.get('errors', 0),
            attempt.get('url', ''),
            prefix=f"try_{attempt.get('attempt', 0)}: ",
            short=True,
        ))
    # A raw newline would end the table row, so attempts are separated
    # with an HTML line break.
    return "<br>".join(attempt_parts)


def generate_combined_markdown_table(summaries: Dict[str, Dict]) -> str:
    """
    Generate combined markdown table.

    Rows: branches
    Columns: build_presets
    Cells: error counts with links (always show link, even if no errors)

    Plus detailed table with all attempts below. Branch/preset combinations
    without a summary are shown as "—".
    """
    if not summaries:
        return "No job summaries available.\n"

    branches = sorted(set(s['branch'] for s in summaries.values()))
    presets = sorted(set(s['build_preset'] for s in summaries.values()))

    if not branches or not presets:
        return "No valid job summaries found.\n"

    header = "| Branch | " + " | ".join(presets) + " |"
    separator = "|" + "---|" * (len(presets) + 1)

    lines = []
    # Both tables share the layout and differ only in how a cell is rendered.
    sections = (
        ("## 📊 Combined Test Summary\n", _summary_cell),
        ("\n### Detailed Results (All Attempts)\n", _detailed_cell),
    )
    for title, render_cell in sections:
        lines.append(title)
        lines.append(header)
        lines.append(separator)
        for branch in branches:
            row = [branch]
            for preset in presets:
                summary = summaries.get(f"{branch}:{preset}")
                row.append(render_cell(summary) if summary is not None else "—")
            lines.append("| " + " | ".join(row) + " |")

    return "\n".join(lines) + "\n"


def main():
    """
    Collect the artifacts, build the combined summary and write it out.

    The summary goes to --output, else to the file named by the
    GITHUB_STEP_SUMMARY environment variable, else to stdout.

    Returns:
        0 (process exit code)
    """
    parser = argparse.ArgumentParser(
        description='Collect step summaries from matrix jobs and generate combined markdown table'
    )
    parser.add_argument(
        '--artifacts-dir',
        required=True,
        help='Directory containing summary artifacts from matrix jobs'
    )
    parser.add_argument(
        '--output',
        help='Output file for combined summary (default: GITHUB_STEP_SUMMARY)'
    )

    args = parser.parse_args()

    print("Collecting summaries from artifacts...", file=sys.stderr)
    summaries = collect_summaries_from_artifacts(args.artifacts_dir)
    print(f"Found {len(summaries)} job summaries", file=sys.stderr)

    combined_markdown = generate_combined_markdown_table(summaries)

    output_file = args.output or os.environ.get('GITHUB_STEP_SUMMARY')
    if output_file:
        with open(output_file, 'w', encoding='utf-8') as f:
            f.write(combined_markdown)
        print(f"Combined summary written to {output_file}", file=sys.stderr)
    else:
        # Write to stdout if no output file specified
        print(combined_markdown)

    return 0


if __name__ == '__main__':
    sys.exit(main())