# .github/workflows/collect_analytics_fast.yml

# Scheduled / manually dispatched workflow that runs the scripts under
# .github/scripts/analytics one after another inside a single job.
name: Collect-analytics-fast-run
on:
  schedule:
    # Every 30 min
    - cron: '*/30 * * * *'
  workflow_dispatch:
    inputs:
      # Optional ref for manual runs; the Checkout step passes it to
      # actions/checkout as `ref`. Empty by default.
      commit_sha:
        type: string
        default: ''

defaults:
  run:
    # All `run:` steps in this workflow execute under bash.
    shell: bash
jobs:
  main:
    name: Checkout and setup
    # Runner labels; a runner must carry all three to pick up the job.
    runs-on:
      - self-hosted
      - auto-provisioned
      - build-preset-analytic-node
    steps:
    # Check out the repository. `ref` is taken from the workflow_dispatch input
    # `commit_sha`, whose default is the empty string; scheduled runs supply no
    # inputs, so the expression is empty there as well.
    - name: Checkout
      uses: actions/checkout@v5
      with:
        ref: ${{ inputs.commit_sha }}
    # Local action from this repository (resolved from the checked-out tree, so
    # it must come after Checkout). It receives one secret and one repository
    # variable; what it does with them is defined in the action itself —
    # presumably it prepares YDB credentials for the later steps, confirm there.
    - name: Setup ydb access
      uses: ./.github/actions/setup_ci_ydb_service_account_key_file_credentials
      with:
        ci_ydb_service_account_key_file_credentials: ${{ secrets.CI_YDB_SERVICE_ACCOUNT_KEY_FILE_CREDENTIALS }}
        ydb_qa_config: ${{ vars.YDB_QA_CONFIG }}
    # Install Python packages before the script steps below run.
    # There is no continue-on-error here, so a failed install stops the job.
    - name: Install dependencies
      run: |
        # "ydb[yc]" is quoted: unquoted, bash reads [yc] as a glob bracket
        # expression and would substitute a file named ydby/ydbc if one exists
        # in the working directory.
        python3 -m pip install ydb "ydb[yc]" codeowners pandas
    # Runs upload_testowners.py. continue-on-error keeps the job going (and the
    # remaining steps running) even if this script exits non-zero.
    - name: Collect testowners
      continue-on-error: true
      run: python3 .github/scripts/analytics/upload_testowners.py
    # Runs test_history_fast.py; a failure here does not fail the job
    # (continue-on-error).
    - name: Upload new test history to fast table
      continue-on-error: true
      run: python3 .github/scripts/analytics/test_history_fast.py
    # Invokes data_mart_executor.py with the perfomance_olap_suites_mart.sql
    # query and the table path perfomance/olap/fast_results_siutes.
    # NOTE(review): the "perfomance"/"siutes" spellings are part of existing
    # file and table names — do not correct them here without renaming those.
    # --ttl_min 43200 would be 30 days if the unit is minutes, as the flag name
    # suggests; confirm in data_mart_executor.py.
    - name: Upload olap perfomance suites data mart
      continue-on-error: true
      # Folded scalar: the lines below are joined with single spaces into one
      # command line.
      run: >-
        python3 .github/scripts/analytics/data_mart_executor.py
        --query_path .github/scripts/analytics/data_mart_queries/perfomance_olap_suites_mart.sql
        --table_path perfomance/olap/fast_results_siutes
        --store_type column
        --partition_keys RunTs
        --primary_keys RunTs Db Suite
        --ttl_min 43200
        --ttl_key RunTs
    # Invokes data_mart_executor.py with the perfomance_olap_mart.sql query and
    # the table path perfomance/olap/fast_results, keyed on
    # Run_start_timestamp / Db / Suite / Test / Branch.
    # --ttl_min 43200 would be 30 days if the unit is minutes; confirm in
    # data_mart_executor.py.
    - name: Upload olap perfomance data mart
      continue-on-error: true
      # Folded scalar: the lines below are joined with single spaces into one
      # command line.
      run: >-
        python3 .github/scripts/analytics/data_mart_executor.py
        --query_path .github/scripts/analytics/data_mart_queries/perfomance_olap_mart.sql
        --table_path perfomance/olap/fast_results
        --store_type column
        --partition_keys Run_start_timestamp
        --primary_keys Run_start_timestamp Db Suite Test Branch
        --ttl_min 43200
        --ttl_key Run_start_timestamp
    # Invokes data_mart_executor.py with the pr_check_stats.sql query and the
    # table path analytics/pr_check_stats; `date` is both the partition key and
    # the only primary key. No TTL flags are passed for this table.
    - name: Upload pr-check statistics data mart
      continue-on-error: true
      # Folded scalar: the lines below are joined with single spaces into one
      # command line.
      run: >-
        python3 .github/scripts/analytics/data_mart_executor.py
        --query_path .github/scripts/analytics/data_mart_queries/pr_check_stats.sql
        --table_path analytics/pr_check_stats
        --store_type column
        --partition_keys date
        --primary_keys date
    # Runs export_issues_to_ydb.py with the workflow's GITHUB_TOKEN exposed to
    # the script through the environment. Failures do not fail the job.
    - name: Export GitHub issues
      continue-on-error: true
      env:
        GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
      run: python3 .github/scripts/analytics/export_issues_to_ydb.py
    # Runs export_pull_requests_to_ydb.py with the workflow's GITHUB_TOKEN
    # exposed to the script through the environment. Failures do not fail the
    # job.
    - name: Export GitHub pull_requests
      continue-on-error: true
      env:
        GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
      run: python3 .github/scripts/analytics/export_pull_requests_to_ydb.py
    # Runs github_issue_mapping.py; a failure here does not fail the job
    # (continue-on-error).
    - name: Upload GitHub issue mapping table
      continue-on-error: true
      run: python3 .github/scripts/analytics/github_issue_mapping.py
    # Runs sync_area_to_owner_mapping.py (the step name says the source is
    # owner_area_mapping.json). The issues-timeline step further down is named
    # as running "after" this mapping, so keep this step ahead of it.
    - name: Sync area to owner mapping (from owner_area_mapping.json)
      continue-on-error: true
      run: python3 .github/scripts/analytics/sync_area_to_owner_mapping.py
    # Publishes the step output `run_cleanup` ("true" or "false") that the
    # gated data mart steps below read via steps.cleanup_gate.outputs.
    # The test looks only at the UTC hour at the moment this step executes, so
    # every run reaching this point between 02:00 and 02:59 UTC gets "true" —
    # with the */30 cron that can be more than one run per day.
    # This step has no continue-on-error: if it fails, the job stops here.
    - name: Decide cleanup schedule (daily at 02:00 UTC)
      id: cleanup_gate
      run: |
        run_cleanup=false
        if [ "$(date -u +%H)" = "02" ]; then
          run_cleanup=true
        fi
        echo "run_cleanup=${run_cleanup}" >> "$GITHUB_OUTPUT"
    # Invokes data_mart_executor.py for github_issues_timeline.sql. The only
    # thing the cleanup gate changes is the multiplier in
    # --cleanup_window_interval: 31 when run_cleanup is "true", 0 otherwise.
    # How the executor interprets that interval is not visible in this file.
    - name: Upload GitHub issues timeline data mart (after issues + area_to_owner_mapping)
      continue-on-error: true
      run: |
        if [ "${{ steps.cleanup_gate.outputs.run_cleanup }}" = "true" ]; then
          window_days=31
        else
          window_days=0
        fi
        python3 .github/scripts/analytics/data_mart_executor.py \
          --query_path .github/scripts/analytics/data_mart_queries/github_issues_timeline.sql \
          --table_path test_results/analytics/github_issues_timeline \
          --store_type column \
          --partition_keys date \
          --primary_keys date issue_number project_item_id \
          --cleanup_window_key date \
          --cleanup_window_interval "${window_days} * Interval(\"P1D\")"
    # Invokes data_mart_executor.py for github_issues_bugs_count_by_period.sql.
    # The cleanup gate selects the multiplier in --cleanup_window_interval:
    # 365 when run_cleanup is "true", 0 otherwise. How the executor interprets
    # that interval is not visible in this file.
    - name: Upload GitHub issues bugs count by period (after github_issues_timeline)
      continue-on-error: true
      run: |
        if [ "${{ steps.cleanup_gate.outputs.run_cleanup }}" = "true" ]; then
          window_days=365
        else
          window_days=0
        fi
        python3 .github/scripts/analytics/data_mart_executor.py \
          --query_path .github/scripts/analytics/data_mart_queries/github_issues_bugs_count_by_period.sql \
          --table_path test_results/analytics/github_issues_bugs_count_by_period \
          --store_type column \
          --partition_keys date_window area \
          --primary_keys date_window area \
          --cleanup_window_key date_window \
          --cleanup_window_interval "${window_days} * Interval(\"P1D\")"
    # Invokes data_mart_executor.py for muted_tests_with_issue_and_area.sql.
    # The cleanup gate selects the multiplier in --cleanup_window_interval:
    # 365 when run_cleanup is "true", 0 otherwise. How the executor interprets
    # that interval is not visible in this file.
    - name: Upload muted tests with issue and area data mart
      continue-on-error: true
      run: |
        if [ "${{ steps.cleanup_gate.outputs.run_cleanup }}" = "true" ]; then
          window_days=365
        else
          window_days=0
        fi
        python3 .github/scripts/analytics/data_mart_executor.py \
          --query_path .github/scripts/analytics/data_mart_queries/muted_tests_with_issue_and_area.sql \
          --table_path test_results/analytics/muted_tests_with_issue_and_area \
          --store_type column \
          --partition_keys date_window branch build_type \
          --primary_keys date_window full_name branch build_type \
          --cleanup_window_key date_window \
          --cleanup_window_interval "${window_days} * Interval(\"P1D\")"
    # Invokes data_mart_executor.py for muted_tests_daily_by_team.sql.
    # The cleanup gate selects the multiplier in --cleanup_window_interval:
    # 365 when run_cleanup is "true", 0 otherwise. How the executor interprets
    # that interval is not visible in this file.
    - name: Upload muted tests daily by team (pre-aggregation for BI)
      continue-on-error: true
      run: |
        if [ "${{ steps.cleanup_gate.outputs.run_cleanup }}" = "true" ]; then
          window_days=365
        else
          window_days=0
        fi
        python3 .github/scripts/analytics/data_mart_executor.py \
          --query_path .github/scripts/analytics/data_mart_queries/muted_tests_daily_by_team.sql \
          --table_path test_results/analytics/muted_tests_daily_by_team \
          --store_type column \
          --partition_keys date_window branch build_type area \
          --primary_keys date_window area branch build_type \
          --cleanup_window_key date_window \
          --cleanup_window_interval "${window_days} * Interval(\"P1D\")"
    # Invokes data_mart_executor.py for datamart_postcommit_retry.sql.
    # NOTE(review): --table_path repeats the "test_results/" prefix
    # (test_results/test_results/analytics/...), unlike the sibling steps that
    # write to test_results/analytics/... — confirm this is the intended table.
    # --ttl_min 259200 would be 180 days if the unit is minutes; confirm in
    # data_mart_executor.py.
    - name: Upload postcommit retry data mart
      continue-on-error: true
      # Folded scalar: the lines below are joined with single spaces into one
      # command line.
      run: >-
        python3 .github/scripts/analytics/data_mart_executor.py
        --query_path .github/scripts/analytics/data_mart_queries/datamart_postcommit_retry.sql
        --table_path test_results/test_results/analytics/postcommit_retry
        --store_type column
        --partition_keys postcommit_start_run_timestamp
        --primary_keys postcommit_start_run_timestamp commit
        --ttl_min 259200
        --ttl_key postcommit_start_run_timestamp
    # Invokes data_mart_executor.py for pr_blocked_by_failed_tests_rich.sql,
    # writing to test_results/analytics/pr_blocked_by_failed_tests_rich.
    # No TTL or cleanup-window flags are passed for this table.
    - name: Upload PR blocked by failed tests data mart
      continue-on-error: true
      # Folded scalar: the lines below are joined with single spaces into one
      # command line.
      run: >-
        python3 .github/scripts/analytics/data_mart_executor.py
        --query_path .github/scripts/analytics/data_mart_queries/pr_blocked_by_failed_tests_rich.sql
        --table_path test_results/analytics/pr_blocked_by_failed_tests_rich
        --store_type column
        --partition_keys last_run_timestamp
        --primary_keys last_run_timestamp full_name pr_number job_id
    # Invokes data_mart_executor.py for
    # pr_blocked_by_failed_tests_rich_with_pr_and_mute.sql, writing to the
    # table path of the same name under test_results/analytics.
    - name: Upload PR blocked by failed tests with PR info data mart
      continue-on-error: true
      # Folded scalar: the lines below are joined with single spaces into one
      # command line.
      run: >-
        python3 .github/scripts/analytics/data_mart_executor.py
        --query_path .github/scripts/analytics/data_mart_queries/pr_blocked_by_failed_tests_rich_with_pr_and_mute.sql
        --table_path test_results/analytics/pr_blocked_by_failed_tests_rich_with_pr_and_mute
        --store_type column
        --partition_keys last_run_timestamp
        --primary_keys last_run_timestamp full_name pr_number job_id
    # Invokes data_mart_executor.py for
    # pr_blocked_by_failed_tests_rich_with_pr_and_mute_all_runs_on_last_commit.sql.
    # Note the table name omits the "_with_pr_and_mute" part of the query name.
    # Unlike the sibling PR marts, this one is partitioned on run_timestamp.
    - name: Upload PR blocked by failed tests all runs on last commit (after rich_with_pr_and_mute)
      continue-on-error: true
      # Folded scalar: the lines below are joined with single spaces into one
      # command line.
      run: >-
        python3 .github/scripts/analytics/data_mart_executor.py
        --query_path .github/scripts/analytics/data_mart_queries/pr_blocked_by_failed_tests_rich_with_pr_and_mute_all_runs_on_last_commit.sql
        --table_path test_results/analytics/pr_blocked_by_failed_tests_rich_all_runs_on_last_commit
        --store_type column
        --partition_keys run_timestamp
        --primary_keys run_timestamp full_name pr_number branch job_id
    # Invokes data_mart_executor.py for pr_with_test_failures.sql.
    # Note the target table is named pr_blocked_by_tests, not after the query.
    - name: Upload PR with test failures (any failures, 1 day)
      continue-on-error: true
      # Folded scalar: the lines below are joined with single spaces into one
      # command line.
      run: >-
        python3 .github/scripts/analytics/data_mart_executor.py
        --query_path .github/scripts/analytics/data_mart_queries/pr_with_test_failures.sql
        --table_path test_results/analytics/pr_blocked_by_tests
        --store_type column
        --partition_keys last_run_timestamp
        --primary_keys last_run_timestamp full_name pr_number job_id
    # Invokes data_mart_executor.py for pr_check_failures_by_attempt.sql,
    # writing to test_results/analytics/pr_check_failures_by_attempt. The
    # primary key includes branch and attempt_number in addition to the columns
    # used by the other PR marts.
    - name: Upload PR check failures by attempt (all jobs in window, is_last_run_in_pr flag)
      continue-on-error: true
      # Folded scalar: the lines below are joined with single spaces into one
      # command line.
      run: >-
        python3 .github/scripts/analytics/data_mart_executor.py
        --query_path .github/scripts/analytics/data_mart_queries/pr_check_failures_by_attempt.sql
        --table_path test_results/analytics/pr_check_failures_by_attempt
        --store_type column
        --partition_keys last_run_timestamp
        --primary_keys last_run_timestamp full_name pr_number branch job_id attempt_number
    # Invokes data_mart_executor.py for
    # pr_failed_in_attempt_but_not_run_in_next.sql, writing to the table path of
    # the same name under test_results/analytics. gap_type is part of the
    # primary key.
    - name: Upload PR failed in attempt but not run in next data mart
      continue-on-error: true
      # Folded scalar: the lines below are joined with single spaces into one
      # command line.
      run: >-
        python3 .github/scripts/analytics/data_mart_executor.py
        --query_path .github/scripts/analytics/data_mart_queries/pr_failed_in_attempt_but_not_run_in_next.sql
        --table_path test_results/analytics/pr_failed_in_attempt_but_not_run_in_next
        --store_type column
        --partition_keys last_run_timestamp
        --primary_keys last_run_timestamp full_name pr_number job_id gap_type
    # Invokes data_mart_executor.py for stability_aggregate_mart.sql, writing to
    # nemesis/aggregated_mart.
    # --ttl_min 43200 would be 30 days if the unit is minutes; confirm in
    # data_mart_executor.py.
    - name: Upload Nemesis aggregate data mart
      continue-on-error: true
      # Folded scalar: the lines below are joined with single spaces into one
      # command line.
      run: >-
        python3 .github/scripts/analytics/data_mart_executor.py
        --query_path .github/scripts/analytics/data_mart_queries/stability_aggregate_mart.sql
        --table_path nemesis/aggregated_mart
        --store_type column
        --partition_keys RunTs
        --primary_keys RunTs Db Suite Test
        --ttl_min 43200
        --ttl_key RunTs
    # Two commands in order: first fetch history for origin's HEAD limited to
    # commits from the last 90 days, then run mute_latency_from_failure.py.
    # The fetch comes first presumably because the script reads git history
    # that the Checkout step did not fetch — confirm in the script.
    # continue-on-error: a failure of either command does not fail the job.
    - name: Upload mute latency data
      continue-on-error: true
      run: |
        git fetch --shallow-since="90 days ago" origin HEAD
        python3 .github/scripts/analytics/mute_latency_from_failure.py