# Periodic analytics collection: uploads test history, GitHub issue/PR exports
# and the data marts built by the steps of the job below.
name: Collect-analytics-fast-run
on:
  schedule:
    - cron: "*/30 * * * *"  # Every 30 min (GitHub evaluates cron schedules in UTC)
  workflow_dispatch:
    inputs:
      commit_sha:
        description: 'Commit SHA, branch or tag to check out; leave empty to use the ref the run was started on'
        type: string
        required: false
        default: ""

# Every `run:` step in this workflow is executed with bash unless a step
# declares its own `shell`.
defaults:
  run:
    shell: bash
jobs:
  # Single job; its steps run one after another on the same runner.
  main:
    name: Checkout and setup
    # The runner has to carry all three labels.
    runs-on:
      - self-hosted
      - auto-provisioned
      - build-preset-analytic-node
    steps:
    # `inputs.commit_sha` is empty for scheduled runs (and for manual runs that
    # leave it blank); actions/checkout then falls back to its default ref.
    - name: Checkout
      uses: actions/checkout@v5
      with:
        ref: "${{ inputs.commit_sha }}"
    # Local composite action (path starts with ./), so it is resolved from the
    # repository checked out by the previous step.
    - name: Setup ydb access
      uses: ./.github/actions/setup_ci_ydb_service_account_key_file_credentials
      with:
        # Service-account key comes from repository secrets, QA config from variables.
        ci_ydb_service_account_key_file_credentials: "${{ secrets.CI_YDB_SERVICE_ACCOUNT_KEY_FILE_CREDENTIALS }}"
        ydb_qa_config: "${{ vars.YDB_QA_CONFIG }}"
    # Installs the Python packages used by the analytics scripts below.
    - name: Install dependencies
      run: |
        # The extras spec is quoted: unquoted, bash treats ydb[yc] as a glob
        # pattern and would replace it with a matching file name (ydby / ydbc)
        # if one existed in the working directory.
        python3 -m pip install ydb 'ydb[yc]' codeowners pandas
    - name: Collect testowners
      # Best effort: a failure is reported but does not stop the following steps.
      continue-on-error: true
      run: >-
        python3 .github/scripts/analytics/upload_testowners.py
    - name: Upload new test history to fast table
      # Best effort: a failure is reported but does not stop the following steps.
      continue-on-error: true
      run: >-
        python3 .github/scripts/analytics/test_history_fast.py
    # Runs data_mart_executor.py with the OLAP suites query.
    # The folded scalar below is joined into a single command line.
    # NOTE(review): --ttl_min 43200 would be 30 days if the unit is minutes, as
    # the option name suggests — confirm against data_mart_executor.py.
    - name: Upload olap perfomance suites data mart
      continue-on-error: true
      run: >-
        python3 .github/scripts/analytics/data_mart_executor.py
        --query_path .github/scripts/analytics/data_mart_queries/perfomance_olap_suites_mart.sql
        --table_path perfomance/olap/fast_results_siutes
        --store_type column
        --partition_keys RunTs
        --primary_keys RunTs Db Suite
        --ttl_min 43200
        --ttl_key RunTs
    # Runs data_mart_executor.py with the per-test OLAP performance query.
    # The folded scalar below is joined into a single command line.
    - name: Upload olap perfomance data mart
      continue-on-error: true
      run: >-
        python3 .github/scripts/analytics/data_mart_executor.py
        --query_path .github/scripts/analytics/data_mart_queries/perfomance_olap_mart.sql
        --table_path perfomance/olap/fast_results
        --store_type column
        --partition_keys Run_start_timestamp
        --primary_keys Run_start_timestamp Db Suite Test Branch
        --ttl_min 43200
        --ttl_key Run_start_timestamp
    # Runs data_mart_executor.py with the pr_check_stats query; no TTL options
    # are passed for this table.
    - name: Upload pr-check statistics data mart
      continue-on-error: true
      run: >-
        python3 .github/scripts/analytics/data_mart_executor.py
        --query_path .github/scripts/analytics/data_mart_queries/pr_check_stats.sql
        --table_path analytics/pr_check_stats
        --store_type column
        --partition_keys date
        --primary_keys date
    - name: Export GitHub issues
      # Best effort: a failure is reported but does not stop the following steps.
      continue-on-error: true
      env:
        # The workflow's automatic token, exposed to the script as an environment variable.
        GITHUB_TOKEN: "${{ secrets.GITHUB_TOKEN }}"
      run: >-
        python3 .github/scripts/analytics/export_issues_to_ydb.py
    - name: Export GitHub pull_requests
      # Best effort: a failure is reported but does not stop the following steps.
      continue-on-error: true
      env:
        # The workflow's automatic token, exposed to the script as an environment variable.
        GITHUB_TOKEN: "${{ secrets.GITHUB_TOKEN }}"
      run: >-
        python3 .github/scripts/analytics/export_pull_requests_to_ydb.py
    - name: Upload GitHub issue mapping table
      # Best effort: a failure is reported but does not stop the following steps.
      continue-on-error: true
      run: >-
        python3 .github/scripts/analytics/github_issue_mapping.py
    - name: Sync area to owner mapping (from owner_area_mapping.json)
      # Best effort: a failure is reported but does not stop the following steps.
      continue-on-error: true
      run: >-
        python3 .github/scripts/analytics/sync_area_to_owner_mapping.py
    # Publishes the step output `run_cleanup` ("true"/"false") that later steps
    # read to pick their cleanup window.
    # NOTE(review): the check is on the UTC hour only, so every run reaching
    # this step between 02:00 and 02:59 UTC gets "true" — with a */30 cron that
    # can be two runs per day. Confirm this is acceptable.
    - name: Decide cleanup schedule (daily at 02:00 UTC)
      id: cleanup_gate
      run: |
        # %H is the zero-padded hour (00-23); -u forces UTC.
        case "$(date -u +%H)" in
          02) cleanup_flag=true ;;
          *)  cleanup_flag=false ;;
        esac
        echo "run_cleanup=${cleanup_flag}" >> "$GITHUB_OUTPUT"
    # Runs data_mart_executor.py with the github_issues_timeline query. The only
    # difference between the cleanup run and a regular run is the value passed
    # to --cleanup_window_interval (31 days vs 0 days).
    - name: Upload GitHub issues timeline data mart (after issues + area_to_owner_mapping)
      continue-on-error: true
      run: |
        # Default: regular run.
        window='0 * Interval("P1D")'
        if [ "${{ steps.cleanup_gate.outputs.run_cleanup }}" = "true" ]; then
          window='31 * Interval("P1D")'
        fi
        python3 .github/scripts/analytics/data_mart_executor.py \
          --query_path .github/scripts/analytics/data_mart_queries/github_issues_timeline.sql \
          --table_path test_results/analytics/github_issues_timeline \
          --store_type column \
          --partition_keys date \
          --primary_keys date issue_number project_item_id \
          --cleanup_window_key date \
          --cleanup_window_interval "$window"
    # Runs data_mart_executor.py with the github_issues_bugs_count_by_period
    # query. The only difference between the cleanup run and a regular run is
    # the value passed to --cleanup_window_interval (365 days vs 0 days).
    - name: Upload GitHub issues bugs count by period (after github_issues_timeline)
      continue-on-error: true
      run: |
        # Default: regular run.
        window='0 * Interval("P1D")'
        if [ "${{ steps.cleanup_gate.outputs.run_cleanup }}" = "true" ]; then
          window='365 * Interval("P1D")'
        fi
        python3 .github/scripts/analytics/data_mart_executor.py \
          --query_path .github/scripts/analytics/data_mart_queries/github_issues_bugs_count_by_period.sql \
          --table_path test_results/analytics/github_issues_bugs_count_by_period \
          --store_type column \
          --partition_keys date_window area \
          --primary_keys date_window area \
          --cleanup_window_key date_window \
          --cleanup_window_interval "$window"
    # Runs data_mart_executor.py with the muted_tests_with_issue_and_area query.
    # The only difference between the cleanup run and a regular run is the
    # value passed to --cleanup_window_interval (365 days vs 0 days).
    - name: Upload muted tests with issue and area data mart
      continue-on-error: true
      run: |
        # Default: regular run.
        window='0 * Interval("P1D")'
        if [ "${{ steps.cleanup_gate.outputs.run_cleanup }}" = "true" ]; then
          window='365 * Interval("P1D")'
        fi
        python3 .github/scripts/analytics/data_mart_executor.py \
          --query_path .github/scripts/analytics/data_mart_queries/muted_tests_with_issue_and_area.sql \
          --table_path test_results/analytics/muted_tests_with_issue_and_area \
          --store_type column \
          --partition_keys date_window branch build_type \
          --primary_keys date_window full_name branch build_type \
          --cleanup_window_key date_window \
          --cleanup_window_interval "$window"
    # Runs data_mart_executor.py with the muted_tests_daily_by_team query.
    # The only difference between the cleanup run and a regular run is the
    # value passed to --cleanup_window_interval (365 days vs 0 days).
    - name: Upload muted tests daily by team (pre-aggregation for BI)
      continue-on-error: true
      run: |
        # Default: regular run.
        window='0 * Interval("P1D")'
        if [ "${{ steps.cleanup_gate.outputs.run_cleanup }}" = "true" ]; then
          window='365 * Interval("P1D")'
        fi
        python3 .github/scripts/analytics/data_mart_executor.py \
          --query_path .github/scripts/analytics/data_mart_queries/muted_tests_daily_by_team.sql \
          --table_path test_results/analytics/muted_tests_daily_by_team \
          --store_type column \
          --partition_keys date_window branch build_type area \
          --primary_keys date_window area branch build_type \
          --cleanup_window_key date_window \
          --cleanup_window_interval "$window"
    # Runs data_mart_executor.py with the postcommit retry query.
    # NOTE(review): the table path repeats "test_results/" while the other marts
    # in this workflow use "test_results/analytics/..." — confirm it is intended.
    - name: Upload postcommit retry data mart
      continue-on-error: true
      run: >-
        python3 .github/scripts/analytics/data_mart_executor.py
        --query_path .github/scripts/analytics/data_mart_queries/datamart_postcommit_retry.sql
        --table_path test_results/test_results/analytics/postcommit_retry
        --store_type column
        --partition_keys postcommit_start_run_timestamp
        --primary_keys postcommit_start_run_timestamp commit
        --ttl_min 259200
        --ttl_key postcommit_start_run_timestamp
    # Runs data_mart_executor.py with the pr_blocked_by_failed_tests_rich query.
    # The folded scalar below is joined into a single command line.
    - name: Upload PR blocked by failed tests data mart
      continue-on-error: true
      run: >-
        python3 .github/scripts/analytics/data_mart_executor.py
        --query_path .github/scripts/analytics/data_mart_queries/pr_blocked_by_failed_tests_rich.sql
        --table_path test_results/analytics/pr_blocked_by_failed_tests_rich
        --store_type column
        --partition_keys last_run_timestamp
        --primary_keys last_run_timestamp full_name pr_number job_id
    # Runs data_mart_executor.py with the
    # pr_blocked_by_failed_tests_rich_with_pr_and_mute query.
    - name: Upload PR blocked by failed tests with PR info data mart
      continue-on-error: true
      run: >-
        python3 .github/scripts/analytics/data_mart_executor.py
        --query_path .github/scripts/analytics/data_mart_queries/pr_blocked_by_failed_tests_rich_with_pr_and_mute.sql
        --table_path test_results/analytics/pr_blocked_by_failed_tests_rich_with_pr_and_mute
        --store_type column
        --partition_keys last_run_timestamp
        --primary_keys last_run_timestamp full_name pr_number job_id
    # Runs data_mart_executor.py with the "all runs on last commit" query.
    # NOTE(review): the table name omits "_with_pr_and_mute" that the query file
    # name carries — presumably deliberate; verify.
    - name: Upload PR blocked by failed tests all runs on last commit (after rich_with_pr_and_mute)
      continue-on-error: true
      run: >-
        python3 .github/scripts/analytics/data_mart_executor.py
        --query_path .github/scripts/analytics/data_mart_queries/pr_blocked_by_failed_tests_rich_with_pr_and_mute_all_runs_on_last_commit.sql
        --table_path test_results/analytics/pr_blocked_by_failed_tests_rich_all_runs_on_last_commit
        --store_type column
        --partition_keys run_timestamp
        --primary_keys run_timestamp full_name pr_number branch job_id
    # Runs data_mart_executor.py with the pr_with_test_failures query.
    # NOTE(review): results go to the table "pr_blocked_by_tests", whose name
    # differs from the query file name — confirm the target is intended.
    - name: Upload PR with test failures (any failures, 1 day)
      continue-on-error: true
      run: >-
        python3 .github/scripts/analytics/data_mart_executor.py
        --query_path .github/scripts/analytics/data_mart_queries/pr_with_test_failures.sql
        --table_path test_results/analytics/pr_blocked_by_tests
        --store_type column
        --partition_keys last_run_timestamp
        --primary_keys last_run_timestamp full_name pr_number job_id
    # Runs data_mart_executor.py with the pr_check_failures_by_attempt query.
    # The folded scalar below is joined into a single command line.
    - name: Upload PR check failures by attempt (all jobs in window, is_last_run_in_pr flag)
      continue-on-error: true
      run: >-
        python3 .github/scripts/analytics/data_mart_executor.py
        --query_path .github/scripts/analytics/data_mart_queries/pr_check_failures_by_attempt.sql
        --table_path test_results/analytics/pr_check_failures_by_attempt
        --store_type column
        --partition_keys last_run_timestamp
        --primary_keys last_run_timestamp full_name pr_number branch job_id attempt_number
    # Runs data_mart_executor.py with the
    # pr_failed_in_attempt_but_not_run_in_next query.
    - name: Upload PR failed in attempt but not run in next data mart
      continue-on-error: true
      run: >-
        python3 .github/scripts/analytics/data_mart_executor.py
        --query_path .github/scripts/analytics/data_mart_queries/pr_failed_in_attempt_but_not_run_in_next.sql
        --table_path test_results/analytics/pr_failed_in_attempt_but_not_run_in_next
        --store_type column
        --partition_keys last_run_timestamp
        --primary_keys last_run_timestamp full_name pr_number job_id gap_type
    # Runs data_mart_executor.py with the stability_aggregate_mart query and
    # writes to nemesis/aggregated_mart.
    - name: Upload Nemesis aggregate data mart
      continue-on-error: true
      run: >-
        python3 .github/scripts/analytics/data_mart_executor.py
        --query_path .github/scripts/analytics/data_mart_queries/stability_aggregate_mart.sql
        --table_path nemesis/aggregated_mart
        --store_type column
        --partition_keys RunTs
        --primary_keys RunTs Db Suite Test
        --ttl_min 43200
        --ttl_key RunTs
    - name: Upload mute latency data
      # Best effort: a failure is reported but does not stop the following steps.
      continue-on-error: true
      run: |
        # Fetch the last 90 days of history for origin's HEAD before the script runs.
        # presumably mute_latency_from_failure.py reads git history — verify.
        git fetch '--shallow-since=90 days ago' origin HEAD
        python3 .github/scripts/analytics/mute_latency_from_failure.py