# Periodic analytics collection.
#
# Runs every 30 minutes (and on manual dispatch), sets up YDB credentials and
# Python dependencies, then executes a sequence of upload/export scripts from
# .github/scripts/analytics. Every upload/export step is `continue-on-error`,
# so a failure in one script does not prevent the following ones from running.
#
# NOTE: the spellings "perfomance" and "siutes" below are part of existing
# script and table paths and are intentionally left untouched.
name: Collect-analytics-fast-run

on:
  schedule:
    - cron: "*/30 * * * *" # Every 30 min
  workflow_dispatch:
    inputs:
      commit_sha:
        type: string
        default: ""

defaults:
  run:
    shell: bash

jobs:
  main:
    name: Checkout and setup
    runs-on: [self-hosted, auto-provisioned, build-preset-analytic-node]
    steps:
      - name: Checkout
        uses: actions/checkout@v5
        with:
          # Only set for manual dispatch; on scheduled runs the expression is
          # empty and the checkout action is left to pick its default ref.
          ref: ${{ inputs.commit_sha }}

      - name: Setup ydb access
        uses: ./.github/actions/setup_ci_ydb_service_account_key_file_credentials
        with:
          ci_ydb_service_account_key_file_credentials: ${{ secrets.CI_YDB_SERVICE_ACCOUNT_KEY_FILE_CREDENTIALS }}
          ydb_qa_config: ${{ vars.YDB_QA_CONFIG }}

      - name: Install dependencies
        # "ydb[yc]" is quoted so bash cannot treat the brackets as a glob
        # pattern (it would match a file named `ydby` or `ydbc` in the cwd).
        run: |
          python3 -m pip install ydb "ydb[yc]" codeowners pandas

      - name: Collect testowners
        continue-on-error: true
        run: python3 .github/scripts/analytics/upload_testowners.py

      - name: Upload new test history to fast table
        continue-on-error: true
        run: python3 .github/scripts/analytics/test_history_fast.py

      - name: Upload olap perfomance suites data mart
        continue-on-error: true
        run: |
          python3 .github/scripts/analytics/data_mart_executor.py \
            --query_path .github/scripts/analytics/data_mart_queries/perfomance_olap_suites_mart.sql \
            --table_path perfomance/olap/fast_results_siutes \
            --store_type column \
            --partition_keys RunTs \
            --primary_keys RunTs Db Suite \
            --ttl_min 43200 \
            --ttl_key RunTs

      - name: Upload olap perfomance data mart
        continue-on-error: true
        run: |
          python3 .github/scripts/analytics/data_mart_executor.py \
            --query_path .github/scripts/analytics/data_mart_queries/perfomance_olap_mart.sql \
            --table_path perfomance/olap/fast_results \
            --store_type column \
            --partition_keys Run_start_timestamp \
            --primary_keys Run_start_timestamp Db Suite Test Branch \
            --ttl_min 43200 \
            --ttl_key Run_start_timestamp

      - name: Upload pr-check statistics data mart
        continue-on-error: true
        run: |
          python3 .github/scripts/analytics/data_mart_executor.py \
            --query_path .github/scripts/analytics/data_mart_queries/pr_check_stats.sql \
            --table_path analytics/pr_check_stats \
            --store_type column \
            --partition_keys date \
            --primary_keys date

      - name: Export GitHub issues
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        continue-on-error: true
        run: python3 .github/scripts/analytics/export_issues_to_ydb.py

      - name: Export GitHub pull_requests
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        continue-on-error: true
        run: python3 .github/scripts/analytics/export_pull_requests_to_ydb.py

      - name: Upload GitHub issue mapping table
        continue-on-error: true
        run: python3 .github/scripts/analytics/github_issue_mapping.py

      - name: Sync area to owner mapping (from owner_area_mapping.json)
        continue-on-error: true
        run: python3 .github/scripts/analytics/sync_area_to_owner_mapping.py

      # Publishes `run_cleanup=true` whenever the current UTC hour is 02.
      # With the */30 cron this holds for both runs scheduled in that hour,
      # and for any manual dispatch started during it. This step is not
      # `continue-on-error`: the steps below read its output.
      - name: Decide cleanup schedule (daily at 02:00 UTC)
        id: cleanup_gate
        run: |
          if [ "$(date -u +%H)" = "02" ]; then
            echo "run_cleanup=true" >> "$GITHUB_OUTPUT"
          else
            echo "run_cleanup=false" >> "$GITHUB_OUTPUT"
          fi

      # The next four steps pass a non-zero --cleanup_window_interval only
      # when the cleanup gate is open and `0 * Interval("P1D")` otherwise.
      # The command is otherwise identical in both cases, so only the
      # interval is selected inside the branch.
      # NOTE(review): the exact effect of the interval is defined by
      # data_mart_executor.py, which is not visible here.
      - name: Upload GitHub issues timeline data mart (after issues + area_to_owner_mapping)
        continue-on-error: true
        env:
          RUN_CLEANUP: ${{ steps.cleanup_gate.outputs.run_cleanup }}
        run: |
          if [ "$RUN_CLEANUP" = "true" ]; then
            cleanup_interval='31 * Interval("P1D")'
          else
            cleanup_interval='0 * Interval("P1D")'
          fi
          python3 .github/scripts/analytics/data_mart_executor.py \
            --query_path .github/scripts/analytics/data_mart_queries/github_issues_timeline.sql \
            --table_path test_results/analytics/github_issues_timeline \
            --store_type column \
            --partition_keys date \
            --primary_keys date issue_number project_item_id \
            --cleanup_window_key date \
            --cleanup_window_interval "$cleanup_interval"

      - name: Upload GitHub issues bugs count by period (after github_issues_timeline)
        continue-on-error: true
        env:
          RUN_CLEANUP: ${{ steps.cleanup_gate.outputs.run_cleanup }}
        run: |
          if [ "$RUN_CLEANUP" = "true" ]; then
            cleanup_interval='365 * Interval("P1D")'
          else
            cleanup_interval='0 * Interval("P1D")'
          fi
          python3 .github/scripts/analytics/data_mart_executor.py \
            --query_path .github/scripts/analytics/data_mart_queries/github_issues_bugs_count_by_period.sql \
            --table_path test_results/analytics/github_issues_bugs_count_by_period \
            --store_type column \
            --partition_keys date_window area \
            --primary_keys date_window area \
            --cleanup_window_key date_window \
            --cleanup_window_interval "$cleanup_interval"

      - name: Upload muted tests with issue and area data mart
        continue-on-error: true
        env:
          RUN_CLEANUP: ${{ steps.cleanup_gate.outputs.run_cleanup }}
        run: |
          if [ "$RUN_CLEANUP" = "true" ]; then
            cleanup_interval='365 * Interval("P1D")'
          else
            cleanup_interval='0 * Interval("P1D")'
          fi
          python3 .github/scripts/analytics/data_mart_executor.py \
            --query_path .github/scripts/analytics/data_mart_queries/muted_tests_with_issue_and_area.sql \
            --table_path test_results/analytics/muted_tests_with_issue_and_area \
            --store_type column \
            --partition_keys date_window branch build_type \
            --primary_keys date_window full_name branch build_type \
            --cleanup_window_key date_window \
            --cleanup_window_interval "$cleanup_interval"

      - name: Upload muted tests daily by team (pre-aggregation for BI)
        continue-on-error: true
        env:
          RUN_CLEANUP: ${{ steps.cleanup_gate.outputs.run_cleanup }}
        run: |
          if [ "$RUN_CLEANUP" = "true" ]; then
            cleanup_interval='365 * Interval("P1D")'
          else
            cleanup_interval='0 * Interval("P1D")'
          fi
          python3 .github/scripts/analytics/data_mart_executor.py \
            --query_path .github/scripts/analytics/data_mart_queries/muted_tests_daily_by_team.sql \
            --table_path test_results/analytics/muted_tests_daily_by_team \
            --store_type column \
            --partition_keys date_window branch build_type area \
            --primary_keys date_window area branch build_type \
            --cleanup_window_key date_window \
            --cleanup_window_interval "$cleanup_interval"

      # NOTE(review): this table path repeats the `test_results/` segment,
      # unlike the other marts under test_results/analytics/. Left as-is
      # because it names an existing destination; confirm it is intended.
      - name: Upload postcommit retry data mart
        continue-on-error: true
        run: |
          python3 .github/scripts/analytics/data_mart_executor.py \
            --query_path .github/scripts/analytics/data_mart_queries/datamart_postcommit_retry.sql \
            --table_path test_results/test_results/analytics/postcommit_retry \
            --store_type column \
            --partition_keys postcommit_start_run_timestamp \
            --primary_keys postcommit_start_run_timestamp commit \
            --ttl_min 259200 \
            --ttl_key postcommit_start_run_timestamp

      - name: Upload PR blocked by failed tests data mart
        continue-on-error: true
        run: |
          python3 .github/scripts/analytics/data_mart_executor.py \
            --query_path .github/scripts/analytics/data_mart_queries/pr_blocked_by_failed_tests_rich.sql \
            --table_path test_results/analytics/pr_blocked_by_failed_tests_rich \
            --store_type column \
            --partition_keys last_run_timestamp \
            --primary_keys last_run_timestamp full_name pr_number job_id

      - name: Upload PR blocked by failed tests with PR info data mart
        continue-on-error: true
        run: |
          python3 .github/scripts/analytics/data_mart_executor.py \
            --query_path .github/scripts/analytics/data_mart_queries/pr_blocked_by_failed_tests_rich_with_pr_and_mute.sql \
            --table_path test_results/analytics/pr_blocked_by_failed_tests_rich_with_pr_and_mute \
            --store_type column \
            --partition_keys last_run_timestamp \
            --primary_keys last_run_timestamp full_name pr_number job_id

      - name: Upload PR blocked by failed tests all runs on last commit (after rich_with_pr_and_mute)
        continue-on-error: true
        run: |
          python3 .github/scripts/analytics/data_mart_executor.py \
            --query_path .github/scripts/analytics/data_mart_queries/pr_blocked_by_failed_tests_rich_with_pr_and_mute_all_runs_on_last_commit.sql \
            --table_path test_results/analytics/pr_blocked_by_failed_tests_rich_all_runs_on_last_commit \
            --store_type column \
            --partition_keys run_timestamp \
            --primary_keys run_timestamp full_name pr_number branch job_id

      - name: Upload PR with test failures (any failures, 1 day)
        continue-on-error: true
        run: |
          python3 .github/scripts/analytics/data_mart_executor.py \
            --query_path .github/scripts/analytics/data_mart_queries/pr_with_test_failures.sql \
            --table_path test_results/analytics/pr_blocked_by_tests \
            --store_type column \
            --partition_keys last_run_timestamp \
            --primary_keys last_run_timestamp full_name pr_number job_id

      - name: Upload PR check failures by attempt (all jobs in window, is_last_run_in_pr flag)
        continue-on-error: true
        run: |
          python3 .github/scripts/analytics/data_mart_executor.py \
            --query_path .github/scripts/analytics/data_mart_queries/pr_check_failures_by_attempt.sql \
            --table_path test_results/analytics/pr_check_failures_by_attempt \
            --store_type column \
            --partition_keys last_run_timestamp \
            --primary_keys last_run_timestamp full_name pr_number branch job_id attempt_number

      - name: Upload PR failed in attempt but not run in next data mart
        continue-on-error: true
        run: |
          python3 .github/scripts/analytics/data_mart_executor.py \
            --query_path .github/scripts/analytics/data_mart_queries/pr_failed_in_attempt_but_not_run_in_next.sql \
            --table_path test_results/analytics/pr_failed_in_attempt_but_not_run_in_next \
            --store_type column \
            --partition_keys last_run_timestamp \
            --primary_keys last_run_timestamp full_name pr_number job_id gap_type

      - name: Upload Nemesis aggregate data mart
        continue-on-error: true
        run: |
          python3 .github/scripts/analytics/data_mart_executor.py \
            --query_path .github/scripts/analytics/data_mart_queries/stability_aggregate_mart.sql \
            --table_path nemesis/aggregated_mart \
            --store_type column \
            --partition_keys RunTs \
            --primary_keys RunTs Db Suite Test \
            --ttl_min 43200 \
            --ttl_key RunTs

      # Fetches the last 90 days of history for origin HEAD before running
      # the script.
      - name: Upload mute latency data
        continue-on-error: true
        run: |
          git fetch --shallow-since="90 days ago" origin HEAD
          python3 .github/scripts/analytics/mute_latency_from_failure.py