diff --git a/.github/workflows/run-sweep.yml b/.github/workflows/run-sweep.yml
index c5ece9804..2b08a4a60 100644
--- a/.github/workflows/run-sweep.yml
+++ b/.github/workflows/run-sweep.yml
@@ -135,9 +135,96 @@ jobs:
             --ref "${{ github.ref }}" \
             --workflow-id "run-sweep.yml"
 
-  sweep-multi-node-1k1k:
+  # Selects a canary: the lowest-`conc` entry from single_node 1k1k/8k1k whose
+  # run-eval is not true. Outputs it as `canary-config`, plus the search space
+  # with that entry removed as `remaining-search-space-config`.
+  canary-select:
     needs: setup
-    if: ${{ needs.setup.outputs.reuse-enabled != 'true' && toJson(fromJson(needs.setup.outputs.search-space-config).multi_node['1k1k']) != 'null' }}
+    if: >-
+      needs.setup.outputs.reuse-enabled != 'true' &&
+      github.event_name == 'pull_request' &&
+      contains(github.event.pull_request.labels.*.name, 'full-sweep-enabled') &&
+      !contains(github.event.pull_request.labels.*.name, 'evals-only')
+    runs-on: ubuntu-latest
+    outputs:
+      canary-config: ${{ steps.pick.outputs.canary-config }}
+      remaining-search-space-config: ${{ steps.pick.outputs.remaining-search-space-config }}
+    steps:
+      - id: pick
+        env:
+          SEARCH_SPACE: ${{ needs.setup.outputs.search-space-config }}
+        run: |
+          selection=$(jq -c '
+            def remove_one($needle):
+              if $needle == null then .
+              else
+                (index($needle)) as $idx
+                | if $idx == null then . else del(.[$idx]) end
+              end;
+
+            # Canary is a benchmark-only smoke test — exclude entries
+            # whose primary purpose is eval (run-eval == true) so the
+            # picked canary never runs an eval pass.
+            (((.single_node["1k1k"] // []) + (.single_node["8k1k"] // []))
+              | map(select(.["run-eval"] != true))) as $candidates
+            | (if ($candidates | length) == 0 then null else ($candidates | min_by(.conc)) end) as $canary
+            | {
+                canary: (if $canary == null then [] else [$canary] end),
+                remaining: (
+                  .
+                  | .single_node = (.single_node // {})
+                  | .single_node["1k1k"] = ((.single_node["1k1k"] // []) | remove_one($canary))
+                  | .single_node["8k1k"] = ((.single_node["8k1k"] // []) | remove_one($canary))
+                )
+              }
+          ' <<<"$SEARCH_SPACE")
+          echo "canary-config=$(jq -c '.canary' <<<"$selection")" >> "$GITHUB_OUTPUT"
+          echo "remaining-search-space-config=$(jq -c '.remaining' <<<"$selection")" >> "$GITHUB_OUTPUT"
+
+  # Runs the canary entry through benchmark-tmpl.yml with run-eval forced off.
+  # Skipped when canary-select produced no entry (empty output or `[]`).
+  canary-sweep:
+    needs: canary-select
+    if: ${{ needs.canary-select.outputs.canary-config != '' && needs.canary-select.outputs.canary-config != '[]' }}
+    uses: ./.github/workflows/benchmark-tmpl.yml
+    name: canary /
+    strategy:
+      fail-fast: false
+      matrix:
+        config: ${{ fromJson(needs.canary-select.outputs.canary-config) }}
+    secrets: inherit
+    with:
+      exp-name: ${{ matrix.config.exp-name }}
+      isl: ${{ matrix.config.isl }}
+      osl: ${{ matrix.config.osl }}
+      max-model-len: ${{ matrix.config.max-model-len }}
+      runner: ${{ matrix.config.runner }}
+      image: ${{ matrix.config.image }}
+      model: ${{ matrix.config.model }}
+      model-prefix: ${{ matrix.config.model-prefix }}
+      framework: ${{ matrix.config.framework }}
+      precision: ${{ matrix.config.precision }}
+      tp: ${{ matrix.config.tp }}
+      ep: ${{ matrix.config.ep }}
+      dp-attn: ${{ matrix.config.dp-attn }}
+      conc: ${{ matrix.config.conc }}
+      spec-decoding: ${{ matrix.config.spec-decoding }}
+      disagg: ${{ matrix.config.disagg }}
+      run-eval: false
+
+  # Sweep jobs are gated on the canary. `!cancelled()` overrides the implicit
+  # success() check so a skipped canary does not skip the sweep, yet (unlike
+  # always()) a cancelled run does not start new benchmark jobs.
+  sweep-multi-node-1k1k:
+    needs: [setup, canary-select, canary-sweep]
+    if: >-
+      ${{
+        !cancelled() &&
+        needs.setup.result == 'success' &&
+        needs.setup.outputs.reuse-enabled != 'true' &&
+        needs.canary-sweep.result != 'failure' &&
+        toJson(fromJson(needs.setup.outputs.search-space-config).multi_node['1k1k']) != 'null'
+      }}
     uses: ./.github/workflows/benchmark-multinode-tmpl.yml
     name: multi-node 1k1k /
     strategy:
@@ -174,8 +261,15 @@ jobs:
       run-eval: false
 
   sweep-multi-node-8k1k:
-    needs: setup
-    if: ${{ needs.setup.outputs.reuse-enabled != 'true' && toJson(fromJson(needs.setup.outputs.search-space-config).multi_node['8k1k']) != 'null' }}
+    needs: [setup, canary-select, canary-sweep]
+    if: >-
+      ${{
+        !cancelled() &&
+        needs.setup.result == 'success' &&
+        needs.setup.outputs.reuse-enabled != 'true' &&
+        needs.canary-sweep.result != 'failure' &&
+        toJson(fromJson(needs.setup.outputs.search-space-config).multi_node['8k1k']) != 'null'
+      }}
     uses: ./.github/workflows/benchmark-multinode-tmpl.yml
     name: multi-node 8k1k /
     strategy:
@@ -186,14 +280,24 @@ jobs:
     with: *multi-node-inputs
 
   sweep-single-node-1k1k:
-    needs: setup
-    if: ${{ needs.setup.outputs.reuse-enabled != 'true' && toJson(fromJson(needs.setup.outputs.search-space-config).single_node['1k1k']) != 'null' }}
+    # Matrix source: the canary-less search space when the canary succeeded,
+    # otherwise the full search space from setup.
+    needs: [setup, canary-select, canary-sweep]
+    if: >-
+      ${{
+        !cancelled() &&
+        needs.setup.result == 'success' &&
+        needs.setup.outputs.reuse-enabled != 'true' &&
+        needs.canary-sweep.result != 'failure' &&
+        toJson(fromJson((needs.canary-sweep.result == 'success' && needs.canary-select.outputs.remaining-search-space-config) || needs.setup.outputs.search-space-config).single_node['1k1k']) != 'null' &&
+        toJson(fromJson((needs.canary-sweep.result == 'success' && needs.canary-select.outputs.remaining-search-space-config) || needs.setup.outputs.search-space-config).single_node['1k1k']) != '[]'
+      }}
     uses: ./.github/workflows/benchmark-tmpl.yml
     name: single-node 1k1k /
     strategy:
       fail-fast: false
       matrix:
-        config: ${{ fromJson(needs.setup.outputs.search-space-config).single_node['1k1k'] }}
+        config: ${{ fromJson((needs.canary-sweep.result == 'success' && needs.canary-select.outputs.remaining-search-space-config) || needs.setup.outputs.search-space-config).single_node['1k1k'] }}
     secrets: inherit
     with: &single-node-inputs
       exp-name: ${{ matrix.config.exp-name }}
@@ -215,20 +319,37 @@ jobs:
       run-eval: ${{ matrix.config.run-eval }}
 
   sweep-single-node-8k1k:
-    needs: setup
-    if: ${{ needs.setup.outputs.reuse-enabled != 'true' && toJson(fromJson(needs.setup.outputs.search-space-config).single_node['8k1k']) != 'null' }}
+    # Matrix source: the canary-less search space when the canary succeeded,
+    # otherwise the full search space from setup.
+    needs: [setup, canary-select, canary-sweep]
+    if: >-
+      ${{
+        !cancelled() &&
+        needs.setup.result == 'success' &&
+        needs.setup.outputs.reuse-enabled != 'true' &&
+        needs.canary-sweep.result != 'failure' &&
+        toJson(fromJson((needs.canary-sweep.result == 'success' && needs.canary-select.outputs.remaining-search-space-config) || needs.setup.outputs.search-space-config).single_node['8k1k']) != 'null' &&
+        toJson(fromJson((needs.canary-sweep.result == 'success' && needs.canary-select.outputs.remaining-search-space-config) || needs.setup.outputs.search-space-config).single_node['8k1k']) != '[]'
+      }}
     uses: ./.github/workflows/benchmark-tmpl.yml
     name: single-node 8k1k /
     strategy:
       fail-fast: false
       matrix:
-        config: ${{ fromJson(needs.setup.outputs.search-space-config).single_node['8k1k'] }}
+        config: ${{ fromJson((needs.canary-sweep.result == 'success' && needs.canary-select.outputs.remaining-search-space-config) || needs.setup.outputs.search-space-config).single_node['8k1k'] }}
     secrets: inherit
     with: *single-node-inputs
 
   sweep-agentic:
-    needs: setup
-    if: ${{ needs.setup.outputs.reuse-enabled != 'true' && toJson(fromJson(needs.setup.outputs.search-space-config).single_node['agentic']) != 'null' }}
+    needs: [setup, canary-select, canary-sweep]
+    if: >-
+      ${{
+        !cancelled() &&
+        needs.setup.result == 'success' &&
+        needs.setup.outputs.reuse-enabled != 'true' &&
+        needs.canary-sweep.result != 'failure' &&
+        toJson(fromJson(needs.setup.outputs.search-space-config).single_node['agentic']) != 'null'
+      }}
     uses: ./.github/workflows/benchmark-tmpl.yml
     name: agentic /
     strategy:
@@ -259,8 +380,15 @@ jobs:
       scenario-type: agentic-coding
 
   sweep-multi-node-agentic:
-    needs: setup
-    if: ${{ needs.setup.outputs.reuse-enabled != 'true' && toJson(fromJson(needs.setup.outputs.search-space-config).multi_node['agentic']) != 'null' }}
+    needs: [setup, canary-select, canary-sweep]
+    if: >-
+      ${{
+        !cancelled() &&
+        needs.setup.result == 'success' &&
+        needs.setup.outputs.reuse-enabled != 'true' &&
+        needs.canary-sweep.result != 'failure' &&
+        toJson(fromJson(needs.setup.outputs.search-space-config).multi_node['agentic']) != 'null'
+      }}
     uses: ./.github/workflows/benchmark-multinode-tmpl.yml
     name: multi-node agentic /
     strategy:
@@ -298,8 +426,16 @@ jobs:
       scenario-type: agentic-coding
 
   sweep-evals:
-    needs: setup
-    if: ${{ needs.setup.outputs.reuse-enabled != 'true' && toJson(fromJson(needs.setup.outputs.search-space-config).evals) != '[]' && toJson(fromJson(needs.setup.outputs.search-space-config).evals) != 'null' }}
+    needs: [setup, canary-select, canary-sweep]
+    if: >-
+      ${{
+        !cancelled() &&
+        needs.setup.result == 'success' &&
+        needs.setup.outputs.reuse-enabled != 'true' &&
+        needs.canary-sweep.result != 'failure' &&
+        toJson(fromJson(needs.setup.outputs.search-space-config).evals) != '[]' &&
+        toJson(fromJson(needs.setup.outputs.search-space-config).evals) != 'null'
+      }}
     uses: ./.github/workflows/benchmark-tmpl.yml
     name: eval /
     strategy:
@@ -328,8 +464,16 @@ jobs:
       eval-only: true
 
   sweep-multi-node-evals:
-    needs: setup
-    if: ${{ needs.setup.outputs.reuse-enabled != 'true' && toJson(fromJson(needs.setup.outputs.search-space-config).multinode_evals) != '[]' && toJson(fromJson(needs.setup.outputs.search-space-config).multinode_evals) != 'null' }}
+    needs: [setup, canary-select, canary-sweep]
+    if: >-
+      ${{
+        !cancelled() &&
+        needs.setup.result == 'success' &&
+        needs.setup.outputs.reuse-enabled != 'true' &&
+        needs.canary-sweep.result != 'failure' &&
+        toJson(fromJson(needs.setup.outputs.search-space-config).multinode_evals) != '[]' &&
+        toJson(fromJson(needs.setup.outputs.search-space-config).multinode_evals) != 'null'
+      }}
     uses: ./.github/workflows/benchmark-multinode-tmpl.yml
     name: multi-node eval /
     strategy:
@@ -368,6 +512,7 @@ jobs:
   collect-results:
     needs:
       [
+        canary-sweep,
         sweep-single-node-1k1k,
         sweep-single-node-8k1k,
         sweep-agentic,
@@ -381,6 +526,7 @@ jobs:
         always() &&
         needs.setup.result == 'success' &&
         (
+          needs.canary-sweep.result == 'success' ||
           needs.sweep-single-node-1k1k.result != 'skipped' ||
           needs.sweep-single-node-8k1k.result != 'skipped' ||
           needs.sweep-multi-node-1k1k.result != 'skipped' ||