From 729e888ebe49d949b7111ed496346f97595a5ba3 Mon Sep 17 00:00:00 2001 From: Hongtao Zhang Date: Tue, 2 Jun 2026 23:22:34 +0000 Subject: [PATCH 1/3] CI/CD - Clean up image builds, remove cuda 12.8/12.4/12.2 and add cuda13.0 merge --- .github/workflows/build-image.yml | 36 ++++++------------------------- 1 file changed, 6 insertions(+), 30 deletions(-) diff --git a/.github/workflows/build-image.yml b/.github/workflows/build-image.yml index b498ef1eb..c17328cac 100644 --- a/.github/workflows/build-image.yml +++ b/.github/workflows/build-image.yml @@ -41,18 +41,6 @@ jobs: platforms: linux/amd64 runner: [self-hosted, linux/amd64] build_args: "NUM_MAKE_JOBS=16" - - name: cuda12.8-arm64 - dockerfile: cuda12.8 - tags: superbench/main:cuda12.8-arm64 - platforms: linux/arm64 - runner: [self-hosted, linux/arm64] - build_args: "NUM_MAKE_JOBS=16" - - name: cuda12.8-amd64 - dockerfile: cuda12.8 - tags: superbench/main:cuda12.8-amd64 - platforms: linux/amd64 - runner: [self-hosted, linux/amd64] - build_args: "NUM_MAKE_JOBS=16" - name: cuda12.9-arm64 dockerfile: cuda12.9 tags: superbench/main:cuda12.9-arm64 @@ -65,18 +53,6 @@ jobs: platforms: linux/amd64 runner: [self-hosted, linux/amd64] build_args: "NUM_MAKE_JOBS=16" - - name: cuda12.4 - dockerfile: cuda12.4 - tags: superbench/main:cuda12.4 - platforms: linux/amd64 - runner: [self-hosted, linux/amd64] - build_args: "NUM_MAKE_JOBS=16" - - name: cuda12.2 - dockerfile: cuda12.2 - tags: superbench/main:cuda12.2 - platforms: linux/amd64 - runner: [self-hosted, linux/amd64] - build_args: "NUM_MAKE_JOBS=16" - name: cuda11.1.1 dockerfile: cuda11.1.1 tags: superbench/main:cuda11.1.1,superbench/superbench:latest @@ -191,7 +167,7 @@ jobs: run: echo ${{ steps.docker_build.outputs.digest }} docker-merge: needs: docker-build - if: ${{ !cancelled() }} + if: ${{ success() }} name: Docker merge ${{ matrix.name }} runs-on: self-hosted timeout-minutes: 300 @@ -201,16 +177,16 @@ jobs: strategy: matrix: include: - - name: cuda12.8 - tags: superbench/main:cuda12.8 - sources: >- - superbench/main:cuda12.8-amd64 - superbench/main:cuda12.8-arm64 - name: cuda12.9 tags: superbench/main:cuda12.9 sources: >- superbench/main:cuda12.9-amd64 superbench/main:cuda12.9-arm64 + - name: cuda13.0 + tags: superbench/main:cuda13.0 + sources: >- + superbench/main:cuda13.0-amd64 + superbench/main:cuda13.0-arm64 steps: - name: Checkout uses: actions/checkout@v4 From 62d9c9735a08f902089d179f36a58c6efad63067 Mon Sep 17 00:00:00 2001 From: Hongtao Zhang Date: Tue, 2 Jun 2026 23:31:19 +0000 Subject: [PATCH 2/3] CI/CD - Keep docker-merge gated on not-cancelled and add fail-fast false so CUDA merges stay independent of flaky ROCm builds --- .github/workflows/build-image.yml | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/.github/workflows/build-image.yml b/.github/workflows/build-image.yml index c17328cac..de43d1f93 100644 --- a/.github/workflows/build-image.yml +++ b/.github/workflows/build-image.yml @@ -167,7 +167,7 @@ jobs: run: echo ${{ steps.docker_build.outputs.digest }} docker-merge: needs: docker-build - if: ${{ success() }} + if: ${{ !cancelled() }} name: Docker merge ${{ matrix.name }} runs-on: self-hosted timeout-minutes: 300 @@ -175,6 +175,9 @@ jobs: contents: read packages: write strategy: + # Keep merging independent CUDA manifests even if one merge leg fails, so a + # single failed build does not block publishing other successfully built images. + fail-fast: false matrix: include: - name: cuda12.9 From 3ae7719fb74eaa6c859c4432958f075bbce5de8b Mon Sep 17 00:00:00 2001 From: Hongtao Zhang Date: Tue, 2 Jun 2026 23:33:10 +0000 Subject: [PATCH 3/3] CI/CD - Remove redundant comment on docker-merge fail-fast --- .github/workflows/build-image.yml | 2 -- 1 file changed, 2 deletions(-) diff --git a/.github/workflows/build-image.yml b/.github/workflows/build-image.yml index de43d1f93..d61ddd9bd 100644 --- a/.github/workflows/build-image.yml +++ b/.github/workflows/build-image.yml @@ -175,8 +175,6 @@ jobs: contents: read packages: write strategy: - # Keep merging independent CUDA manifests even if one merge leg fails, so a - # single failed build does not block publishing other successfully built images. fail-fast: false matrix: include: