diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index 4fcf08352..01606f902 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -31,6 +31,11 @@ jobs:
             python_version: "3.11"
             pytorch: 2.7.0
             axolotl_extras:
+          - cuda: 128
+            cuda_version: 12.8.1
+            python_version: "3.11"
+            pytorch: 2.7.0
+            axolotl_extras:
     runs-on: axolotl-gpu-runner
     steps:
       - name: Checkout
@@ -94,6 +99,11 @@ jobs:
             python_version: "3.11"
             pytorch: 2.7.0
             axolotl_extras:
+          - cuda: 128
+            cuda_version: 12.8.1
+            python_version: "3.11"
+            pytorch: 2.7.0
+            axolotl_extras:
     runs-on: axolotl-gpu-runner
     steps:
       - name: Checkout
diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
index c296e2314..69f0a030d 100644
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -295,6 +295,7 @@ jobs:
           find "$(pip cache dir)/http-v2" -type f -mtime +14 -exec rm {} \;
 
   docker-e2e-tests-1st:
+    # Run this job first; it gates the remainder of the e2e test matrix
     if: ${{ ! contains(github.event.commits[0].message, '[skip e2e]') && github.repository_owner == 'axolotl-ai-cloud' }}
     # this job needs to be run on self-hosted GPU runners...
     runs-on: [self-hosted, modal]
@@ -341,6 +342,8 @@ jobs:
     # this job needs to be run on self-hosted GPU runners...
     runs-on: [self-hosted, modal]
     timeout-minutes: 90
+    # Only run the remainder of the matrix if the first e2e check passed, so that
+    # failures already caught by the first run don't waste compute on the rest
     needs: [pre-commit, pytest, docker-e2e-tests-1st]
 
     strategy:
@@ -365,6 +368,12 @@ jobs:
             pytorch: 2.7.0
             num_gpus: 1
             axolotl_extras:
+          - cuda: 128
+            cuda_version: 12.8.1
+            python_version: "3.11"
+            pytorch: 2.7.0
+            num_gpus: 1
+            axolotl_extras:
     steps:
       - name: Checkout
         uses: actions/checkout@v4