From afe18ace3590808766ea5f95790b228dc933c50c Mon Sep 17 00:00:00 2001
From: Wing Lian <wing@axolotl.ai>
Date: Thu, 1 Jan 2026 06:52:45 -0500
Subject: [PATCH] deprecate torch 2.7.1 (#3339)

---
 .github/workflows/base.yml                    | 49 ++++----------
 .github/workflows/main.yml                    | 64 +++++--------------
 .github/workflows/multi-gpu-e2e.yml           |  9 +--
 .github/workflows/nightlies.yml               | 20 +++---
 .github/workflows/tests-nightly.yml           | 16 ++---
 .github/workflows/tests.yml                   | 24 ++-----
 README.md                                     |  2 +-
 docs/docker.qmd                               | 16 ++---
 docs/installation.qmd                         |  4 +-
 .../cli/cloud/baseten/template/train_sft.py   |  3 +-
 src/axolotl/cli/cloud/modal_.py               |  2 +-
 11 files changed, 65 insertions(+), 144 deletions(-)

diff --git a/.github/workflows/base.yml b/.github/workflows/base.yml
index eddce1438..ea721bff4 100644
--- a/.github/workflows/base.yml
+++ b/.github/workflows/base.yml
@@ -25,27 +25,6 @@ jobs:
       fail-fast: false
       matrix:
         include:
-          - cuda: "126"
-            cuda_version: 12.6.3
-            cudnn_version: ""
-            python_version: "3.11"
-            pytorch: 2.7.0
-            torch_cuda_arch_list: "7.0 7.5 8.0 8.6 8.7 8.9 9.0+PTX"
-            dockerfile: "Dockerfile-base"
-          - cuda: "126"
-            cuda_version: 12.6.3
-            cudnn_version: ""
-            python_version: "3.11"
-            pytorch: 2.7.1
-            torch_cuda_arch_list: "7.0 7.5 8.0 8.6 8.7 8.9 9.0+PTX"
-            dockerfile: "Dockerfile-base"
-          - cuda: "128"
-            cuda_version: 12.8.1
-            cudnn_version: ""
-            python_version: "3.11"
-            pytorch: 2.7.1
-            torch_cuda_arch_list: "7.0 7.5 8.0 8.6 8.7 8.9 9.0+PTX"
-            dockerfile: "Dockerfile-base"
           - cuda: "128"
             cuda_version: 12.8.1
             cudnn_version: ""
@@ -53,6 +32,13 @@ jobs:
             pytorch: 2.8.0
             torch_cuda_arch_list: "7.0 7.5 8.0 8.6 8.7 8.9 9.0+PTX"
             dockerfile: "Dockerfile-base"
+          - cuda: "128"
+            cuda_version: 12.8.1
+            cudnn_version: ""
+            python_version: "3.11"
+            pytorch: 2.9.0
+            torch_cuda_arch_list: "7.0 7.5 8.0 8.6 8.7 8.9 9.0+PTX"
+            dockerfile: "Dockerfile-base"
           - cuda: "128"
             cuda_version: 12.8.1
             cudnn_version: ""
@@ -121,20 +107,6 @@ jobs:
       fail-fast: false
       matrix:
         include:
-          - cuda: "126"
-            cuda_version: 12.6.3
-            cudnn_version: ""
-            python_version: "3.11"
-            pytorch: 2.7.1
-            torch_cuda_arch_list: "7.0 7.5 8.0 8.6 8.7 8.9 9.0+PTX"
-            dockerfile: "Dockerfile-uv-base"
-          - cuda: "128"
-            cuda_version: 12.8.1
-            cudnn_version: ""
-            python_version: "3.11"
-            pytorch: 2.7.1
-            torch_cuda_arch_list: "7.0 7.5 8.0 8.6 8.7 8.9 9.0+PTX"
-            dockerfile: "Dockerfile-uv-base"
           - cuda: "128"
             cuda_version: 12.8.1
             cudnn_version: ""
@@ -149,6 +121,13 @@ jobs:
             pytorch: 2.9.1
             torch_cuda_arch_list: "7.0 7.5 8.0 8.6 8.7 8.9 9.0+PTX"
             dockerfile: "Dockerfile-uv-base"
+          - cuda: "128"
+            cuda_version: 12.8.1
+            cudnn_version: ""
+            python_version: "3.11"
+            pytorch: 2.9.0
+            torch_cuda_arch_list: "7.0 7.5 8.0 8.6 8.7 8.9 9.0+PTX"
+            dockerfile: "Dockerfile-uv-base"
           - cuda: "130"
             cuda_version: 13.0.0
             cudnn_version: ""
diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index f34a0cf2f..052f9aa72 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -15,21 +15,6 @@ jobs:
       fail-fast: false
       matrix:
         include:
-          - cuda: 126
-            cuda_version: 12.6.3
-            python_version: "3.11"
-            pytorch: 2.7.0
-            axolotl_extras:
-          - cuda: 126
-            cuda_version: 12.6.3
-            python_version: "3.11"
-            pytorch: 2.7.1
-            axolotl_extras: vllm
-          - cuda: 128
-            cuda_version: 12.8.1
-            python_version: "3.11"
-            pytorch: 2.7.1
-            axolotl_extras:
           - cuda: 128
             cuda_version: 12.8.1
             python_version: "3.11"
@@ -46,6 +31,11 @@ jobs:
             python_version: "3.11"
             pytorch: 2.9.1
             axolotl_extras:
+          - cuda: 130
+            cuda_version: 13.0.0
+            python_version: "3.11"
+            pytorch: 2.9.1
+            axolotl_extras:
     runs-on: axolotl-gpu-runner
     steps:
       - name: Checkout
@@ -92,27 +82,6 @@ jobs:
     strategy:
       matrix:
         include:
-          - cuda: 126
-            cuda_version: 12.6.3
-            python_version: "3.11"
-            pytorch: 2.7.0
-            axolotl_extras:
-          - cuda: 126
-            cuda_version: 12.6.3
-            python_version: "3.11"
-            pytorch: 2.7.1
-            axolotl_extras:
-            is_latest:
-          - cuda: 126
-            cuda_version: 12.6.3
-            python_version: "3.11"
-            pytorch: 2.7.1
-            axolotl_extras: vllm
-          - cuda: 128
-            cuda_version: 12.8.1
-            python_version: "3.11"
-            pytorch: 2.7.1
-            axolotl_extras:
           - cuda: 128
             cuda_version: 12.8.1
             python_version: "3.11"
@@ -129,6 +98,11 @@ jobs:
             python_version: "3.11"
             pytorch: 2.9.1
             axolotl_extras:
+          - cuda: 130
+            cuda_version: 13.0.0
+            python_version: "3.11"
+            pytorch: 2.9.1
+            axolotl_extras:
     runs-on: axolotl-gpu-runner
     steps:
       - name: Checkout
@@ -170,24 +144,18 @@ jobs:
     strategy:
       matrix:
         include:
-          - cuda: 126
-            cuda_version: 12.6.3
-            python_version: "3.11"
-            pytorch: 2.7.1
-            axolotl_extras:
-            is_latest:
-          - cuda: 126
-            cuda_version: 12.6.3
-            python_version: "3.11"
-            pytorch: 2.7.1
-            axolotl_extras: vllm
-            is_latest: true
           - cuda: 128
             cuda_version: 12.8.1
             python_version: "3.11"
             pytorch: 2.8.0
             axolotl_extras:
             is_latest:
+          - cuda: 128
+            cuda_version: 12.8.1
+            python_version: "3.11"
+            pytorch: 2.9.1
+            axolotl_extras:
+            is_latest:
     runs-on: axolotl-gpu-runner
     steps:
       - name: Checkout
diff --git a/.github/workflows/multi-gpu-e2e.yml b/.github/workflows/multi-gpu-e2e.yml
index 13162f8b1..1dd019dc7 100644
--- a/.github/workflows/multi-gpu-e2e.yml
+++ b/.github/workflows/multi-gpu-e2e.yml
@@ -29,13 +29,6 @@ jobs:
       fail-fast: false
       matrix:
         include:
-          - cuda: 126
-            cuda_version: 12.6.3
-            python_version: "3.11"
-            pytorch: 2.7.1
-            axolotl_extras: vllm
-            num_gpus: 2
-            nightly_build: "true"
           - cuda: 128
             cuda_version: 12.8.1
             python_version: "3.11"
@@ -46,7 +39,7 @@ jobs:
           - cuda: 128
             cuda_version: 12.8.1
             python_version: "3.11"
-            pytorch: 2.9.0
+            pytorch: 2.9.1
             axolotl_extras: fbgemm-gpu
             num_gpus: 2
             nightly_build: "true"
diff --git a/.github/workflows/nightlies.yml b/.github/workflows/nightlies.yml
index a24946ae9..d2c587cc7 100644
--- a/.github/workflows/nightlies.yml
+++ b/.github/workflows/nightlies.yml
@@ -12,16 +12,16 @@ jobs:
       fail-fast: false
       matrix:
         include:
-          - cuda: 126
-            cuda_version: 12.6.3
-            python_version: "3.11"
-            pytorch: 2.7.1
-            axolotl_extras:
           - cuda: 128
             cuda_version: 12.8.1
             python_version: "3.11"
             pytorch: 2.8.0
             axolotl_extras:
+          - cuda: 128
+            cuda_version: 12.8.1
+            python_version: "3.11"
+            pytorch: 2.9.1
+            axolotl_extras:
     runs-on: axolotl-gpu-runner
     steps:
       - name: Checkout
@@ -64,16 +64,16 @@ jobs:
     strategy:
       matrix:
         include:
-          - cuda: 126
-            cuda_version: 12.6.3
-            python_version: "3.11"
-            pytorch: 2.7.1
-            axolotl_extras:
           - cuda: 128
             cuda_version: 12.8.1
             python_version: "3.11"
             pytorch: 2.8.0
             axolotl_extras:
+          - cuda: 128
+            cuda_version: 12.8.1
+            python_version: "3.11"
+            pytorch: 2.9.1
+            axolotl_extras:
     runs-on: axolotl-gpu-runner
     steps:
       - name: Checkout
diff --git a/.github/workflows/tests-nightly.yml b/.github/workflows/tests-nightly.yml
index 53139fac1..67b68a7e6 100644
--- a/.github/workflows/tests-nightly.yml
+++ b/.github/workflows/tests-nightly.yml
@@ -26,7 +26,7 @@ jobs:
       max-parallel: 2
       matrix:
         python_version: ["3.11"]
-        pytorch_version: ["2.7.1", "2.8.0"]
+        pytorch_version: ["2.8.0", "2.9.0", "2.9.1"]
     timeout-minutes: 20
 
     steps:
@@ -99,17 +99,17 @@ jobs:
       fail-fast: false
       matrix:
         include:
-          - cuda: 126
-            cuda_version: 12.6.3
+          - cuda: 128
+            cuda_version: 12.8.1
             python_version: "3.11"
-            pytorch: 2.7.1
+            pytorch: 2.8.0
             num_gpus: 1
             axolotl_extras:
             nightly_build: "true"
           - cuda: 128
             cuda_version: 12.8.1
             python_version: "3.11"
-            pytorch: 2.8.0
+            pytorch: 2.9.1
             num_gpus: 1
             axolotl_extras:
             nightly_build: "true"
@@ -148,10 +148,10 @@ jobs:
       fail-fast: false
       matrix:
         include:
-          - cuda: 126
-            cuda_version: 12.6.3
+          - cuda: 128
+            cuda_version: 12.8.1
             python_version: "3.11"
-            pytorch: 2.7.1
+            pytorch: 2.9.1
             num_gpus: 2
             axolotl_extras:
             nightly_build: "true"
diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
index 9cf231575..ae5ba1740 100644
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -55,7 +55,7 @@ jobs:
       fail-fast: false
       matrix:
         python_version: ["3.11"]
-        pytorch_version: ["2.7.1", "2.8.0", "2.9.0"]
+        pytorch_version: ["2.8.0", "2.9.0", "2.9.1"]
     timeout-minutes: 20
 
     steps:
@@ -145,7 +145,7 @@ jobs:
       fail-fast: false
       matrix:
         python_version: ["3.11"]
-        pytorch_version: ["2.7.1", "2.8.0", "2.9.0"]
+        pytorch_version: ["2.8.0", "2.9.0", "2.9.1"]
     timeout-minutes: 20
 
     steps:
@@ -303,18 +303,6 @@ jobs:
       fail-fast: false
       matrix:
         include:
-          - cuda: 126
-            cuda_version: 12.6.3
-            python_version: "3.11"
-            pytorch: 2.7.1
-            num_gpus: 1
-            axolotl_extras:
-#          - cuda: 128
-#            cuda_version: 12.8.1
-#            python_version: "3.11"
-#            pytorch: 2.7.1
-#            num_gpus: 1
-#            axolotl_extras:
           - cuda: 128
             cuda_version: 12.8.1
             python_version: "3.11"
@@ -325,7 +313,7 @@ jobs:
           - cuda: 128
             cuda_version: 12.8.1
             python_version: "3.11"
-            pytorch: 2.9.0
+            pytorch: 2.9.1
             num_gpus: 1
             axolotl_extras:
     steps:
@@ -365,10 +353,10 @@ jobs:
       fail-fast: false
       matrix:
         include:
-          - cuda: 126
-            cuda_version: 12.6.3
+          - cuda: 128
+            cuda_version: 12.8.1
             python_version: "3.11"
-            pytorch: 2.7.1
+            pytorch: 2.9.1
             num_gpus: 1
             axolotl_extras:
     steps:
diff --git a/README.md b/README.md
index 01e0c44d9..0521f7bed 100644
--- a/README.md
+++ b/README.md
@@ -77,7 +77,7 @@ Features:
 
 - NVIDIA GPU (Ampere or newer for `bf16` and Flash Attention) or AMD GPU
 - Python 3.11
-- PyTorch ≥2.7.1
+- PyTorch ≥2.8.0
 
 ### Google Colab
 
diff --git a/docs/docker.qmd b/docs/docker.qmd
index da6184394..5d146eac2 100644
--- a/docs/docker.qmd
+++ b/docs/docker.qmd
@@ -32,11 +32,8 @@ main-base-py{python_version}-cu{cuda_version}-{pytorch_version}
 
 Tags examples:
 
-- `main-base-py3.11-cu128-2.7.1`
-- `main-base-py3.11-cu126-2.7.1`
-- `main-base-py3.11-cu126-2.7.0`
-- `main-base-py3.11-cu126-2.6.0`
-- `main-base-py3.11-cu124-2.6.0`
+- `main-base-py3.11-cu128-2.8.0`
+- `main-base-py3.11-cu128-2.9.1`
 
 ## Main
 
@@ -74,15 +71,12 @@ There may be some extra tags appended to the image, like `-vllm` which installs
 
 Tags examples:
 
-- `main-py3.11-cu128-2.7.1`
-- `main-py3.11-cu126-2.7.1`
-- `main-py3.11-cu126-2.7.0`
-- `main-py3.11-cu126-2.6.0`
-- `main-py3.11-cu124-2.6.0`
+- `main-py3.11-cu128-2.8.0`
+- `main-py3.11-cu128-2.9.1`
 - `main-latest`
 - `main-20250303-py3.11-cu124-2.6.0`
 - `main-20250303-py3.11-cu126-2.6.0`
-- `0.10.1`
+- `0.12.0`
 
 ## Cloud
 
diff --git a/docs/installation.qmd b/docs/installation.qmd
index 265ff238c..b8d427eb0 100644
--- a/docs/installation.qmd
+++ b/docs/installation.qmd
@@ -26,7 +26,7 @@ Follow the instructions at: [https://pytorch.org/get-started/locally/](https://p
 :::
 
 ::: {.callout-important}
-For Blackwell GPUs, please use Pytorch 2.7.0 and CUDA 12.8.
+For Blackwell GPUs, please use PyTorch 2.9.1 and CUDA 12.8.
 :::
 
 ### PyPI Installation (Recommended) {#sec-pypi}
@@ -111,7 +111,7 @@ docker run --privileged --gpus '"all"' --shm-size 10g --rm -it \
 :::
 
 ::: {.callout-important}
-For Blackwell GPUs, please use `axolotlai/axolotl:main-py3.11-cu128-2.7.0` or the cloud variant `axolotlai/axolotl-cloud:main-py3.11-cu128-2.7.0`.
+For Blackwell GPUs, please use `axolotlai/axolotl:main-py3.11-cu128-2.9.1` or the cloud variant `axolotlai/axolotl-cloud:main-py3.11-cu128-2.9.1`.
 :::
 
 Please refer to the [Docker documentation](docker.qmd) for more information on the different Docker images that are available.
diff --git a/src/axolotl/cli/cloud/baseten/template/train_sft.py b/src/axolotl/cli/cloud/baseten/template/train_sft.py
index 137fb9171..6dcf477c7 100644
--- a/src/axolotl/cli/cloud/baseten/template/train_sft.py
+++ b/src/axolotl/cli/cloud/baseten/template/train_sft.py
@@ -24,8 +24,7 @@ if launcher_args:
     launcher_args_str = "-- " + " ".join(launcher_args)
 
 # 1. Define a base image for your training job
-# must use torch 2.7.0 for vllm
-BASE_IMAGE = "axolotlai/axolotl:main-py3.11-cu126-2.7.1"
+BASE_IMAGE = "axolotlai/axolotl:main-py3.11-cu128-2.9.1"
 
 # 2. Define the Runtime Environment for the Training Job
 # This includes start commands and environment variables.a
diff --git a/src/axolotl/cli/cloud/modal_.py b/src/axolotl/cli/cloud/modal_.py
index 7f953372d..3e703a494 100644
--- a/src/axolotl/cli/cloud/modal_.py
+++ b/src/axolotl/cli/cloud/modal_.py
@@ -82,7 +82,7 @@ class ModalCloud(Cloud):
         return res
 
     def get_image(self):
-        docker_tag = "main-py3.11-cu126-2.7.1"
+        docker_tag = "main-py3.11-cu128-2.9.1"
         if self.config.docker_tag:
             docker_tag = self.config.docker_tag
         docker_image = f"axolotlai/axolotl:{docker_tag}"