deprecate torch 2.8.0 support (#3550)

* deprecate torch 2.8.0 support
* shell lint
* odd naming of manylinux wheels for x86
2026-03-25 18:22:47 -04:00
parent 5191e4eb53
commit 99bde0124c
8 changed files with 9 additions and 64 deletions
--- a/.github/workflows/base.yml
+++ b/.github/workflows/base.yml
@@ -30,14 +30,6 @@ jobs:
      fail-fast: false
      matrix:
        include:
-          - cuda: "128"
-            cuda_version: 12.8.1
-            cudnn_version: ""
-            python_version: "3.11"
-            pytorch: 2.8.0
-            torch_cuda_arch_list: "7.0 7.5 8.0 8.6 8.7 8.9 9.0+PTX"
-            dockerfile: "Dockerfile-base"
-            platforms: "linux/amd64"
          - cuda: "128"
            cuda_version: 12.8.1
            cudnn_version: ""
@@ -160,14 +152,6 @@ jobs:
      fail-fast: false
      matrix:
        include:
-          - cuda: "128"
-            cuda_version: 12.8.1
-            cudnn_version: ""
-            python_version: "3.11"
-            pytorch: 2.8.0
-            torch_cuda_arch_list: "7.0 7.5 8.0 8.6 8.7 8.9 9.0+PTX"
-            dockerfile: "Dockerfile-uv-base"
-            platforms: "linux/amd64"
          - cuda: "128"
            cuda_version: 12.8.1
            cudnn_version: ""
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -18,12 +18,6 @@ jobs:
      fail-fast: false
      matrix:
        include:
-          - cuda: 128
-            cuda_version: 12.8.1
-            python_version: "3.11"
-            pytorch: 2.8.0
-            axolotl_extras:
-            platforms: "linux/amd64"
          - cuda: 128
            cuda_version: 12.8.1
            python_version: "3.11"
@@ -186,12 +180,6 @@ jobs:
      fail-fast: false
      matrix:
        include:
-          - cuda: 128
-            cuda_version: 12.8.1
-            python_version: "3.11"
-            pytorch: 2.8.0
-            axolotl_extras:
-            platforms: "linux/amd64"
          - cuda: 128
            cuda_version: 12.8.1
            python_version: "3.11"
--- a/.github/workflows/multi-gpu-e2e.yml
+++ b/.github/workflows/multi-gpu-e2e.yml
@@ -33,12 +33,6 @@ jobs:
      fail-fast: false
      matrix:
        include:
-          - cuda: 128
-            cuda_version: 12.8.1
-            python_version: "3.11"
-            pytorch: 2.8.0
-            axolotl_extras: fbgemm-gpu
-            num_gpus: 2
 #          - cuda: 129
 #            cuda_version: 12.9.1
 #            python_version: "3.12"
--- a/.github/workflows/nightlies.yml
+++ b/.github/workflows/nightlies.yml
@@ -15,11 +15,6 @@ jobs:
      fail-fast: false
      matrix:
        include:
-          - cuda: 128
-            cuda_version: 12.8.1
-            python_version: "3.11"
-            pytorch: 2.8.0
-            axolotl_extras:
          - cuda: 128
            cuda_version: 12.8.1
            python_version: "3.11"
@@ -67,11 +62,6 @@ jobs:
    strategy:
      matrix:
        include:
-          - cuda: 128
-            cuda_version: 12.8.1
-            python_version: "3.11"
-            pytorch: 2.8.0
-            axolotl_extras:
          - cuda: 128
            cuda_version: 12.8.1
            python_version: "3.11"
--- a/.github/workflows/tests-nightly.yml
+++ b/.github/workflows/tests-nightly.yml
@@ -44,7 +44,7 @@ jobs:
      fail-fast: false
      matrix:
        python_version: ["3.12"]  # TODO include py3.14 once https://github.com/mistralai/mistral-common/pull/194 is merged
-        pytorch_version: ["2.8.0", "2.9.1", "2.10.0"]
+        pytorch_version: ["2.9.1", "2.10.0"]
    timeout-minutes: 20

    steps:
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -69,10 +69,8 @@ jobs:
      fail-fast: false
      matrix:
        python_version: ["3.12", "3.14"]
-        pytorch_version: ["2.8.0", "2.9.1", "2.10.0"]
+        pytorch_version: ["2.9.1", "2.10.0"]
        exclude:
-          - python_version: "3.14"
-            pytorch_version: "2.8.0"
          - python_version: "3.14"
            pytorch_version: "2.9.1"
    timeout-minutes: 20
@@ -165,10 +163,8 @@ jobs:
      fail-fast: false
      matrix:
        python_version: ["3.12", "3.14"]
-        pytorch_version: ["2.8.0", "2.9.1", "2.10.0"]
+        pytorch_version: ["2.9.1", "2.10.0"]
        exclude:
-          - python_version: "3.14"
-            pytorch_version: "2.8.0"
          - python_version: "3.14"
            pytorch_version: "2.9.1"
    timeout-minutes: 30
@@ -329,13 +325,6 @@ jobs:
      fail-fast: false
      matrix:
        include:
-          - cuda: 128
-            cuda_version: 12.8.1
-            python_version: "3.11"
-            pytorch: 2.8.0
-            num_gpus: 1
-            gpu_type: "B200"
-            axolotl_extras: fbgemm-gpu
          - cuda: 128
            cuda_version: 12.8.1
            python_version: "3.11"
--- a/README.md
+++ b/README.md
@@ -87,7 +87,7 @@ Features:

 - NVIDIA GPU (Ampere or newer for `bf16` and Flash Attention) or AMD GPU
 - Python 3.11
-- PyTorch ≥2.8.0
+- PyTorch ≥2.9.1

 ### Google Colab

--- a/docker/Dockerfile-uv-base
+++ b/docker/Dockerfile-uv-base
@@ -36,22 +36,22 @@ RUN uv pip install packaging setuptools wheel psutil \
    && uv pip install awscli pydantic

 RUN if [ "$TARGETARCH" = "amd64" ]; then \
-        uv pip install --no-build-isolation "causal_conv1d @ git+https://github.com/Dao-AILab/causal-conv1d.git@main"; \
-        uv pip install "mamba_ssm @ git+https://github.com/state-spaces/mamba.git@main"; \
+        MAMBA_SKIP_CUDA_BUILD=TRUE CAUSAL_CONV1D_SKIP_CUDA_BUILD=TRUE uv pip install --no-build-isolation mamba_ssm causal_conv1d; \
    fi

 # Map Python version (e.g., 3.12 -> cp312)
 RUN PYTHON_CP="cp$(echo $PYTHON_VERSION | tr -d '.')" && \
    # Map PyTorch version (e.g., 2.9.1 -> torch2.9, 2.10.0 -> torch2.10)
    TORCH_TAG="torch$(echo $PYTORCH_VERSION | grep -oP '^\d+\.\d+')" && \
+    LINUX_TAG="manylinux_" && \
    # Map architecture
    case "$TARGETARCH" in \
-        amd64) ARCH_TAG="x86_64" ;; \
-        arm64) ARCH_TAG="aarch64" ;; \
+        amd64) ARCH_TAG="2_24_x86_64.manylinux_2_28_x86_64" ;; \
+        arm64) ARCH_TAG="2_34_aarch64" ;; \
        *) echo "Unsupported architecture: $TARGETARCH"; exit 1 ;; \
    esac && \
    WHL_VERSION="v0.7.16" && \
-    WHL_FILE="flash_attn-2.8.3+cu${CUDA}${TORCH_TAG}-${PYTHON_CP}-${PYTHON_CP}-linux_${ARCH_TAG}.whl" && \
+    WHL_FILE="flash_attn-2.8.3+cu${CUDA}${TORCH_TAG}-${PYTHON_CP}-${PYTHON_CP}-${LINUX_TAG}${ARCH_TAG}.whl" && \
    wget -nv "https://github.com/mjun0812/flash-attention-prebuild-wheels/releases/download/${WHL_VERSION}/${WHL_FILE}" && \
    uv pip install --no-cache-dir "${WHL_FILE}" && \
    rm "${WHL_FILE}"