diff --git a/.github/workflows/base.yml b/.github/workflows/base.yml index 0fe0d2b25..521d26201 100644 --- a/.github/workflows/base.yml +++ b/.github/workflows/base.yml @@ -30,14 +30,6 @@ jobs: fail-fast: false matrix: include: - - cuda: "128" - cuda_version: 12.8.1 - cudnn_version: "" - python_version: "3.11" - pytorch: 2.8.0 - torch_cuda_arch_list: "7.0 7.5 8.0 8.6 8.7 8.9 9.0+PTX" - dockerfile: "Dockerfile-base" - platforms: "linux/amd64" - cuda: "128" cuda_version: 12.8.1 cudnn_version: "" @@ -160,14 +152,6 @@ jobs: fail-fast: false matrix: include: - - cuda: "128" - cuda_version: 12.8.1 - cudnn_version: "" - python_version: "3.11" - pytorch: 2.8.0 - torch_cuda_arch_list: "7.0 7.5 8.0 8.6 8.7 8.9 9.0+PTX" - dockerfile: "Dockerfile-uv-base" - platforms: "linux/amd64" - cuda: "128" cuda_version: 12.8.1 cudnn_version: "" diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index a3a24537c..1fb6290d9 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -18,12 +18,6 @@ jobs: fail-fast: false matrix: include: - - cuda: 128 - cuda_version: 12.8.1 - python_version: "3.11" - pytorch: 2.8.0 - axolotl_extras: - platforms: "linux/amd64" - cuda: 128 cuda_version: 12.8.1 python_version: "3.11" @@ -186,12 +180,6 @@ jobs: fail-fast: false matrix: include: - - cuda: 128 - cuda_version: 12.8.1 - python_version: "3.11" - pytorch: 2.8.0 - axolotl_extras: - platforms: "linux/amd64" - cuda: 128 cuda_version: 12.8.1 python_version: "3.11" diff --git a/.github/workflows/multi-gpu-e2e.yml b/.github/workflows/multi-gpu-e2e.yml index 2bb499ded..2c5d76e4c 100644 --- a/.github/workflows/multi-gpu-e2e.yml +++ b/.github/workflows/multi-gpu-e2e.yml @@ -33,12 +33,6 @@ jobs: fail-fast: false matrix: include: - - cuda: 128 - cuda_version: 12.8.1 - python_version: "3.11" - pytorch: 2.8.0 - axolotl_extras: fbgemm-gpu - num_gpus: 2 # - cuda: 129 # cuda_version: 12.9.1 # python_version: "3.12" diff --git a/.github/workflows/nightlies.yml b/.github/workflows/nightlies.yml index 0372f5c7a..19643bea5 100644 --- a/.github/workflows/nightlies.yml +++ b/.github/workflows/nightlies.yml @@ -15,11 +15,6 @@ jobs: fail-fast: false matrix: include: - - cuda: 128 - cuda_version: 12.8.1 - python_version: "3.11" - pytorch: 2.8.0 - axolotl_extras: - cuda: 128 cuda_version: 12.8.1 python_version: "3.11" @@ -67,11 +62,6 @@ jobs: strategy: matrix: include: - - cuda: 128 - cuda_version: 12.8.1 - python_version: "3.11" - pytorch: 2.8.0 - axolotl_extras: - cuda: 128 cuda_version: 12.8.1 python_version: "3.11" diff --git a/.github/workflows/tests-nightly.yml b/.github/workflows/tests-nightly.yml index 663b0476e..235aebcfa 100644 --- a/.github/workflows/tests-nightly.yml +++ b/.github/workflows/tests-nightly.yml @@ -44,7 +44,7 @@ jobs: fail-fast: false matrix: python_version: ["3.12"] # TODO include py3.14 once https://github.com/mistralai/mistral-common/pull/194 is merged - pytorch_version: ["2.8.0", "2.9.1", "2.10.0"] + pytorch_version: ["2.9.1", "2.10.0"] timeout-minutes: 20 steps: diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 5099e447c..d753afe01 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -69,10 +69,8 @@ jobs: fail-fast: false matrix: python_version: ["3.12", "3.14"] - pytorch_version: ["2.8.0", "2.9.1", "2.10.0"] + pytorch_version: ["2.9.1", "2.10.0"] exclude: - - python_version: "3.14" - pytorch_version: "2.8.0" - python_version: "3.14" pytorch_version: "2.9.1" timeout-minutes: 20 @@ -165,10 +163,8 @@ jobs: fail-fast: false matrix: python_version: ["3.12", "3.14"] - pytorch_version: ["2.8.0", "2.9.1", "2.10.0"] + pytorch_version: ["2.9.1", "2.10.0"] exclude: - - python_version: "3.14" - pytorch_version: "2.8.0" - python_version: "3.14" pytorch_version: "2.9.1" timeout-minutes: 30 @@ -329,13 +325,6 @@ jobs: fail-fast: false matrix: include: - - cuda: 128 - cuda_version: 12.8.1 - python_version: "3.11" - pytorch: 2.8.0 - num_gpus: 1 - gpu_type: "B200" - axolotl_extras: fbgemm-gpu - cuda: 128 cuda_version: 12.8.1 python_version: "3.11" diff --git a/README.md b/README.md index a425e45b8..e353d20ad 100644 --- a/README.md +++ b/README.md @@ -87,7 +87,7 @@ Features: - NVIDIA GPU (Ampere or newer for `bf16` and Flash Attention) or AMD GPU - Python 3.11 -- PyTorch ≥2.8.0 +- PyTorch ≥2.9.1 ### Google Colab diff --git a/docker/Dockerfile-uv-base b/docker/Dockerfile-uv-base index 0e7acbe29..f16777378 100644 --- a/docker/Dockerfile-uv-base +++ b/docker/Dockerfile-uv-base @@ -36,22 +36,22 @@ RUN uv pip install packaging setuptools wheel psutil \ && uv pip install awscli pydantic RUN if [ "$TARGETARCH" = "amd64" ]; then \ - uv pip install --no-build-isolation "causal_conv1d @ git+https://github.com/Dao-AILab/causal-conv1d.git@main"; \ - uv pip install "mamba_ssm @ git+https://github.com/state-spaces/mamba.git@main"; \ + MAMBA_SKIP_CUDA_BUILD=TRUE CAUSAL_CONV1D_SKIP_CUDA_BUILD=TRUE uv pip install --no-build-isolation mamba_ssm causal_conv1d; \ fi # Map Python version (e.g., 3.12 -> cp312) RUN PYTHON_CP="cp$(echo $PYTHON_VERSION | tr -d '.')" && \ # Map PyTorch version (e.g., 2.9.1 -> torch2.9, 2.10.0 -> torch2.10) TORCH_TAG="torch$(echo $PYTORCH_VERSION | grep -oP '^\d+\.\d+')" && \ + LINUX_TAG="manylinux_" && \ # Map architecture case "$TARGETARCH" in \ - amd64) ARCH_TAG="x86_64" ;; \ - arm64) ARCH_TAG="aarch64" ;; \ + amd64) ARCH_TAG="2_24_x86_64.manylinux_2_28_x86_64" ;; \ + arm64) ARCH_TAG="2_34_aarch64" ;; \ *) echo "Unsupported architecture: $TARGETARCH"; exit 1 ;; \ esac && \ WHL_VERSION="v0.7.16" && \ - WHL_FILE="flash_attn-2.8.3+cu${CUDA}${TORCH_TAG}-${PYTHON_CP}-${PYTHON_CP}-linux_${ARCH_TAG}.whl" && \ + WHL_FILE="flash_attn-2.8.3+cu${CUDA}${TORCH_TAG}-${PYTHON_CP}-${PYTHON_CP}-${LINUX_TAG}${ARCH_TAG}.whl" && \ wget -nv "https://github.com/mjun0812/flash-attention-prebuild-wheels/releases/download/${WHL_VERSION}/${WHL_FILE}" && \ uv pip install --no-cache-dir "${WHL_FILE}" && \ rm "${WHL_FILE}"