From 581dd324cc7fee332d0cfe906a3e8665fdac0034 Mon Sep 17 00:00:00 2001 From: Wing Lian Date: Wed, 11 Jun 2025 17:11:06 -0400 Subject: [PATCH] build base images for torch 2.7.1 (#2764) * build base images for torch 2.7.1 * fix: update base docker to use torch 2.7.1 * fix: update doc for main base to use 2.7.1 * make sure to install fa2 in base uv too * use no build isolation for uv+flashattn * install psutil also for fa2 * longer timeout for flash attn build --------- Co-authored-by: NanoCode012 --- .github/workflows/base.yml | 8 +++++--- docker/Dockerfile-base | 2 +- docker/Dockerfile-base-next | 2 +- docker/Dockerfile-uv-base | 6 +++++- docs/docker.qmd | 6 +++--- 5 files changed, 15 insertions(+), 9 deletions(-) diff --git a/.github/workflows/base.yml b/.github/workflows/base.yml index 6b750fc5a..966bd2f5b 100644 --- a/.github/workflows/base.yml +++ b/.github/workflows/base.yml @@ -16,6 +16,7 @@ on: jobs: build-base: if: github.repository_owner == 'axolotl-ai-cloud' + timeout-minutes: 480 # this job needs to be run on self-hosted GPU runners... runs-on: ubuntu-latest-m strategy: @@ -47,14 +48,14 @@ jobs: cuda_version: 12.6.3 cudnn_version: "" python_version: "3.11" - pytorch: 2.7.0 + pytorch: 2.7.1 torch_cuda_arch_list: "7.0 7.5 8.0 8.6 8.7 8.9 9.0+PTX" dockerfile: "Dockerfile-base" - cuda: "128" cuda_version: 12.6.3 cudnn_version: "" python_version: "3.11" - pytorch: 2.7.0 + pytorch: 2.7.1 torch_cuda_arch_list: "7.0 7.5 8.0 8.6 8.7 8.9 9.0+PTX" dockerfile: "Dockerfile-base" - cuda: "128" @@ -106,6 +107,7 @@ jobs: TORCH_CUDA_ARCH_LIST=${{ matrix.torch_cuda_arch_list }} build-base-uv: if: github.repository_owner == 'axolotl-ai-cloud' + timeout-minutes: 480 runs-on: ubuntu-latest-m strategy: fail-fast: false @@ -122,7 +124,7 @@ jobs: cuda_version: 12.8.1 cudnn_version: "" python_version: "3.11" - pytorch: 2.7.0 + pytorch: 2.7.1 torch_cuda_arch_list: "7.0 7.5 8.0 8.6 8.7 8.9 9.0+PTX" dockerfile: "Dockerfile-uv-base" steps: diff --git a/docker/Dockerfile-base b/docker/Dockerfile-base index cf1af9682..cc9ca2f2d 100644 --- a/docker/Dockerfile-base +++ b/docker/Dockerfile-base @@ -38,6 +38,6 @@ RUN git lfs install --skip-repo && \ # The base image ships with `pydantic==1.8.2` which is not working pip3 install -U --no-cache-dir pydantic==1.10.10 -RUN if [ "$PYTORCH_VERSION" = "2.7.0" ] ; then \ +RUN if [ "$PYTORCH_VERSION" = "2.7.1" ] ; then \ pip3 install flash-attn==2.7.4.post1; \ fi diff --git a/docker/Dockerfile-base-next b/docker/Dockerfile-base-next index a968b5913..85bac2516 100644 --- a/docker/Dockerfile-base-next +++ b/docker/Dockerfile-base-next @@ -29,7 +29,7 @@ ENV PATH="/root/miniconda3/envs/py${PYTHON_VERSION}/bin:${PATH}" WORKDIR /workspace RUN python3 -m pip install --upgrade pip && pip3 install packaging && \ - python3 -m pip install --no-cache-dir -U torch==2.7.0 --extra-index-url https://download.pytorch.org/whl/test/cu$CUDA && \ + python3 -m pip install --no-cache-dir -U torch==2.7.1 --extra-index-url https://download.pytorch.org/whl/test/cu$CUDA && \ python3 -m pip install --no-cache-dir "causal_conv1d @ git+https://github.com/Dao-AILab/causal-conv1d.git@main" && \ python3 -m pip install --no-cache-dir "mamba_ssm @ git+https://github.com/state-spaces/mamba.git@main" diff --git a/docker/Dockerfile-uv-base b/docker/Dockerfile-uv-base index 5ac8d86c7..c612278ae 100644 --- a/docker/Dockerfile-uv-base +++ b/docker/Dockerfile-uv-base @@ -29,8 +29,12 @@ RUN uv venv --no-project --relocatable axolotl-venv ENV PATH="/workspace/axolotl-venv/bin:${PATH}" -RUN uv pip install packaging setuptools wheel \ +RUN uv pip install packaging setuptools wheel psutil \ && uv pip install torch==${PYTORCH_VERSION} \ && uv pip install --no-build-isolation "causal_conv1d @ git+https://github.com/Dao-AILab/causal-conv1d.git@main" \ && uv pip install "mamba_ssm @ git+https://github.com/state-spaces/mamba.git@main" \ && uv pip install awscli pydantic + +RUN if [ "$PYTORCH_VERSION" = "2.7.1" ] ; then \ + uv pip install --no-build-isolation flash-attn==2.7.4.post1; \ + fi diff --git a/docs/docker.qmd b/docs/docker.qmd index 7b236b960..bc26a795f 100644 --- a/docs/docker.qmd +++ b/docs/docker.qmd @@ -9,7 +9,7 @@ format: This section describes the different Docker images that are released by AxolotlAI at [Docker Hub](https://hub.docker.com/u/axolotlai). ::: {.callout-important} -For Blackwell GPUs, please use the tags with Pytorch 2.7.0 and CUDA 12.8. +For Blackwell GPUs, please use the tags with Pytorch 2.7.1 and CUDA 12.8. ::: ## Base @@ -32,8 +32,8 @@ main-base-py{python_version}-cu{cuda_version}-{pytorch_version} Tags examples: -- `main-base-py3.11-cu128-2.7.0` -- `main-base-py3.11-cu126-2.7.0` +- `main-base-py3.11-cu128-2.7.1` +- `main-base-py3.11-cu126-2.7.1` - `main-base-py3.11-cu124-2.6.0` - `main-base-py3.11-cu124-2.5.1`