From ee529e2354f29b3ba2baf7ba3ef9fbb052458e8e Mon Sep 17 00:00:00 2001
From: Wing Lian
Date: Thu, 20 Mar 2025 00:15:55 -0400
Subject: [PATCH] use nightly

---
 .github/workflows/base.yml     |  4 ++--
 docker/Dockerfile-base-nightly | 47 ++++++++++++++++++++++++++++++++++
 requirements.txt               |  2 +-
 3 files changed, 50 insertions(+), 3 deletions(-)
 create mode 100644 docker/Dockerfile-base-nightly

diff --git a/.github/workflows/base.yml b/.github/workflows/base.yml
index cb61e8494..cf5c1d45d 100644
--- a/.github/workflows/base.yml
+++ b/.github/workflows/base.yml
@@ -44,7 +44,7 @@ jobs:
             cuda_version: 12.8.1
             cudnn_version: ""
             python_version: "3.11"
-            pytorch: 2.6.0
+            pytorch: nightly
             torch_cuda_arch_list: "7.0 7.5 8.0 8.6 8.7 8.9 9.0+PTX"
     steps:
       - name: Checkout
@@ -67,7 +67,7 @@ jobs:
         uses: docker/build-push-action@v4
         with:
           context: .
-          file: ./docker/Dockerfile-base
+          file: ${{ matrix.pytorch == 'nightly' && './docker/Dockerfile-base-nightly' || './docker/Dockerfile-base' }}
           push: ${{ github.event_name != 'pull_request' }}
           tags: ${{ steps.metadata.outputs.tags }}-base-py${{ matrix.python_version }}-cu${{ matrix.cuda }}-${{ matrix.pytorch }}${{ matrix.axolotl_extras != '' && '-' || '' }}${{ matrix.axolotl_extras }}
           labels: ${{ steps.metadata.outputs.labels }}
diff --git a/docker/Dockerfile-base-nightly b/docker/Dockerfile-base-nightly
new file mode 100644
index 000000000..85805ea41
--- /dev/null
+++ b/docker/Dockerfile-base-nightly
@@ -0,0 +1,47 @@
+# Base image for builds against the PyTorch nightly wheels.
+# ARGs declared before FROM are only visible to the FROM line.
+ARG CUDA_VERSION="12.8.1"
+# Empty by default, matching the cudnn_version of the CUDA 12.8.1 CI matrix entry.
+ARG CUDNN_VERSION=""
+ARG UBUNTU_VERSION="22.04"
+ARG MAX_JOBS=4
+
+FROM nvidia/cuda:$CUDA_VERSION-cudnn$CUDNN_VERSION-devel-ubuntu$UBUNTU_VERSION AS base-builder
+
+# Expose the binaries of the Miniconda install performed below (conda) on PATH.
+ENV PATH="/root/miniconda3/bin:${PATH}"
+
+ARG PYTHON_VERSION="3.11"
+ARG PYTORCH_VERSION="nightly"
+ARG CUDA="128"
+ARG TORCH_CUDA_ARCH_LIST="7.0 7.5 8.0 8.6 9.0+PTX"
+
+ENV PYTHON_VERSION=$PYTHON_VERSION
+ENV TORCH_CUDA_ARCH_LIST=$TORCH_CUDA_ARCH_LIST
+
+# Install build tooling, then Miniconda and a conda env for the requested Python version.
+RUN apt-get update \
+    && apt-get install -y wget git build-essential ninja-build git-lfs libaio-dev pkg-config && rm -rf /var/lib/apt/lists/* \
+    && wget \
+    https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh \
+    && mkdir /root/.conda \
+    && bash Miniconda3-latest-Linux-x86_64.sh -b \
+    && rm -f Miniconda3-latest-Linux-x86_64.sh \
+    && conda create -n "py${PYTHON_VERSION}" python="${PYTHON_VERSION}"
+
+# Put the conda env's bin directory first so python3/pip3 resolve to that env.
+ENV PATH="/root/miniconda3/envs/py${PYTHON_VERSION}/bin:${PATH}"
+
+WORKDIR /workspace
+
+# Nightly wheels are pre-releases: pip needs --pre to pick them over a final release,
+# and --index-url resolves torch from the nightly index instead of PyPI.
+RUN python3 -m pip install --upgrade pip && pip3 install packaging && \
+    python3 -m pip install --no-cache-dir -U --pre torch --index-url https://download.pytorch.org/whl/nightly/cu$CUDA && \
+    python3 -m pip install --no-cache-dir "causal_conv1d @ git+https://github.com/Dao-AILab/causal-conv1d.git@main" && \
+    python3 -m pip install --no-cache-dir "mamba_ssm @ git+https://github.com/state-spaces/mamba.git@main"
+
+RUN git lfs install --skip-repo && \
+    pip3 install --no-cache-dir awscli && \
+    # The base image ships with `pydantic==1.8.2` which is not working
+    pip3 install -U --no-cache-dir pydantic==1.10.10
diff --git a/requirements.txt b/requirements.txt
index ebefc7ad4..fe5b531e7 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,7 +1,7 @@
 --extra-index-url https://huggingface.github.io/autogptq-index/whl/cu118/
 
 # START section of dependencies that don't install on Darwin/MacOS
-bitsandbytes==0.45.2
+bitsandbytes==0.45.3
 triton>=3.0.0
 mamba-ssm==1.2.0.post1
 flash-attn==2.7.4.post1