# CUDA training image: conda + torch, with DeepSpeed / bitsandbytes / apex built in.
# Change history (from upstream PR): flash-attn moved to pip extras (skipped for
# basic CI); `packaging` and `wheel` added for builds; unused wheels removed;
# `--no-build-isolation` used where flash-attn requires it.
# Build-time arguments for selecting the CUDA base image. ARGs declared before
# FROM are visible only in FROM lines; redeclare inside a stage to use them there.
ARG CUDA_VERSION="11.8.0"
ARG CUDNN_VERSION="8"
ARG UBUNTU_VERSION="22.04"

# Cap parallel compile jobs for heavy native builds (redeclare in-stage before use).
ARG MAX_JOBS=4
FROM nvidia/cuda:$CUDA_VERSION-cudnn$CUDNN_VERSION-devel-ubuntu$UBUNTU_VERSION AS base-builder

ENV PATH="/root/miniconda3/bin:${PATH}"

ARG PYTHON_VERSION="3.9"
ARG PYTORCH_VERSION="2.0.1"
ARG CUDA="118"

ENV PYTHON_VERSION=$PYTHON_VERSION

# Combine `update` and `install` in ONE layer (a standalone `apt-get update`
# layer goes stale and causes 404s on later installs — hadolint DL3009), skip
# recommended packages, and clean the apt lists in the same layer so they
# never persist in the image.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        build-essential \
        git \
        git-lfs \
        libaio-dev \
        ninja-build \
        wget \
    && rm -rf /var/lib/apt/lists/*

# Install Miniconda in batch mode (installs to /root/miniconda3) and remove the
# installer in the same layer.
# NOTE(review): installer is unpinned and not checksum-verified — consider
# pinning a specific Miniconda release and checking its sha256.
RUN wget \
    https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh \
    && mkdir /root/.conda \
    && bash Miniconda3-latest-Linux-x86_64.sh -b \
    && rm -f Miniconda3-latest-Linux-x86_64.sh

RUN conda create -n "py${PYTHON_VERSION}" python="${PYTHON_VERSION}"

# Put the env's interpreter first on PATH so plain python3/pip3 resolve to it.
ENV PATH="/root/miniconda3/envs/py${PYTHON_VERSION}/bin:${PATH}"

WORKDIR /workspace

# `packaging` is required by several ML build scripts (e.g. flash-attn's setup);
# install the CUDA-matched torch wheel from the PyTorch index.
# `--no-cache-dir` keeps the pip download cache out of the layer (DL3042).
RUN python3 -m pip install --no-cache-dir --upgrade pip packaging && \
    python3 -m pip install --no-cache-dir -U torch==${PYTORCH_VERSION}+cu${CUDA} --extra-index-url https://download.pytorch.org/whl/cu$CUDA
|
# Stage that pre-builds a DeepSpeed wheel so the final stage needs no compiler pass.
FROM base-builder AS deepspeed-builder

# CUDA architectures to compile DeepSpeed's fused ops for.
ARG TORCH_CUDA_ARCH_LIST="7.0 7.5 8.0 8.6 9.0+PTX"

WORKDIR /workspace

# NOTE(review): clone is unpinned — builds are not reproducible until a tag/sha is pinned.
RUN git clone https://github.com/microsoft/DeepSpeed.git

# Use WORKDIR instead of `cd` inside RUN (hadolint DL3003).
WORKDIR /workspace/DeepSpeed

# Precompile all ops except sparse attention; wheel lands in ./dist/.
RUN MAX_CONCURRENCY=8 DS_BUILD_SPARSE_ATTN=0 DS_BUILD_OPS=1 python3 setup.py bdist_wheel
|
# Stage that compiles bitsandbytes' CUDA kernels and packages a wheel.
FROM base-builder AS bnb-builder

WORKDIR /workspace

ARG CUDA="118"
ENV CUDA=$CUDA

# NOTE(review): clone is unpinned — consider pinning a release tag for reproducibility.
RUN git clone https://github.com/TimDettmers/bitsandbytes.git

# Use WORKDIR instead of `cd` inside RUN (hadolint DL3003).
WORKDIR /workspace/bitsandbytes

# Build the CUDA 11.x kernels, then package a wheel into ./dist/.
# `python3` (not bare `python`) for consistency with the rest of the file.
RUN CUDA_VERSION=$CUDA make cuda11x \
    && python3 setup.py bdist_wheel
|
# Final image: assemble prebuilt wheels/artifacts from the builder stages.
FROM base-builder

ARG TORCH_CUDA_ARCH_LIST="7.0 7.5 8.0 8.6 9.0+PTX"
# Persisted as ENV so runtime JIT extension builds target the same architectures.
ENV TORCH_CUDA_ARCH_LIST=$TORCH_CUDA_ARCH_LIST

# Recompile apex against the torch/CUDA combination in this image.
RUN python3 -m pip uninstall -y apex
# NOTE(review): clone is unpinned — consider pinning an apex tag/sha.
RUN git clone https://github.com/NVIDIA/apex
# `MAX_JOBS=1` disables parallel building to avoid CPU memory OOM when building
# the image on GitHub Actions (standard) runners.
WORKDIR /workspace/apex
RUN MAX_JOBS=1 python3 -m pip install -v --disable-pip-version-check --no-cache-dir --no-build-isolation --config-settings "--build-option=--cpp_ext" --config-settings "--build-option=--cuda_ext" ./

# Bring over the bitsandbytes source tree (installed from source below so the
# compiled libbitsandbytes*.so is picked up) plus the prebuilt wheels/artifacts.
RUN mkdir -p /workspace/builds
COPY --from=bnb-builder /workspace/bitsandbytes /workspace/builds/bitsandbytes

RUN mkdir -p /workspace/wheels/bitsandbytes
COPY --from=deepspeed-builder /workspace/DeepSpeed/dist/deepspeed-*.whl /workspace/wheels
COPY --from=bnb-builder /workspace/bitsandbytes/dist/bitsandbytes-*.whl /workspace/wheels
COPY --from=bnb-builder /workspace/bitsandbytes/bitsandbytes/libbitsandbytes*.so /workspace/wheels/bitsandbytes

# `--no-cache-dir` on every pip install keeps the download cache out of the layer (DL3042).
RUN pip3 install --no-cache-dir /workspace/wheels/deepspeed-*.whl

# Install bitsandbytes from the copied source tree; WORKDIR instead of `cd` (DL3003).
WORKDIR /workspace/builds/bitsandbytes
RUN python3 setup.py install

# Restore the conventional working directory for the final image.
WORKDIR /workspace

RUN git lfs install --skip-repo
RUN pip3 install --no-cache-dir awscli && \
    # The base image ships with `pydantic==1.8.2` which is not working
    pip3 install -U --no-cache-dir pydantic==1.10.10