diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index 1395f7400..d20db7065 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -13,17 +13,17 @@ jobs:
       fail-fast: false
       matrix:
         include:
-          - cuda: cu118
+          - cuda: 118
            cuda_version: 11.8.0
            python_version: "3.9"
            pytorch: 2.0.1
            axolotl_extras:
-          - cuda: cu118
+          - cuda: 118
            cuda_version: 11.8.0
            python_version: "3.10"
            pytorch: 2.0.1
            axolotl_extras:
-          - cuda: cu118
+          - cuda: 118
            cuda_version: 11.8.0
            python_version: "3.9"
            pytorch: 2.0.1
@@ -49,10 +49,11 @@ jobs:
        with:
          context: .
          build-args: |
-            BASE_TAG=${{ github.ref_name }}-base-py${{ matrix.python_version }}-${{ matrix.cuda }}-${{ matrix.pytorch }}
+            BASE_TAG=${{ github.ref_name }}-base-py${{ matrix.python_version }}-cu${{ matrix.cuda }}-${{ matrix.pytorch }}
+            CUDA=${{ matrix.cuda }}
          file: ./docker/Dockerfile
          push: ${{ github.event_name != 'pull_request' }}
-          tags: ${{ steps.metadata.outputs.tags }}-py${{ matrix.python_version }}-${{ matrix.cuda }}-${{ matrix.pytorch }}${{ matrix.axolotl_extras != '' && '-' || '' }}${{ matrix.axolotl_extras }}
+          tags: ${{ steps.metadata.outputs.tags }}-py${{ matrix.python_version }}-cu${{ matrix.cuda }}-${{ matrix.pytorch }}${{ matrix.axolotl_extras != '' && '-' || '' }}${{ matrix.axolotl_extras }}
          labels: ${{ steps.metadata.outputs.labels }}
  build-axolotl-runpod:
    needs: build-axolotl
diff --git a/README.md b/README.md
index 1be423454..c61899c13 100644
--- a/README.md
+++ b/README.md
@@ -69,7 +69,7 @@ Get started with Axolotl in just a few steps! This quickstart guide will walk yo
 ```bash
 git clone https://github.com/OpenAccess-AI-Collective/axolotl

-pip3 install -e .
+pip3 install -e .[flash-attn]
 pip3 install -U git+https://github.com/huggingface/peft.git

 # finetune lora
diff --git a/docker/Dockerfile b/docker/Dockerfile
index b5198e1d9..a9d94f03c 100644
--- a/docker/Dockerfile
+++ b/docker/Dockerfile
@@ -16,9 +16,9 @@ RUN git clone --depth=1 https://github.com/OpenAccess-AI-Collective/axolotl.git
 # If AXOLOTL_EXTRAS is set, append it in brackets
 RUN cd axolotl && \
     if [ "$AXOLOTL_EXTRAS" != "" ] ; then \
-        pip install -e .[$AXOLOTL_EXTRAS]; \
+        pip install -e .[flash-attn,$AXOLOTL_EXTRAS]; \
     else \
-        pip install -e .; \
+        pip install -e .[flash-attn]; \
     fi

 # fix so that git fetch/pull from remote works
diff --git a/docker/Dockerfile-base b/docker/Dockerfile-base
index 6fbe00ddf..a39f47329 100644
--- a/docker/Dockerfile-base
+++ b/docker/Dockerfile-base
@@ -31,26 +31,6 @@ WORKDIR /workspace
 RUN python3 -m pip install --upgrade pip && pip3 install packaging && \
     python3 -m pip install --no-cache-dir -U torch==${PYTORCH_VERSION}+cu${CUDA} --extra-index-url https://download.pytorch.org/whl/cu$CUDA

-
-FROM base-builder AS flash-attn-builder
-
-WORKDIR /workspace
-
-ARG TORCH_CUDA_ARCH_LIST="7.0 7.5 8.0 8.6 9.0+PTX"
-
-RUN git clone https://github.com/Dao-AILab/flash-attention.git && \
-    cd flash-attention && \
-    git checkout v2.0.4 && \
-    python3 setup.py bdist_wheel && \
-    cd csrc/fused_dense_lib && \
-    python3 setup.py bdist_wheel && \
-    cd ../xentropy && \
-    python3 setup.py bdist_wheel && \
-    cd ../rotary && \
-    python3 setup.py bdist_wheel && \
-    cd ../layer_norm && \
-    python3 setup.py bdist_wheel
-
 FROM base-builder AS deepspeed-builder

 ARG TORCH_CUDA_ARCH_LIST="7.0 7.5 8.0 8.6 9.0+PTX"
@@ -90,13 +70,8 @@ RUN mkdir -p /workspace/wheels/bitsandbytes
 COPY --from=deepspeed-builder /workspace/DeepSpeed/dist/deepspeed-*.whl wheels
 COPY --from=bnb-builder /workspace/bitsandbytes/dist/bitsandbytes-*.whl wheels
 COPY --from=bnb-builder /workspace/bitsandbytes/bitsandbytes/libbitsandbytes*.so wheels/bitsandbytes
-COPY --from=flash-attn-builder /workspace/flash-attention/dist/flash_attn-*.whl wheels
-COPY --from=flash-attn-builder /workspace/flash-attention/csrc/fused_dense_lib/dist/fused_dense_lib-*.whl wheels
-COPY --from=flash-attn-builder /workspace/flash-attention/csrc/xentropy/dist/xentropy_cuda_lib-*.whl wheels
-COPY --from=flash-attn-builder /workspace/flash-attention/csrc/rotary/dist/rotary_emb-*.whl wheels
-COPY --from=flash-attn-builder /workspace/flash-attention/csrc/layer_norm/dist/dropout_layer_norm-*.whl wheels

-RUN pip3 install wheels/deepspeed-*.whl wheels/flash_attn-*.whl wheels/fused_dense_lib-*.whl wheels/xentropy_cuda_lib-*.whl wheels/rotary_emb-*.whl wheels/dropout_layer_norm-*.whl
+RUN pip3 install wheels/deepspeed-*.whl
 RUN cd /workspace/builds/bitsandbytes && python3 setup.py install
 RUN git lfs install --skip-repo
 RUN pip3 install awscli && \
diff --git a/requirements.txt b/requirements.txt
index ed7e0fb7f..a9f1d5047 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -6,6 +6,7 @@ addict
 fire
 PyYAML==6.0
 datasets
+flash-attn==2.0.8
 sentencepiece
 wandb
 einops
diff --git a/setup.py b/setup.py
index 85dde35a1..6cacd0c5b 100644
--- a/setup.py
+++ b/setup.py
@@ -7,6 +7,7 @@ with open("./requirements.txt", encoding="utf-8") as requirements_file:
     # don't include peft yet until we check the int4
     # need to manually install peft for now...
     reqs = [r.strip() for r in requirements_file.readlines() if "peft" not in r]
+    reqs = [r for r in reqs if "flash-attn" not in r]
     reqs = [r for r in reqs if r and r[0] != "#"]
     for r in reqs:
         install_requires.append(r)
@@ -25,8 +26,10 @@ setup(
         "gptq_triton": [
             "alpaca_lora_4bit[triton] @ git+https://github.com/winglian/alpaca_lora_4bit.git@setup_pip",
         ],
+        "flash-attn": [
+            "flash-attn==2.0.8",
+        ],
         "extras": [
-            "flash-attn",
             "deepspeed",
         ],
     },
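Taken together, the patch stops building flash-attention from source in `Dockerfile-base`, pins `flash-attn==2.0.8` in `requirements.txt`, filters it out of `install_requires` in `setup.py`, and exposes it as an opt-in `flash-attn` extra. A minimal sketch of the install paths this enables follows; the exact extra combinations shown are illustrative, not part of the patch:

```bash
# Editable install with flash-attn pulled in via the new extra,
# matching the updated README quickstart
pip3 install -e .[flash-attn]

# Extras can be stacked, e.g. with the existing "extras" group (deepspeed),
# which is how the Dockerfile now installs .[flash-attn,$AXOLOTL_EXTRAS]
pip3 install -e .[flash-attn,extras]
```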