run PR e2e docker CI tests in Modal (#1217) [skip ci]

* wip modal for ci

* handle falcon layernorms better

* update

* rebuild the template each time with the pseudo-ARGS

* fix ref

* update tests to use modal

* cleanup ci script

* make sure to install jinja2 also

* kickoff the gh action on gh hosted runners and specify num gpus
This commit is contained in:
Wing Lian
2024-01-26 16:13:27 -05:00
committed by GitHub
parent af29d81f80
commit 36d053f6f0
6 changed files with 141 additions and 42 deletions

View File

@@ -1,40 +0,0 @@
ARG BASE_TAG=main-base
FROM winglian/axolotl-base:$BASE_TAG
ARG TORCH_CUDA_ARCH_LIST="7.0 7.5 8.0 8.6+PTX"
ARG AXOLOTL_EXTRAS=""
ARG CUDA="118"
ENV BNB_CUDA_VERSION=$CUDA
ARG PYTORCH_VERSION="2.0.1"
ARG GITHUB_REF="main"
ENV PYTORCH_VERSION=$PYTORCH_VERSION
RUN apt-get update && \
apt-get install -y --allow-change-held-packages vim curl nano libnccl2 libnccl-dev
WORKDIR /workspace
RUN git clone --depth=1 https://github.com/OpenAccess-AI-Collective/axolotl.git
WORKDIR /workspace/axolotl
RUN git fetch origin +$GITHUB_REF && \
git checkout FETCH_HEAD
# If AXOLOTL_EXTRAS is set, append it in brackets
RUN if [ "$AXOLOTL_EXTRAS" != "" ] ; then \
pip install -e .[deepspeed,flash-attn,mamba-ssm,$AXOLOTL_EXTRAS]; \
else \
pip install -e .[deepspeed,flash-attn,mamba-ssm]; \
fi
# So we can test the Docker image
RUN pip install pytest
# fix so that git fetch/pull from remote works
RUN git config remote.origin.fetch "+refs/heads/*:refs/remotes/origin/*" && \
git config --get remote.origin.fetch
# helper for huggingface-login cli
RUN git config --global credential.helper store