run PR e2e docker CI tests in Modal (#1217) [skip ci]

* wip modal for ci
* handle falcon layernorms better
* update
* rebuild the template each time with the pseudo-ARGS
* fix ref
* update tests to use modal
* cleanup ci script
* make sure to install jinja2 also
* kick off the gh action on gh hosted runners and specify num gpus
2024-01-26 16:13:27 -05:00
parent af29d81f80
commit 36d053f6f0
6 changed files with 141 additions and 42 deletions
--- a/docker/Dockerfile-tests
+++ b/docker/Dockerfile-tests
@@ -1,40 +0,0 @@
-ARG BASE_TAG=main-base
-FROM winglian/axolotl-base:$BASE_TAG
-
-ARG TORCH_CUDA_ARCH_LIST="7.0 7.5 8.0 8.6+PTX"
-ARG AXOLOTL_EXTRAS=""
-ARG CUDA="118"
-ENV BNB_CUDA_VERSION=$CUDA
-ARG PYTORCH_VERSION="2.0.1"
-ARG GITHUB_REF="main"
-
-ENV PYTORCH_VERSION=$PYTORCH_VERSION
-
-RUN apt-get update && \
-    apt-get install -y --allow-change-held-packages vim curl nano libnccl2 libnccl-dev
-
-WORKDIR /workspace
-
-RUN git clone --depth=1 https://github.com/OpenAccess-AI-Collective/axolotl.git
-
-WORKDIR /workspace/axolotl
-
-RUN git fetch origin +$GITHUB_REF && \
-    git checkout FETCH_HEAD
-
-# If AXOLOTL_EXTRAS is set, append it in brackets
-RUN if [ "$AXOLOTL_EXTRAS" != "" ] ; then \
-        pip install -e .[deepspeed,flash-attn,mamba-ssm,$AXOLOTL_EXTRAS]; \
-    else \
-        pip install -e .[deepspeed,flash-attn,mamba-ssm]; \
-    fi
-
-# So we can test the Docker image
-RUN pip install pytest
-
-# fix so that git fetch/pull from remote works
-RUN git config remote.origin.fetch "+refs/heads/*:refs/remotes/origin/*" && \
-    git config --get remote.origin.fetch
-
-# helper for huggingface-login cli
-RUN git config --global credential.helper store