From 990bec63e69ebf8a24a34895fd3ddf573db00fa2 Mon Sep 17 00:00:00 2001 From: Wing Lian Date: Sun, 7 May 2023 17:16:05 -0400 Subject: [PATCH] docker layer caching, build w axolotl from base build --- .github/workflows/base.yml | 3 +++ .github/workflows/main.yml | 10 ++++++++-- docker/Dockerfile | 22 ++++++++-------------- docker/Dockerfile-base | 5 ++++- requirements.txt | 2 -- setup.py | 4 ++++ 6 files changed, 27 insertions(+), 19 deletions(-) diff --git a/.github/workflows/base.yml b/.github/workflows/base.yml index a03b03cce..134ffb7d5 100644 --- a/.github/workflows/base.yml +++ b/.github/workflows/base.yml @@ -9,6 +9,7 @@ on: jobs: build-base: if: github.repository_owner == 'OpenAccess-AI-Collective' + # this job needs to be run on self-hosted GPU runners... runs-on: self-hosted steps: - name: Checkout @@ -33,3 +34,5 @@ jobs: push: ${{ github.event_name != 'pull_request' }} tags: ${{ steps.metadata.outputs.tags }} labels: ${{ steps.metadata.outputs.labels }} + cache-from: type=gha + cache-to: type=gha,mode=max diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 742080540..acf170dbb 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -8,8 +8,10 @@ on: jobs: build-axolotl: - if: github.repository_owner == 'OpenAccess-AI-Collective' - runs-on: self-hosted + # We specify this so it doesn't run automatically in our other github org + # that we use to build the base on self-hosted GPU runners + if: github.repository_owner == 'winglian' + runs-on: ubuntu-latest steps: - name: Checkout uses: actions/checkout@v3 @@ -29,7 +31,11 @@ jobs: uses: docker/build-push-action@v4 with: context: . 
+ build-args: | + BASE_TAG=dev file: ./docker/Dockerfile push: ${{ github.event_name != 'pull_request' }} tags: ${{ steps.metadata.outputs.tags }} labels: ${{ steps.metadata.outputs.labels }} + cache-from: type=gha + cache-to: type=gha,mode=max diff --git a/docker/Dockerfile b/docker/Dockerfile index 91a6dbd55..4e84fff99 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -1,22 +1,16 @@ -FROM huggingface/transformers-pytorch-deepspeed-latest-gpu:latest +ARG BASE_TAG=main +FROM winglian/axolotl:$BASE_TAG ARG TORCH_CUDA_ARCH_LIST="7.0 7.5 8.0 8.6+PTX" + RUN apt-get update && \ - apt-get install -y build-essential ninja-build vim git-lfs && \ - git lfs install --skip-repo && \ - mkdir /tmp/wheels && \ - cd /tmp/wheels && \ - curl -L -O https://github.com/winglian/axolotl/raw/wheels/wheels/deepspeed-0.9.2%2B7ddc3b01-cp38-cp38-linux_x86_64.whl && \ - curl -L -O https://github.com/winglian/axolotl/raw/wheels/wheels/flash_attn-1.0.4-cp38-cp38-linux_x86_64.whl && \ - pip install deepspeed-0.9.2%2B7ddc3b01-cp38-cp38-linux_x86_64.whl && \ - pip install flash_attn-1.0.4-cp38-cp38-linux_x86_64.whl && \ - pip install "peft @ git+https://github.com/huggingface/peft.git@main" --force-reinstall --no-dependencies && \ - pip install awscli + apt-get install -y vim WORKDIR /workspace + +# The base image ships with `pydantic==1.8.2` which is not working +RUN python3 -m pip install -U --no-cache-dir pydantic + ARG REF=main RUN git clone https://github.com/winglian/axolotl && cd axolotl && git checkout $REF && \ pip install -e .[int4] - -RUN pip3 install --force-reinstall https://download.pytorch.org/whl/nightly/cu117/torch-2.0.0.dev20230301%2Bcu117-cp38-cp38-linux_x86_64.whl --index-url https://download.pytorch.org/whl/nightly/cu117 - diff --git a/docker/Dockerfile-base b/docker/Dockerfile-base index 21716f27b..54738ddb8 100644 --- a/docker/Dockerfile-base +++ b/docker/Dockerfile-base @@ -66,4 +66,7 @@ RUN git lfs install --skip-repo RUN pip3 install "peft @
git+https://github.com/huggingface/peft.git@main" \ "accelerate @ git+https://github.com/huggingface/accelerate.git@main" \ "transformers @ git+https://github.com/huggingface/transformers.git@main" && \ - pip3 install awscli + pip3 install awscli && \ + # The base image ships with `pydantic==1.8.2` which is not working + pip3 install -U --no-cache-dir pydantic + diff --git a/requirements.txt b/requirements.txt index 91e4267e2..1d6c17daa 100644 --- a/requirements.txt +++ b/requirements.txt @@ -9,7 +9,5 @@ datasets accelerate sentencepiece wandb -flash-attn -deepspeed einops xformers diff --git a/setup.py b/setup.py index 8abac9684..a183bcda1 100644 --- a/setup.py +++ b/setup.py @@ -23,5 +23,9 @@ setup( 'int4_triton': [ "alpaca_lora_4bit[triton] @ git+https://github.com/winglian/alpaca_lora_4bit.git@setup_pip", ], + 'extras': [ + 'flash-attn', + 'deepspeed', + ] }, )