diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index 9ba06342b..8919a8825 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -31,10 +31,44 @@ jobs:
         with:
           context: .
           build-args: |
-            BASE_TAG=dev-base
+            BASE_TAG=${{ github.ref_name }}-base
           file: ./docker/Dockerfile
           push: ${{ github.event_name != 'pull_request' }}
           tags: ${{ steps.metadata.outputs.tags }}
           labels: ${{ steps.metadata.outputs.labels }}
           cache-from: type=gha
           cache-to: type=gha,mode=max
+  # Runs after build-axolotl: docker/Dockerfile-runpod builds on top of
+  # winglian/axolotl:$BASE_TAG, with BASE_TAG set to the ref name below.
+  build-axolotl-runpod:
+    needs: build-axolotl
+    if: github.repository_owner == 'OpenAccess-AI-Collective'
+    # this job needs to be run on self-hosted GPU runners...
+    runs-on: self-hosted
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v3
+      - name: Docker metadata
+        id: metadata
+        uses: docker/metadata-action@v3
+        with:
+          images: winglian/axolotl-runpod
+      - name: Login to Docker Hub
+        uses: docker/login-action@v2
+        with:
+          username: ${{ secrets.DOCKERHUB_USERNAME }}
+          password: ${{ secrets.DOCKERHUB_TOKEN }}
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v2
+      - name: Build
+        uses: docker/build-push-action@v4
+        with:
+          context: .
+          build-args: |
+            BASE_TAG=${{ github.ref_name }}
+          file: ./docker/Dockerfile-runpod
+          push: ${{ github.event_name != 'pull_request' }}
+          tags: ${{ steps.metadata.outputs.tags }}
+          labels: ${{ steps.metadata.outputs.labels }}
+          cache-from: type=gha
+          cache-to: type=gha,mode=max
diff --git a/docker/Dockerfile b/docker/Dockerfile
index d87ac8f9d..a47d856c8 100644
--- a/docker/Dockerfile
+++ b/docker/Dockerfile
@@ -4,7 +4,7 @@ FROM winglian/axolotl-base:$BASE_TAG
 ARG TORCH_CUDA_ARCH_LIST="7.0 7.5 8.0 8.6+PTX"
 
 RUN apt-get update && \
-    apt-get install -y vim
+    apt-get install -y vim curl
 
 WORKDIR /workspace
 
@@ -16,3 +16,6 @@ RUN mkdir axolotl
 COPY . axolotl/
 RUN cd axolotl && \
     pip install -e .[int4]
+
+# helper for huggingface-login cli
+RUN git config --global credential.helper store
diff --git a/docker/Dockerfile-runpod b/docker/Dockerfile-runpod
new file mode 100644
index 000000000..019cff74c
--- /dev/null
+++ b/docker/Dockerfile-runpod
@@ -0,0 +1,16 @@
+ARG BASE_TAG=main
+FROM winglian/axolotl:$BASE_TAG
+
+# Install an SSH server and tmux, then append a line to ~/.bashrc that, when
+# not already inside tmux, attaches to (or creates) session ssh_tmux and exits.
+# printf is used because RUN defaults to /bin/sh, where `echo -e` is not portable.
+# docker/Dockerfile copies the repo to /workspace/axolotl, hence the script path.
+RUN apt-get update && \
+    apt-get install --yes --no-install-recommends openssh-server tmux && \
+    mkdir -p ~/.ssh && \
+    chmod 700 ~/.ssh && \
+    printf '\n%s\n' '[[ -z "$TMUX" ]] && { tmux attach-session -t ssh_tmux || tmux new-session -s ssh_tmux; exit; }' >> ~/.bashrc && \
+    chmod +x /workspace/axolotl/scripts/runpod-entrypoint.sh
+
+ENTRYPOINT ["/workspace/axolotl/scripts/runpod-entrypoint.sh"]
+CMD ["sleep", "infinity"]
diff --git a/scripts/runpod-entrypoint.sh b/scripts/runpod-entrypoint.sh
new file mode 100644
index 000000000..f712e4ab7
--- /dev/null
+++ b/scripts/runpod-entrypoint.sh
@@ -0,0 +1,21 @@
+#!/bin/bash
+
+# Entrypoint for the runpod image: registers the SSH public key supplied in
+# PUBLIC_KEY, starts the SSH service, then runs the container command.
+
+mkdir -p ~/.ssh
+chmod 700 ~/.ssh
+
+# Only append when a key was provided, so an unset/empty PUBLIC_KEY does not
+# add a blank line to authorized_keys.
+if [[ -n "$PUBLIC_KEY" ]]; then
+    # Quoted so the key is written verbatim, without word splitting/globbing.
+    printf '%s\n' "$PUBLIC_KEY" >> ~/.ssh/authorized_keys
+    chmod 600 ~/.ssh/authorized_keys
+fi
+
+# Start the SSH service in the background
+service ssh start
+
+# Execute the passed arguments (CMD)
+exec "$@"