From ece0211996f0f546d8ec1380ab3f7e180fd9c2c0 Mon Sep 17 00:00:00 2001 From: Wing Lian Date: Mon, 15 Jan 2024 22:37:54 -0500 Subject: [PATCH] Agnostic cloud gpu docker image and Jupyter lab (#1097) --- .github/workflows/main.yml | 6 ++++-- README.md | 8 +++++--- docker/{Dockerfile-runpod => Dockerfile-cloud} | 10 ++++++---- scripts/{runpod-entrypoint.sh => cloud-entrypoint.sh} | 11 +++++++++++ 4 files changed, 26 insertions(+), 9 deletions(-) rename docker/{Dockerfile-runpod => Dockerfile-cloud} (69%) rename scripts/{runpod-entrypoint.sh => cloud-entrypoint.sh} (65%) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 2f0b07450..7bb1b0515 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -113,7 +113,7 @@ jobs: id: metadata uses: docker/metadata-action@v5 with: - images: winglian/axolotl-runpod + images: winglian/axolotl-cloud - name: Login to Docker Hub uses: docker/login-action@v3 with: @@ -128,9 +128,11 @@ jobs: build-args: | BASE_TAG=${{ github.ref_name }}-py${{ matrix.python_version }}-cu${{ matrix.cuda }}-${{ matrix.pytorch }}${{ matrix.axolotl_extras != '' && '-' || '' }}${{ matrix.axolotl_extras }} CUDA=${{ matrix.cuda }} - file: ./docker/Dockerfile-runpod + file: ./docker/Dockerfile-cloud push: ${{ github.event_name != 'pull_request' }} tags: | ${{ steps.metadata.outputs.tags }}-py${{ matrix.python_version }}-cu${{ matrix.cuda }}-${{ matrix.pytorch }}${{ matrix.axolotl_extras != '' && '-' || '' }}${{ matrix.axolotl_extras }} + winglian/axolotl-runpod:main-py${{ matrix.python_version }}-cu${{ matrix.cuda }}-${{ matrix.pytorch }}${{ matrix.axolotl_extras != '' && '-' || '' }}${{ matrix.axolotl_extras }} ${{ (matrix.is_latest) && format('{0}-latest', steps.metadata.outputs.tags) || '' }} + ${{ (matrix.is_latest) && format('{0}-latest', 'winglian/axolotl-runpod:main') || '' }} labels: ${{ steps.metadata.outputs.labels }} diff --git a/README.md b/README.md index 28ffe090c..f5f848a44 100644 --- a/README.md +++ b/README.md @@ -25,7 +25,7 @@ Features: - [Installation](#installation) - [Docker](#docker) - [Conda/Pip venv](#condapip-venv) - - [Runpod](#runpod) + - [Cloud GPU](#cloud-gpu) - Runpod, Latitude - [LambdaLabs](#lambdalabs) - [Windows](#windows) - [Launching on public clouds via SkyPilot](#launching-on-public-clouds-via-skypilot) @@ -172,9 +172,11 @@ docker run --privileged --gpus '"all"' --shm-size 10g --rm -it --name axolotl -- ``` Get the token at huggingface.co/settings/tokens -#### Runpod +#### Cloud GPU -Use `winglian/axolotl-runpod:main-latest` or use this [direct link](https://runpod.io/gsc?template=v2ickqhz9s&ref=6i7fkpdz) +For cloud GPU providers that support docker images, use [`winglian/axolotl-cloud:main-latest`](https://hub.docker.com/r/winglian/axolotl-cloud/tags) + +- on RunPod use this [direct link](https://runpod.io/gsc?template=v2ickqhz9s&ref=6i7fkpdz) #### LambdaLabs
diff --git a/docker/Dockerfile-runpod b/docker/Dockerfile-cloud similarity index 69% rename from docker/Dockerfile-runpod rename to docker/Dockerfile-cloud index 9f3c60eee..16aa38914 100644 --- a/docker/Dockerfile-runpod +++ b/docker/Dockerfile-cloud @@ -7,14 +7,16 @@ ENV TRANSFORMERS_CACHE="/workspace/data/huggingface-cache/hub" ENV HF_HOME="/workspace/data/huggingface-cache/hub" ENV HF_HUB_ENABLE_HF_TRANSFER="1" -COPY scripts/runpod-entrypoint.sh /root/runpod-entrypoint.sh +COPY scripts/cloud-entrypoint.sh /root/cloud-entrypoint.sh +RUN pip install jupyterlab notebook && \ + jupyter lab clean RUN apt install --yes --no-install-recommends openssh-server tmux && \ mkdir -p ~/.ssh && \ chmod 700 ~/.ssh && \ printf "\n[[ -z \"\$TMUX\" ]] && { tmux attach-session -t ssh_tmux || tmux new-session -s ssh_tmux; exit; }\n" >> ~/.bashrc && \ - chmod +x /workspace/axolotl/scripts/runpod-entrypoint.sh && \ - chmod +x /root/runpod-entrypoint.sh + chmod +x /workspace/axolotl/scripts/cloud-entrypoint.sh && \ + chmod +x /root/cloud-entrypoint.sh -ENTRYPOINT ["/root/runpod-entrypoint.sh"] +ENTRYPOINT ["/root/cloud-entrypoint.sh"] CMD ["sleep", "infinity"] diff --git a/scripts/runpod-entrypoint.sh b/scripts/cloud-entrypoint.sh similarity index 65% rename from scripts/runpod-entrypoint.sh rename to scripts/cloud-entrypoint.sh index b7cef80b4..21ee88b85 100755 --- a/scripts/runpod-entrypoint.sh +++ b/scripts/cloud-entrypoint.sh @@ -17,5 +17,16 @@ else echo "No PUBLIC_KEY ENV variable provided, not starting openSSH daemon" fi +# Check if JUPYTER_PASSWORD is set and not empty +if [ -n "$JUPYTER_PASSWORD" ]; then + # Set JUPYTER_TOKEN to the value of JUPYTER_PASSWORD + export JUPYTER_TOKEN="$JUPYTER_PASSWORD" +fi + +if [ "$JUPYTER_DISABLE" != "1" ]; then + # Run Jupyter Lab in the background + jupyter lab --allow-root --ip 0.0.0.0 & +fi + # Execute the passed arguments (CMD) exec "$@"