use low_cpu_mem_usage with ds zero 1 or 2

use low_cpu_mem_usage when using deepspeed
Agnostic cloud gpu docker image and Jupyter lab (#1097)
2024-01-16 19:33:44 -05:00 · 2024-01-16 07:44:35 -05:00 · 2024-01-15 22:37:54 -05:00 · 2024-01-15 21:29:55 -05:00 · 2024-01-16 09:47:33 +09:00
7 changed files with 58 additions and 13 deletions
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -113,7 +113,7 @@ jobs:
        id: metadata
        uses: docker/metadata-action@v5
        with:
-          images: winglian/axolotl-runpod
+          images: winglian/axolotl-cloud
      - name: Login to Docker Hub
        uses: docker/login-action@v3
        with:
@@ -128,9 +128,11 @@ jobs:
          build-args: |
            BASE_TAG=${{ github.ref_name }}-py${{ matrix.python_version }}-cu${{ matrix.cuda }}-${{ matrix.pytorch }}${{ matrix.axolotl_extras != '' && '-' || '' }}${{ matrix.axolotl_extras }}
            CUDA=${{ matrix.cuda }}
-          file: ./docker/Dockerfile-runpod
+          file: ./docker/Dockerfile-cloud
          push: ${{ github.event_name != 'pull_request' }}
          tags: |
             ${{ steps.metadata.outputs.tags }}-py${{ matrix.python_version }}-cu${{ matrix.cuda }}-${{ matrix.pytorch }}${{ matrix.axolotl_extras != '' && '-' || '' }}${{ matrix.axolotl_extras }}
+             winglian/axolotl-runpod:main-py${{ matrix.python_version }}-cu${{ matrix.cuda }}-${{ matrix.pytorch }}${{ matrix.axolotl_extras != '' && '-' || '' }}${{ matrix.axolotl_extras }}
             ${{ (matrix.is_latest) && format('{0}-latest', steps.metadata.outputs.tags) || '' }}
+             ${{ (matrix.is_latest) && format('{0}-latest', 'winglian/axolotl-runpod:main') || '' }}
          labels: ${{ steps.metadata.outputs.labels }}
--- a/README.md
+++ b/README.md
@@ -25,7 +25,7 @@ Features:
 - [Installation](#installation)
  - [Docker](#docker)
  - [Conda/Pip venv](#condapip-venv)
-  - [Runpod](#runpod)
+  - [Cloud GPU](#cloud-gpu) - Runpod, Latitude
  - [LambdaLabs](#lambdalabs)
  - [Windows](#windows)
  - [Launching on public clouds via SkyPilot](#launching-on-public-clouds-via-skypilot)
@@ -172,9 +172,11 @@ docker run --privileged --gpus '"all"' --shm-size 10g --rm -it --name axolotl --
        ```
        Get the token at huggingface.co/settings/tokens

-#### Runpod
+#### Cloud GPU

-Use `winglian/axolotl-runpod:main-latest` or use this [direct link](https://runpod.io/gsc?template=v2ickqhz9s&ref=6i7fkpdz)
+For cloud GPU providers that support docker images, use [`winglian/axolotl-cloud:main-latest`](https://hub.docker.com/r/winglian/axolotl-cloud/tags)
+
+- on RunPod use this [direct link](https://runpod.io/gsc?template=v2ickqhz9s&ref=6i7fkpdz)

 #### LambdaLabs
  <details>
@@ -374,7 +376,7 @@ Have dataset(s) in one of the following format (JSONL recommended):
 For a dataset that is preprocessed for instruction purposes:

 ```json
-{"instruction": "...", "output": "..."}
+{"input": "...", "output": "..."}
 ```

 You can use this example in your YAML config:
@@ -385,6 +387,8 @@ datasets:
    type:
      system_prompt: ""
      field_system: system
+      field_instruction: input
+      field_output: output
      format: "[INST] {instruction} [/INST]"
      no_input_format: "[INST] {instruction} [/INST]"
 ```
@@ -577,10 +581,10 @@ datasets:
    field_human: # Optional[str]. Human key to use for conversation.
    field_model: # Optional[str]. Assistant key to use for conversation.

-  # Custom user prompt
+  # Custom user instruction prompt
  - path: repo
    type:
-      # The below are defaults. only set what's needed.
+      # The below are defaults. only set what's needed if you use a different column name.
      system_prompt: ""
      system_format: "{system}"
      field_system: system
@@ -589,6 +593,7 @@ datasets:
      field_output: output

      # Customizable to be single line or multi-line
+      # Use {instruction}/{input} as key to be replaced
      # 'format' can include {input}
      format: |-
        User: {instruction} {input}
@@ -674,7 +679,8 @@ lora_target_modules:
 #  - gate_proj
 #  - down_proj
 #  - up_proj
-lora_target_linear: # If true, will target all linear layers
+lora_target_linear: # If true, will target all linear modules
+peft_layers_to_transform: # The layer indices to transform, otherwise, apply to all layers

 # If you added new tokens to the tokenizer, you may need to save some LoRA modules because they need to know the new tokens.
 # For LLaMA and Mistral, you need to save `embed_tokens` and `lm_head`. It may vary for other models.
--- a/docker/Dockerfile-runpod
+++ b/docker/Dockerfile-runpod
@@ -7,14 +7,16 @@ ENV TRANSFORMERS_CACHE="/workspace/data/huggingface-cache/hub"
 ENV HF_HOME="/workspace/data/huggingface-cache/hub"
 ENV HF_HUB_ENABLE_HF_TRANSFER="1"

-COPY scripts/runpod-entrypoint.sh /root/runpod-entrypoint.sh
+COPY scripts/cloud-entrypoint.sh /root/cloud-entrypoint.sh

+RUN pip install jupyterlab notebook && \
+    jupyter lab clean
 RUN apt install --yes --no-install-recommends openssh-server tmux && \
    mkdir -p ~/.ssh && \
    chmod 700 ~/.ssh && \
    printf "\n[[ -z \"\$TMUX\"  ]] && { tmux attach-session -t ssh_tmux || tmux new-session -s ssh_tmux; exit; }\n" >> ~/.bashrc && \
-    chmod +x /workspace/axolotl/scripts/runpod-entrypoint.sh && \
-    chmod +x /root/runpod-entrypoint.sh
+    chmod +x /workspace/axolotl/scripts/cloud-entrypoint.sh && \
+    chmod +x /root/cloud-entrypoint.sh

-ENTRYPOINT ["/root/runpod-entrypoint.sh"]
+ENTRYPOINT ["/root/cloud-entrypoint.sh"]
 CMD ["sleep", "infinity"]
--- a/scripts/runpod-entrypoint.sh
+++ b/scripts/runpod-entrypoint.sh
@@ -17,5 +17,16 @@ else
    echo "No PUBLIC_KEY ENV variable provided, not starting openSSH daemon"
 fi

+# Check if JUPYTER_PASSWORD is set and not empty
+if [ -n "$JUPYTER_PASSWORD" ]; then
+    # Set JUPYTER_TOKEN to the value of JUPYTER_PASSWORD
+    export JUPYTER_TOKEN="$JUPYTER_PASSWORD"
+fi
+
+if [ "$JUPYTER_DISABLE" != "1" ]; then
+    # Run Jupyter Lab in the background
+    jupyter lab --allow-root --ip 0.0.0.0 &
+fi
+
 # Execute the passed arguments (CMD)
 exec "$@"
--- a/src/axolotl/utils/config.py
+++ b/src/axolotl/utils/config.py
@@ -257,6 +257,11 @@ def validate_config(cfg):
    if cfg.adapter == "lora" and (cfg.flash_attn_fuse_qkv or cfg.flash_attn_fuse_mlp):
        raise ValueError("Fused modules are not supported with LoRA")

+    if cfg.adapter and cfg.peft_layers_to_transform and cfg.unfrozen_parameters:
+        raise ValueError(
+            "`unfrozen_parameters` used with `peft_layers_to_transform` can have unexpected behavior."
+        )
+
    if cfg.relora_steps:
        if cfg.adapter not in ("lora", "qlora"):
            raise ValueError("cfg.adapter must be lora or qlora to use ReLoRA")
--- a/src/axolotl/utils/models.py
+++ b/src/axolotl/utils/models.py
@@ -357,6 +357,9 @@ def load_model(

    if is_deepspeed_zero3_enabled():
        del model_kwargs["device_map"]
+    elif cfg.deepspeed:
+        del model_kwargs["device_map"]
+        model_kwargs["low_cpu_mem_usage"] = True

    if cfg.model_revision:
        model_kwargs["revision"] = cfg.model_revision
@@ -733,6 +736,7 @@ def load_lora(model, cfg, inference=False):
        r=cfg.lora_r,
        lora_alpha=cfg.lora_alpha,
        target_modules=lora_target_modules,
+        layers_to_transform=cfg.peft_layers_to_transform,
        lora_dropout=cfg.lora_dropout,
        fan_in_fan_out=cfg.lora_fan_in_fan_out,
        modules_to_save=cfg.lora_modules_to_save if cfg.lora_modules_to_save else None,
--- a/tests/test_validation.py
+++ b/tests/test_validation.py
@@ -694,6 +694,21 @@ class ValidationTest(BaseValidation):

        validate_config(cfg)

+    def test_unfrozen_parameters_w_peft_layers_to_transform(self):
+        cfg = DictDefault(
+            {
+                "adapter": "lora",
+                "unfrozen_parameters": ["model.layers.2[0-9]+.block_sparse_moe.gate.*"],
+                "peft_layers_to_transform": [0, 1],
+            }
+        )
+
+        with pytest.raises(
+            ValueError,
+            match=r".*can have unexpected behavior*",
+        ):
+            validate_config(cfg)
+

 class ValidationCheckModelConfig(BaseValidation):
    """
Author	SHA1	Message	Date
Wing Lian	1b33588f09	use low_cpu_mem_usage with ds zero 1 or 2	2024-01-16 19:33:44 -05:00
Wing Lian	1b59a3e698	use low_cpu_mem_usage when using deepspeed	2024-01-16 07:44:35 -05:00
Wing Lian	ece0211996	Agnostic cloud gpu docker image and Jupyter lab (#1097)	2024-01-15 22:37:54 -05:00
xzuyn	8487b97cf3	Add `layers_to_transform` for `lora_config` (#1118)	2024-01-15 21:29:55 -05:00
NanoCode012	9cd27b2f91	fix(readme): clarify custom user prompt [no-ci] (#1124) * fix(readme): clarify custom user prompt * chore: update example to show use case of setting field	2024-01-16 09:47:33 +09:00