Compare commits
5 Commits
keep_in_me
...
deepspeed-
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
1b33588f09 | ||
|
|
1b59a3e698 | ||
|
|
ece0211996 | ||
|
|
8487b97cf3 | ||
|
|
9cd27b2f91 |
6
.github/workflows/main.yml
vendored
6
.github/workflows/main.yml
vendored
@@ -113,7 +113,7 @@ jobs:
|
||||
id: metadata
|
||||
uses: docker/metadata-action@v5
|
||||
with:
|
||||
images: winglian/axolotl-runpod
|
||||
images: winglian/axolotl-cloud
|
||||
- name: Login to Docker Hub
|
||||
uses: docker/login-action@v3
|
||||
with:
|
||||
@@ -128,9 +128,11 @@ jobs:
|
||||
build-args: |
|
||||
BASE_TAG=${{ github.ref_name }}-py${{ matrix.python_version }}-cu${{ matrix.cuda }}-${{ matrix.pytorch }}${{ matrix.axolotl_extras != '' && '-' || '' }}${{ matrix.axolotl_extras }}
|
||||
CUDA=${{ matrix.cuda }}
|
||||
file: ./docker/Dockerfile-runpod
|
||||
file: ./docker/Dockerfile-cloud
|
||||
push: ${{ github.event_name != 'pull_request' }}
|
||||
tags: |
|
||||
${{ steps.metadata.outputs.tags }}-py${{ matrix.python_version }}-cu${{ matrix.cuda }}-${{ matrix.pytorch }}${{ matrix.axolotl_extras != '' && '-' || '' }}${{ matrix.axolotl_extras }}
|
||||
winglian/axolotl-runpod:main-py${{ matrix.python_version }}-cu${{ matrix.cuda }}-${{ matrix.pytorch }}${{ matrix.axolotl_extras != '' && '-' || '' }}${{ matrix.axolotl_extras }}
|
||||
${{ (matrix.is_latest) && format('{0}-latest', steps.metadata.outputs.tags) || '' }}
|
||||
${{ (matrix.is_latest) && format('{0}-latest', 'winglian/axolotl-runpod:main') || '' }}
|
||||
labels: ${{ steps.metadata.outputs.labels }}
|
||||
|
||||
20
README.md
20
README.md
@@ -25,7 +25,7 @@ Features:
|
||||
- [Installation](#installation)
|
||||
- [Docker](#docker)
|
||||
- [Conda/Pip venv](#condapip-venv)
|
||||
- [Runpod](#runpod)
|
||||
- [Cloud GPU](#cloud-gpu) - Runpod, Latitude
|
||||
- [LambdaLabs](#lambdalabs)
|
||||
- [Windows](#windows)
|
||||
- [Launching on public clouds via SkyPilot](#launching-on-public-clouds-via-skypilot)
|
||||
@@ -172,9 +172,11 @@ docker run --privileged --gpus '"all"' --shm-size 10g --rm -it --name axolotl --
|
||||
```
|
||||
Get the token at huggingface.co/settings/tokens
|
||||
|
||||
#### Runpod
|
||||
#### Cloud GPU
|
||||
|
||||
Use `winglian/axolotl-runpod:main-latest` or use this [direct link](https://runpod.io/gsc?template=v2ickqhz9s&ref=6i7fkpdz)
|
||||
For cloud GPU providers that support Docker images, use [`winglian/axolotl-cloud:main-latest`](https://hub.docker.com/r/winglian/axolotl-cloud/tags)
|
||||
|
||||
- On RunPod, use this [direct link](https://runpod.io/gsc?template=v2ickqhz9s&ref=6i7fkpdz)
|
||||
|
||||
#### LambdaLabs
|
||||
<details>
|
||||
@@ -374,7 +376,7 @@ Have dataset(s) in one of the following format (JSONL recommended):
|
||||
For a dataset that is preprocessed for instruction purposes:
|
||||
|
||||
```json
|
||||
{"instruction": "...", "output": "..."}
|
||||
{"input": "...", "output": "..."}
|
||||
```
|
||||
|
||||
You can use this example in your YAML config:
|
||||
@@ -385,6 +387,8 @@ datasets:
|
||||
type:
|
||||
system_prompt: ""
|
||||
field_system: system
|
||||
field_instruction: input
|
||||
field_output: output
|
||||
format: "[INST] {instruction} [/INST]"
|
||||
no_input_format: "[INST] {instruction} [/INST]"
|
||||
```
|
||||
@@ -577,10 +581,10 @@ datasets:
|
||||
field_human: # Optional[str]. Human key to use for conversation.
|
||||
field_model: # Optional[str]. Assistant key to use for conversation.
|
||||
|
||||
# Custom user prompt
|
||||
# Custom user instruction prompt
|
||||
- path: repo
|
||||
type:
|
||||
# The below are defaults. only set what's needed.
|
||||
# The below are defaults. Only set what's needed if you use a different column name.
|
||||
system_prompt: ""
|
||||
system_format: "{system}"
|
||||
field_system: system
|
||||
@@ -589,6 +593,7 @@ datasets:
|
||||
field_output: output
|
||||
|
||||
# Customizable to be single line or multi-line
|
||||
# Use {instruction}/{input} as key to be replaced
|
||||
# 'format' can include {input}
|
||||
format: |-
|
||||
User: {instruction} {input}
|
||||
@@ -674,7 +679,8 @@ lora_target_modules:
|
||||
# - gate_proj
|
||||
# - down_proj
|
||||
# - up_proj
|
||||
lora_target_linear: # If true, will target all linear layers
|
||||
lora_target_linear: # If true, will target all linear modules
|
||||
peft_layers_to_transform: # The layer indices to transform, otherwise, apply to all layers
|
||||
|
||||
# If you added new tokens to the tokenizer, you may need to save some LoRA modules because they need to know the new tokens.
|
||||
# For LLaMA and Mistral, you need to save `embed_tokens` and `lm_head`. It may vary for other models.
|
||||
|
||||
@@ -7,14 +7,16 @@ ENV TRANSFORMERS_CACHE="/workspace/data/huggingface-cache/hub"
|
||||
ENV HF_HOME="/workspace/data/huggingface-cache/hub"
|
||||
ENV HF_HUB_ENABLE_HF_TRANSFER="1"
|
||||
|
||||
COPY scripts/runpod-entrypoint.sh /root/runpod-entrypoint.sh
|
||||
COPY scripts/cloud-entrypoint.sh /root/cloud-entrypoint.sh
|
||||
|
||||
RUN pip install jupyterlab notebook && \
|
||||
jupyter lab clean
|
||||
RUN apt install --yes --no-install-recommends openssh-server tmux && \
|
||||
mkdir -p ~/.ssh && \
|
||||
chmod 700 ~/.ssh && \
|
||||
printf "\n[[ -z \"\$TMUX\" ]] && { tmux attach-session -t ssh_tmux || tmux new-session -s ssh_tmux; exit; }\n" >> ~/.bashrc && \
|
||||
chmod +x /workspace/axolotl/scripts/runpod-entrypoint.sh && \
|
||||
chmod +x /root/runpod-entrypoint.sh
|
||||
chmod +x /workspace/axolotl/scripts/cloud-entrypoint.sh && \
|
||||
chmod +x /root/cloud-entrypoint.sh
|
||||
|
||||
ENTRYPOINT ["/root/runpod-entrypoint.sh"]
|
||||
ENTRYPOINT ["/root/cloud-entrypoint.sh"]
|
||||
CMD ["sleep", "infinity"]
|
||||
@@ -17,5 +17,16 @@ else
|
||||
echo "No PUBLIC_KEY ENV variable provided, not starting openSSH daemon"
|
||||
fi
|
||||
|
||||
# Check if JUPYTER_PASSWORD is set and not empty
|
||||
if [ -n "$JUPYTER_PASSWORD" ]; then
|
||||
# Set JUPYTER_TOKEN to the value of JUPYTER_PASSWORD
|
||||
export JUPYTER_TOKEN="$JUPYTER_PASSWORD"
|
||||
fi
|
||||
|
||||
if [ "$JUPYTER_DISABLE" != "1" ]; then
|
||||
# Run Jupyter Lab in the background
|
||||
jupyter lab --allow-root --ip 0.0.0.0 &
|
||||
fi
|
||||
|
||||
# Execute the passed arguments (CMD)
|
||||
exec "$@"
|
||||
@@ -257,6 +257,11 @@ def validate_config(cfg):
|
||||
if cfg.adapter == "lora" and (cfg.flash_attn_fuse_qkv or cfg.flash_attn_fuse_mlp):
|
||||
raise ValueError("Fused modules are not supported with LoRA")
|
||||
|
||||
if cfg.adapter and cfg.peft_layers_to_transform and cfg.unfrozen_parameters:
|
||||
raise ValueError(
|
||||
"`unfrozen_parameters` used with `peft_layers_to_transform` can have unexpected behavior."
|
||||
)
|
||||
|
||||
if cfg.relora_steps:
|
||||
if cfg.adapter not in ("lora", "qlora"):
|
||||
raise ValueError("cfg.adapter must be lora or qlora to use ReLoRA")
|
||||
|
||||
@@ -357,6 +357,9 @@ def load_model(
|
||||
|
||||
if is_deepspeed_zero3_enabled():
|
||||
del model_kwargs["device_map"]
|
||||
elif cfg.deepspeed:
|
||||
del model_kwargs["device_map"]
|
||||
model_kwargs["low_cpu_mem_usage"] = True
|
||||
|
||||
if cfg.model_revision:
|
||||
model_kwargs["revision"] = cfg.model_revision
|
||||
@@ -733,6 +736,7 @@ def load_lora(model, cfg, inference=False):
|
||||
r=cfg.lora_r,
|
||||
lora_alpha=cfg.lora_alpha,
|
||||
target_modules=lora_target_modules,
|
||||
layers_to_transform=cfg.peft_layers_to_transform,
|
||||
lora_dropout=cfg.lora_dropout,
|
||||
fan_in_fan_out=cfg.lora_fan_in_fan_out,
|
||||
modules_to_save=cfg.lora_modules_to_save if cfg.lora_modules_to_save else None,
|
||||
|
||||
@@ -694,6 +694,21 @@ class ValidationTest(BaseValidation):
|
||||
|
||||
validate_config(cfg)
|
||||
|
||||
def test_unfrozen_parameters_w_peft_layers_to_transform(self):
|
||||
cfg = DictDefault(
|
||||
{
|
||||
"adapter": "lora",
|
||||
"unfrozen_parameters": ["model.layers.2[0-9]+.block_sparse_moe.gate.*"],
|
||||
"peft_layers_to_transform": [0, 1],
|
||||
}
|
||||
)
|
||||
|
||||
with pytest.raises(
|
||||
ValueError,
|
||||
match=r".*can have unexpected behavior*",
|
||||
):
|
||||
validate_config(cfg)
|
||||
|
||||
|
||||
class ValidationCheckModelConfig(BaseValidation):
|
||||
"""
|
||||
|
||||
Reference in New Issue
Block a user