Compare commits
5 Commits
keep_in_me
...
deepspeed-
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
1b33588f09 | ||
|
|
1b59a3e698 | ||
|
|
ece0211996 | ||
|
|
8487b97cf3 | ||
|
|
9cd27b2f91 |
6
.github/workflows/main.yml
vendored
6
.github/workflows/main.yml
vendored
@@ -113,7 +113,7 @@ jobs:
|
|||||||
id: metadata
|
id: metadata
|
||||||
uses: docker/metadata-action@v5
|
uses: docker/metadata-action@v5
|
||||||
with:
|
with:
|
||||||
images: winglian/axolotl-runpod
|
images: winglian/axolotl-cloud
|
||||||
- name: Login to Docker Hub
|
- name: Login to Docker Hub
|
||||||
uses: docker/login-action@v3
|
uses: docker/login-action@v3
|
||||||
with:
|
with:
|
||||||
@@ -128,9 +128,11 @@ jobs:
|
|||||||
build-args: |
|
build-args: |
|
||||||
BASE_TAG=${{ github.ref_name }}-py${{ matrix.python_version }}-cu${{ matrix.cuda }}-${{ matrix.pytorch }}${{ matrix.axolotl_extras != '' && '-' || '' }}${{ matrix.axolotl_extras }}
|
BASE_TAG=${{ github.ref_name }}-py${{ matrix.python_version }}-cu${{ matrix.cuda }}-${{ matrix.pytorch }}${{ matrix.axolotl_extras != '' && '-' || '' }}${{ matrix.axolotl_extras }}
|
||||||
CUDA=${{ matrix.cuda }}
|
CUDA=${{ matrix.cuda }}
|
||||||
file: ./docker/Dockerfile-runpod
|
file: ./docker/Dockerfile-cloud
|
||||||
push: ${{ github.event_name != 'pull_request' }}
|
push: ${{ github.event_name != 'pull_request' }}
|
||||||
tags: |
|
tags: |
|
||||||
${{ steps.metadata.outputs.tags }}-py${{ matrix.python_version }}-cu${{ matrix.cuda }}-${{ matrix.pytorch }}${{ matrix.axolotl_extras != '' && '-' || '' }}${{ matrix.axolotl_extras }}
|
${{ steps.metadata.outputs.tags }}-py${{ matrix.python_version }}-cu${{ matrix.cuda }}-${{ matrix.pytorch }}${{ matrix.axolotl_extras != '' && '-' || '' }}${{ matrix.axolotl_extras }}
|
||||||
|
winglian/axolotl-runpod:main-py${{ matrix.python_version }}-cu${{ matrix.cuda }}-${{ matrix.pytorch }}${{ matrix.axolotl_extras != '' && '-' || '' }}${{ matrix.axolotl_extras }}
|
||||||
${{ (matrix.is_latest) && format('{0}-latest', steps.metadata.outputs.tags) || '' }}
|
${{ (matrix.is_latest) && format('{0}-latest', steps.metadata.outputs.tags) || '' }}
|
||||||
|
${{ (matrix.is_latest) && format('{0}-latest', 'winglian/axolotl-runpod:main') || '' }}
|
||||||
labels: ${{ steps.metadata.outputs.labels }}
|
labels: ${{ steps.metadata.outputs.labels }}
|
||||||
|
|||||||
20
README.md
20
README.md
@@ -25,7 +25,7 @@ Features:
|
|||||||
- [Installation](#installation)
|
- [Installation](#installation)
|
||||||
- [Docker](#docker)
|
- [Docker](#docker)
|
||||||
- [Conda/Pip venv](#condapip-venv)
|
- [Conda/Pip venv](#condapip-venv)
|
||||||
- [Runpod](#runpod)
|
- [Cloud GPU](#cloud-gpu) - Runpod, Latitude
|
||||||
- [LambdaLabs](#lambdalabs)
|
- [LambdaLabs](#lambdalabs)
|
||||||
- [Windows](#windows)
|
- [Windows](#windows)
|
||||||
- [Launching on public clouds via SkyPilot](#launching-on-public-clouds-via-skypilot)
|
- [Launching on public clouds via SkyPilot](#launching-on-public-clouds-via-skypilot)
|
||||||
@@ -172,9 +172,11 @@ docker run --privileged --gpus '"all"' --shm-size 10g --rm -it --name axolotl --
|
|||||||
```
|
```
|
||||||
Get the token at huggingface.co/settings/tokens
|
Get the token at huggingface.co/settings/tokens
|
||||||
|
|
||||||
#### Runpod
|
#### Cloud GPU
|
||||||
|
|
||||||
Use `winglian/axolotl-runpod:main-latest` or use this [direct link](https://runpod.io/gsc?template=v2ickqhz9s&ref=6i7fkpdz)
|
For cloud GPU providers that support docker images, use [`winglian/axolotl-cloud:main-latest`](https://hub.docker.com/r/winglian/axolotl-cloud/tags)
|
||||||
|
|
||||||
|
- on RunPod use this [direct link](https://runpod.io/gsc?template=v2ickqhz9s&ref=6i7fkpdz)
|
||||||
|
|
||||||
#### LambdaLabs
|
#### LambdaLabs
|
||||||
<details>
|
<details>
|
||||||
@@ -374,7 +376,7 @@ Have dataset(s) in one of the following format (JSONL recommended):
|
|||||||
For a dataset that is preprocessed for instruction purposes:
|
For a dataset that is preprocessed for instruction purposes:
|
||||||
|
|
||||||
```json
|
```json
|
||||||
{"instruction": "...", "output": "..."}
|
{"input": "...", "output": "..."}
|
||||||
```
|
```
|
||||||
|
|
||||||
You can use this example in your YAML config:
|
You can use this example in your YAML config:
|
||||||
@@ -385,6 +387,8 @@ datasets:
|
|||||||
type:
|
type:
|
||||||
system_prompt: ""
|
system_prompt: ""
|
||||||
field_system: system
|
field_system: system
|
||||||
|
field_instruction: input
|
||||||
|
field_output: output
|
||||||
format: "[INST] {instruction} [/INST]"
|
format: "[INST] {instruction} [/INST]"
|
||||||
no_input_format: "[INST] {instruction} [/INST]"
|
no_input_format: "[INST] {instruction} [/INST]"
|
||||||
```
|
```
|
||||||
@@ -577,10 +581,10 @@ datasets:
|
|||||||
field_human: # Optional[str]. Human key to use for conversation.
|
field_human: # Optional[str]. Human key to use for conversation.
|
||||||
field_model: # Optional[str]. Assistant key to use for conversation.
|
field_model: # Optional[str]. Assistant key to use for conversation.
|
||||||
|
|
||||||
# Custom user prompt
|
# Custom user instruction prompt
|
||||||
- path: repo
|
- path: repo
|
||||||
type:
|
type:
|
||||||
# The below are defaults. only set what's needed.
|
# The below are defaults. only set what's needed if you use a different column name.
|
||||||
system_prompt: ""
|
system_prompt: ""
|
||||||
system_format: "{system}"
|
system_format: "{system}"
|
||||||
field_system: system
|
field_system: system
|
||||||
@@ -589,6 +593,7 @@ datasets:
|
|||||||
field_output: output
|
field_output: output
|
||||||
|
|
||||||
# Customizable to be single line or multi-line
|
# Customizable to be single line or multi-line
|
||||||
|
# Use {instruction}/{input} as key to be replaced
|
||||||
# 'format' can include {input}
|
# 'format' can include {input}
|
||||||
format: |-
|
format: |-
|
||||||
User: {instruction} {input}
|
User: {instruction} {input}
|
||||||
@@ -674,7 +679,8 @@ lora_target_modules:
|
|||||||
# - gate_proj
|
# - gate_proj
|
||||||
# - down_proj
|
# - down_proj
|
||||||
# - up_proj
|
# - up_proj
|
||||||
lora_target_linear: # If true, will target all linear layers
|
lora_target_linear: # If true, will target all linear modules
|
||||||
|
peft_layers_to_transform: # The layer indices to transform, otherwise, apply to all layers
|
||||||
|
|
||||||
# If you added new tokens to the tokenizer, you may need to save some LoRA modules because they need to know the new tokens.
|
# If you added new tokens to the tokenizer, you may need to save some LoRA modules because they need to know the new tokens.
|
||||||
# For LLaMA and Mistral, you need to save `embed_tokens` and `lm_head`. It may vary for other models.
|
# For LLaMA and Mistral, you need to save `embed_tokens` and `lm_head`. It may vary for other models.
|
||||||
|
|||||||
@@ -7,14 +7,16 @@ ENV TRANSFORMERS_CACHE="/workspace/data/huggingface-cache/hub"
|
|||||||
ENV HF_HOME="/workspace/data/huggingface-cache/hub"
|
ENV HF_HOME="/workspace/data/huggingface-cache/hub"
|
||||||
ENV HF_HUB_ENABLE_HF_TRANSFER="1"
|
ENV HF_HUB_ENABLE_HF_TRANSFER="1"
|
||||||
|
|
||||||
COPY scripts/runpod-entrypoint.sh /root/runpod-entrypoint.sh
|
COPY scripts/cloud-entrypoint.sh /root/cloud-entrypoint.sh
|
||||||
|
|
||||||
|
RUN pip install jupyterlab notebook && \
|
||||||
|
jupyter lab clean
|
||||||
RUN apt install --yes --no-install-recommends openssh-server tmux && \
|
RUN apt install --yes --no-install-recommends openssh-server tmux && \
|
||||||
mkdir -p ~/.ssh && \
|
mkdir -p ~/.ssh && \
|
||||||
chmod 700 ~/.ssh && \
|
chmod 700 ~/.ssh && \
|
||||||
printf "\n[[ -z \"\$TMUX\" ]] && { tmux attach-session -t ssh_tmux || tmux new-session -s ssh_tmux; exit; }\n" >> ~/.bashrc && \
|
printf "\n[[ -z \"\$TMUX\" ]] && { tmux attach-session -t ssh_tmux || tmux new-session -s ssh_tmux; exit; }\n" >> ~/.bashrc && \
|
||||||
chmod +x /workspace/axolotl/scripts/runpod-entrypoint.sh && \
|
chmod +x /workspace/axolotl/scripts/cloud-entrypoint.sh && \
|
||||||
chmod +x /root/runpod-entrypoint.sh
|
chmod +x /root/cloud-entrypoint.sh
|
||||||
|
|
||||||
ENTRYPOINT ["/root/runpod-entrypoint.sh"]
|
ENTRYPOINT ["/root/cloud-entrypoint.sh"]
|
||||||
CMD ["sleep", "infinity"]
|
CMD ["sleep", "infinity"]
|
||||||
@@ -17,5 +17,16 @@ else
|
|||||||
echo "No PUBLIC_KEY ENV variable provided, not starting openSSH daemon"
|
echo "No PUBLIC_KEY ENV variable provided, not starting openSSH daemon"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
# Check if JUPYTER_PASSWORD is set and not empty
|
||||||
|
if [ -n "$JUPYTER_PASSWORD" ]; then
|
||||||
|
# Set JUPYTER_TOKEN to the value of JUPYTER_PASSWORD
|
||||||
|
export JUPYTER_TOKEN="$JUPYTER_PASSWORD"
|
||||||
|
fi
|
||||||
|
|
||||||
|
if [ "$JUPYTER_DISABLE" != "1" ]; then
|
||||||
|
# Run Jupyter Lab in the background
|
||||||
|
jupyter lab --allow-root --ip 0.0.0.0 &
|
||||||
|
fi
|
||||||
|
|
||||||
# Execute the passed arguments (CMD)
|
# Execute the passed arguments (CMD)
|
||||||
exec "$@"
|
exec "$@"
|
||||||
@@ -257,6 +257,11 @@ def validate_config(cfg):
|
|||||||
if cfg.adapter == "lora" and (cfg.flash_attn_fuse_qkv or cfg.flash_attn_fuse_mlp):
|
if cfg.adapter == "lora" and (cfg.flash_attn_fuse_qkv or cfg.flash_attn_fuse_mlp):
|
||||||
raise ValueError("Fused modules are not supported with LoRA")
|
raise ValueError("Fused modules are not supported with LoRA")
|
||||||
|
|
||||||
|
if cfg.adapter and cfg.peft_layers_to_transform and cfg.unfrozen_parameters:
|
||||||
|
raise ValueError(
|
||||||
|
"`unfrozen_parameters` used with `peft_layers_to_transform` can have unexpected behavior."
|
||||||
|
)
|
||||||
|
|
||||||
if cfg.relora_steps:
|
if cfg.relora_steps:
|
||||||
if cfg.adapter not in ("lora", "qlora"):
|
if cfg.adapter not in ("lora", "qlora"):
|
||||||
raise ValueError("cfg.adapter must be lora or qlora to use ReLoRA")
|
raise ValueError("cfg.adapter must be lora or qlora to use ReLoRA")
|
||||||
|
|||||||
@@ -357,6 +357,9 @@ def load_model(
|
|||||||
|
|
||||||
if is_deepspeed_zero3_enabled():
|
if is_deepspeed_zero3_enabled():
|
||||||
del model_kwargs["device_map"]
|
del model_kwargs["device_map"]
|
||||||
|
elif cfg.deepspeed:
|
||||||
|
del model_kwargs["device_map"]
|
||||||
|
model_kwargs["low_cpu_mem_usage"] = True
|
||||||
|
|
||||||
if cfg.model_revision:
|
if cfg.model_revision:
|
||||||
model_kwargs["revision"] = cfg.model_revision
|
model_kwargs["revision"] = cfg.model_revision
|
||||||
@@ -733,6 +736,7 @@ def load_lora(model, cfg, inference=False):
|
|||||||
r=cfg.lora_r,
|
r=cfg.lora_r,
|
||||||
lora_alpha=cfg.lora_alpha,
|
lora_alpha=cfg.lora_alpha,
|
||||||
target_modules=lora_target_modules,
|
target_modules=lora_target_modules,
|
||||||
|
layers_to_transform=cfg.peft_layers_to_transform,
|
||||||
lora_dropout=cfg.lora_dropout,
|
lora_dropout=cfg.lora_dropout,
|
||||||
fan_in_fan_out=cfg.lora_fan_in_fan_out,
|
fan_in_fan_out=cfg.lora_fan_in_fan_out,
|
||||||
modules_to_save=cfg.lora_modules_to_save if cfg.lora_modules_to_save else None,
|
modules_to_save=cfg.lora_modules_to_save if cfg.lora_modules_to_save else None,
|
||||||
|
|||||||
@@ -694,6 +694,21 @@ class ValidationTest(BaseValidation):
|
|||||||
|
|
||||||
validate_config(cfg)
|
validate_config(cfg)
|
||||||
|
|
||||||
|
def test_unfrozen_parameters_w_peft_layers_to_transform(self):
|
||||||
|
cfg = DictDefault(
|
||||||
|
{
|
||||||
|
"adapter": "lora",
|
||||||
|
"unfrozen_parameters": ["model.layers.2[0-9]+.block_sparse_moe.gate.*"],
|
||||||
|
"peft_layers_to_transform": [0, 1],
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
|
with pytest.raises(
|
||||||
|
ValueError,
|
||||||
|
match=r".*can have unexpected behavior*",
|
||||||
|
):
|
||||||
|
validate_config(cfg)
|
||||||
|
|
||||||
|
|
||||||
class ValidationCheckModelConfig(BaseValidation):
|
class ValidationCheckModelConfig(BaseValidation):
|
||||||
"""
|
"""
|
||||||
|
|||||||
Reference in New Issue
Block a user