Compare commits

..

1 Commits

Author SHA1 Message Date
Casper
eea6e8303a Disable datasets caching when preparing dataset for packing 2024-01-15 23:48:24 +01:00
8 changed files with 14 additions and 59 deletions

View File

@@ -113,7 +113,7 @@ jobs:
id: metadata id: metadata
uses: docker/metadata-action@v5 uses: docker/metadata-action@v5
with: with:
images: winglian/axolotl-cloud images: winglian/axolotl-runpod
- name: Login to Docker Hub - name: Login to Docker Hub
uses: docker/login-action@v3 uses: docker/login-action@v3
with: with:
@@ -128,11 +128,9 @@ jobs:
build-args: | build-args: |
BASE_TAG=${{ github.ref_name }}-py${{ matrix.python_version }}-cu${{ matrix.cuda }}-${{ matrix.pytorch }}${{ matrix.axolotl_extras != '' && '-' || '' }}${{ matrix.axolotl_extras }} BASE_TAG=${{ github.ref_name }}-py${{ matrix.python_version }}-cu${{ matrix.cuda }}-${{ matrix.pytorch }}${{ matrix.axolotl_extras != '' && '-' || '' }}${{ matrix.axolotl_extras }}
CUDA=${{ matrix.cuda }} CUDA=${{ matrix.cuda }}
file: ./docker/Dockerfile-cloud file: ./docker/Dockerfile-runpod
push: ${{ github.event_name != 'pull_request' }} push: ${{ github.event_name != 'pull_request' }}
tags: | tags: |
${{ steps.metadata.outputs.tags }}-py${{ matrix.python_version }}-cu${{ matrix.cuda }}-${{ matrix.pytorch }}${{ matrix.axolotl_extras != '' && '-' || '' }}${{ matrix.axolotl_extras }} ${{ steps.metadata.outputs.tags }}-py${{ matrix.python_version }}-cu${{ matrix.cuda }}-${{ matrix.pytorch }}${{ matrix.axolotl_extras != '' && '-' || '' }}${{ matrix.axolotl_extras }}
winglian/axolotl-runpod:main-py${{ matrix.python_version }}-cu${{ matrix.cuda }}-${{ matrix.pytorch }}${{ matrix.axolotl_extras != '' && '-' || '' }}${{ matrix.axolotl_extras }}
${{ (matrix.is_latest) && format('{0}-latest', steps.metadata.outputs.tags) || '' }} ${{ (matrix.is_latest) && format('{0}-latest', steps.metadata.outputs.tags) || '' }}
${{ (matrix.is_latest) && format('{0}-latest', 'winglian/axolotl-runpod:main') || '' }}
labels: ${{ steps.metadata.outputs.labels }} labels: ${{ steps.metadata.outputs.labels }}

View File

@@ -25,7 +25,7 @@ Features:
- [Installation](#installation) - [Installation](#installation)
- [Docker](#docker) - [Docker](#docker)
- [Conda/Pip venv](#condapip-venv) - [Conda/Pip venv](#condapip-venv)
- [Cloud GPU](#cloud-gpu) - Runpod, Latitude - [Runpod](#runpod)
- [LambdaLabs](#lambdalabs) - [LambdaLabs](#lambdalabs)
- [Windows](#windows) - [Windows](#windows)
- [Launching on public clouds via SkyPilot](#launching-on-public-clouds-via-skypilot) - [Launching on public clouds via SkyPilot](#launching-on-public-clouds-via-skypilot)
@@ -172,11 +172,9 @@ docker run --privileged --gpus '"all"' --shm-size 10g --rm -it --name axolotl --
``` ```
Get the token at huggingface.co/settings/tokens Get the token at huggingface.co/settings/tokens
#### Cloud GPU #### Runpod
For cloud GPU providers that support docker images, use [`winglian/axolotl-cloud:main-latest`](https://hub.docker.com/r/winglian/axolotl-cloud/tags) Use `winglian/axolotl-runpod:main-latest` or use this [direct link](https://runpod.io/gsc?template=v2ickqhz9s&ref=6i7fkpdz)
- on RunPod use this [direct link](https://runpod.io/gsc?template=v2ickqhz9s&ref=6i7fkpdz)
#### LambdaLabs #### LambdaLabs
<details> <details>
@@ -376,7 +374,7 @@ Have dataset(s) in one of the following format (JSONL recommended):
For a dataset that is preprocessed for instruction purposes: For a dataset that is preprocessed for instruction purposes:
```json ```json
{"input": "...", "output": "..."} {"instruction": "...", "output": "..."}
``` ```
You can use this example in your YAML config: You can use this example in your YAML config:
@@ -387,8 +385,6 @@ datasets:
type: type:
system_prompt: "" system_prompt: ""
field_system: system field_system: system
field_instruction: input
field_output: output
format: "[INST] {instruction} [/INST]" format: "[INST] {instruction} [/INST]"
no_input_format: "[INST] {instruction} [/INST]" no_input_format: "[INST] {instruction} [/INST]"
``` ```
@@ -581,10 +577,10 @@ datasets:
field_human: # Optional[str]. Human key to use for conversation. field_human: # Optional[str]. Human key to use for conversation.
field_model: # Optional[str]. Assistant key to use for conversation. field_model: # Optional[str]. Assistant key to use for conversation.
# Custom user instruction prompt # Custom user prompt
- path: repo - path: repo
type: type:
# The below are defaults. only set what's needed if you use a different column name. # The below are defaults. only set what's needed.
system_prompt: "" system_prompt: ""
system_format: "{system}" system_format: "{system}"
field_system: system field_system: system
@@ -593,7 +589,6 @@ datasets:
field_output: output field_output: output
# Customizable to be single line or multi-line # Customizable to be single line or multi-line
# Use {instruction}/{input} as key to be replaced
# 'format' can include {input} # 'format' can include {input}
format: |- format: |-
User: {instruction} {input} User: {instruction} {input}
@@ -679,8 +674,7 @@ lora_target_modules:
# - gate_proj # - gate_proj
# - down_proj # - down_proj
# - up_proj # - up_proj
lora_target_linear: # If true, will target all linear modules lora_target_linear: # If true, will target all linear layers
peft_layers_to_transform: # The layer indices to transform, otherwise, apply to all layers
# If you added new tokens to the tokenizer, you may need to save some LoRA modules because they need to know the new tokens. # If you added new tokens to the tokenizer, you may need to save some LoRA modules because they need to know the new tokens.
# For LLaMA and Mistral, you need to save `embed_tokens` and `lm_head`. It may vary for other models. # For LLaMA and Mistral, you need to save `embed_tokens` and `lm_head`. It may vary for other models.

View File

@@ -7,16 +7,14 @@ ENV TRANSFORMERS_CACHE="/workspace/data/huggingface-cache/hub"
ENV HF_HOME="/workspace/data/huggingface-cache/hub" ENV HF_HOME="/workspace/data/huggingface-cache/hub"
ENV HF_HUB_ENABLE_HF_TRANSFER="1" ENV HF_HUB_ENABLE_HF_TRANSFER="1"
COPY scripts/cloud-entrypoint.sh /root/cloud-entrypoint.sh COPY scripts/runpod-entrypoint.sh /root/runpod-entrypoint.sh
RUN pip install jupyterlab notebook && \
jupyter lab clean
RUN apt install --yes --no-install-recommends openssh-server tmux && \ RUN apt install --yes --no-install-recommends openssh-server tmux && \
mkdir -p ~/.ssh && \ mkdir -p ~/.ssh && \
chmod 700 ~/.ssh && \ chmod 700 ~/.ssh && \
printf "\n[[ -z \"\$TMUX\" ]] && { tmux attach-session -t ssh_tmux || tmux new-session -s ssh_tmux; exit; }\n" >> ~/.bashrc && \ printf "\n[[ -z \"\$TMUX\" ]] && { tmux attach-session -t ssh_tmux || tmux new-session -s ssh_tmux; exit; }\n" >> ~/.bashrc && \
chmod +x /workspace/axolotl/scripts/cloud-entrypoint.sh && \ chmod +x /workspace/axolotl/scripts/runpod-entrypoint.sh && \
chmod +x /root/cloud-entrypoint.sh chmod +x /root/runpod-entrypoint.sh
ENTRYPOINT ["/root/cloud-entrypoint.sh"] ENTRYPOINT ["/root/runpod-entrypoint.sh"]
CMD ["sleep", "infinity"] CMD ["sleep", "infinity"]

View File

@@ -17,16 +17,5 @@ else
echo "No PUBLIC_KEY ENV variable provided, not starting openSSH daemon" echo "No PUBLIC_KEY ENV variable provided, not starting openSSH daemon"
fi fi
# Check if JUPYTER_PASSWORD is set and not empty
if [ -n "$JUPYTER_PASSWORD" ]; then
# Set JUPYTER_TOKEN to the value of JUPYTER_PASSWORD
export JUPYTER_TOKEN="$JUPYTER_PASSWORD"
fi
if [ "$JUPYTER_DISABLE" != "1" ]; then
# Run Jupyter Lab in the background
jupyter lab --allow-root --ip 0.0.0.0 &
fi
# Execute the passed arguments (CMD) # Execute the passed arguments (CMD)
exec "$@" exec "$@"

View File

@@ -257,11 +257,6 @@ def validate_config(cfg):
if cfg.adapter == "lora" and (cfg.flash_attn_fuse_qkv or cfg.flash_attn_fuse_mlp): if cfg.adapter == "lora" and (cfg.flash_attn_fuse_qkv or cfg.flash_attn_fuse_mlp):
raise ValueError("Fused modules are not supported with LoRA") raise ValueError("Fused modules are not supported with LoRA")
if cfg.adapter and cfg.peft_layers_to_transform and cfg.unfrozen_parameters:
raise ValueError(
"`unfrozen_parameters` used with `peft_layers_to_transform` can have unexpected behavior."
)
if cfg.relora_steps: if cfg.relora_steps:
if cfg.adapter not in ("lora", "qlora"): if cfg.adapter not in ("lora", "qlora"):
raise ValueError("cfg.adapter must be lora or qlora to use ReLoRA") raise ValueError("cfg.adapter must be lora or qlora to use ReLoRA")

View File

@@ -357,9 +357,6 @@ def load_model(
if is_deepspeed_zero3_enabled(): if is_deepspeed_zero3_enabled():
del model_kwargs["device_map"] del model_kwargs["device_map"]
elif cfg.deepspeed:
del model_kwargs["device_map"]
model_kwargs["low_cpu_mem_usage"] = True
if cfg.model_revision: if cfg.model_revision:
model_kwargs["revision"] = cfg.model_revision model_kwargs["revision"] = cfg.model_revision
@@ -736,7 +733,6 @@ def load_lora(model, cfg, inference=False):
r=cfg.lora_r, r=cfg.lora_r,
lora_alpha=cfg.lora_alpha, lora_alpha=cfg.lora_alpha,
target_modules=lora_target_modules, target_modules=lora_target_modules,
layers_to_transform=cfg.peft_layers_to_transform,
lora_dropout=cfg.lora_dropout, lora_dropout=cfg.lora_dropout,
fan_in_fan_out=cfg.lora_fan_in_fan_out, fan_in_fan_out=cfg.lora_fan_in_fan_out,
modules_to_save=cfg.lora_modules_to_save if cfg.lora_modules_to_save else None, modules_to_save=cfg.lora_modules_to_save if cfg.lora_modules_to_save else None,

View File

@@ -108,7 +108,7 @@ def disable_datasets_caching():
def process_datasets_for_packing(cfg, train_dataset, eval_dataset, tokenizer): def process_datasets_for_packing(cfg, train_dataset, eval_dataset, tokenizer):
drop_long = partial(drop_long_seq, sequence_len=cfg.sequence_len) drop_long = partial(drop_long_seq, sequence_len=cfg.sequence_len)
with zero_first(is_main_process()): with zero_first(is_main_process()), disable_datasets_caching():
if cfg.group_by_length: if cfg.group_by_length:
train_dataset = train_dataset.map( train_dataset = train_dataset.map(
add_length, num_proc=cfg.dataset_processes add_length, num_proc=cfg.dataset_processes

View File

@@ -694,21 +694,6 @@ class ValidationTest(BaseValidation):
validate_config(cfg) validate_config(cfg)
def test_unfrozen_parameters_w_peft_layers_to_transform(self):
cfg = DictDefault(
{
"adapter": "lora",
"unfrozen_parameters": ["model.layers.2[0-9]+.block_sparse_moe.gate.*"],
"peft_layers_to_transform": [0, 1],
}
)
with pytest.raises(
ValueError,
match=r".*can have unexpected behavior*",
):
validate_config(cfg)
class ValidationCheckModelConfig(BaseValidation): class ValidationCheckModelConfig(BaseValidation):
""" """