Fix(doc): Minor doc changes for peft and modal (#2462) [skip ci]
* fix(doc): document peft configs * fix(doc): explain modal env vs secrets difference * fix(doc): clarify evaluate vs lm-eval * fix: clarify what is performance
This commit is contained in:
40
docs/cli.qmd
40
docs/cli.qmd
@@ -170,7 +170,7 @@ axolotl merge-sharded-fsdp-weights config.yml
|
||||
|
||||
### evaluate
|
||||
|
||||
Evaluates a model's performance using metrics specified in the config.
|
||||
Evaluates a model's performance (loss etc) on the train and eval datasets.
|
||||
|
||||
```bash
|
||||
# Basic evaluation
|
||||
@@ -197,6 +197,8 @@ lm_eval_batch_size: # Batch size for evaluation
|
||||
output_dir: # Directory to save evaluation results
|
||||
```
|
||||
|
||||
See [LM Eval Harness](https://github.com/EleutherAI/lm-evaluation-harness) for more details.
|
||||
|
||||
## Legacy CLI Usage
|
||||
|
||||
While the new Click-based CLI is preferred, Axolotl still supports the legacy module-based CLI:
|
||||
@@ -235,7 +237,7 @@ Create a cloud config YAML with your Modal settings:
|
||||
```yaml
|
||||
# cloud_config.yml
|
||||
provider: modal
|
||||
gpu: a100 # Supported: l40s, a100-40gb, a100-80gb, a10g, h100, t4, l4
|
||||
gpu: a100 # Supported: l40s, a100-40gb, a100-80gb, a10g, h100, t4, l4
|
||||
gpu_count: 1 # Number of GPUs to use
|
||||
timeout: 86400 # Maximum runtime in seconds (24 hours)
|
||||
branch: main # Git branch to use (optional)
|
||||
@@ -248,7 +250,7 @@ volumes: # Persistent storage volumes
|
||||
- name: axolotl-artifacts
|
||||
mount: /workspace/artifacts
|
||||
|
||||
env: # Environment variables
|
||||
secrets: # Secrets to inject
|
||||
- WANDB_API_KEY
|
||||
- HF_TOKEN
|
||||
```
|
||||
@@ -274,15 +276,27 @@ axolotl lm-eval config.yml --cloud cloud_config.yml
|
||||
### Cloud Configuration Options
|
||||
|
||||
```yaml
|
||||
provider: # compute provider, currently only `modal` is supported
|
||||
gpu: # GPU type to use
|
||||
gpu_count: # Number of GPUs (default: 1)
|
||||
memory: # RAM in GB (default: 128)
|
||||
timeout: # Maximum runtime in seconds
|
||||
provider: # compute provider, currently only `modal` is supported
|
||||
gpu: # GPU type to use
|
||||
gpu_count: # Number of GPUs (default: 1)
|
||||
memory: # RAM in GB (default: 128)
|
||||
timeout: # Maximum runtime in seconds
|
||||
timeout_preprocess: # Preprocessing timeout
|
||||
branch: # Git branch to use
|
||||
docker_tag: # Custom Docker image tag
|
||||
volumes: # List of persistent storage volumes
|
||||
env: # Environment variables to pass
|
||||
secrets: # Secrets to inject
|
||||
branch: # Git branch to use
|
||||
docker_tag: # Custom Docker image tag
|
||||
volumes: # List of persistent storage volumes
|
||||
|
||||
# Environment variables to pass. Can be specified in two ways:
|
||||
# 1. As a string: Will load the value from the host computer's environment variables
|
||||
# 2. As a key-value pair: Will use the specified value directly
|
||||
# Example:
|
||||
# env:
|
||||
# - CUSTOM_VAR # Loads from host's $CUSTOM_VAR
|
||||
# - {CUSTOM_VAR: "value"} # Uses "value" directly
|
||||
env:
|
||||
|
||||
# Secrets to inject. Same input format as `env` but for sensitive data.
|
||||
secrets:
|
||||
# - HF_TOKEN
|
||||
# - WANDB_API_KEY
|
||||
```
|
||||
|
||||
@@ -354,7 +354,27 @@ lora_target_modules:
|
||||
# - down_proj
|
||||
# - up_proj
|
||||
lora_target_linear: # If true, will target all linear modules
|
||||
peft_layers_to_transform: # The layer indices to transform, otherwise, apply to all layers
|
||||
|
||||
# List[int] | int. # The layer indices to transform, otherwise, apply to all layers
|
||||
# https://huggingface.co/docs/peft/v0.15.0/en/package_reference/lora#peft.LoraConfig.layers_to_transform
|
||||
peft_layers_to_transform:
|
||||
|
||||
# Optional[bool]. Whether to use DoRA.
|
||||
# https://huggingface.co/docs/peft/v0.15.0/en/developer_guides/lora#weight-decomposed-low-rank-adaptation-dora
|
||||
peft_use_dora:
|
||||
|
||||
# Optional[bool]. Whether to use RSLoRA.
|
||||
# https://huggingface.co/docs/peft/v0.15.0/en/developer_guides/lora#rank-stabilized-lora
|
||||
peft_use_rslora:
|
||||
|
||||
# Optional[list[tuple[int, int]]]. List of layer indices to replicate.
|
||||
# https://huggingface.co/docs/peft/v0.15.0/en/developer_guides/lora#memory-efficient-layer-replication-with-lora
|
||||
peft_layer_replication:
|
||||
|
||||
# bool | Literal["gaussian", "eva", "olora", "pissa", "pissa_niter_[number of iters]", "corda", "loftq"]
|
||||
# How to initialize LoRA weights. Default to True which is MS original implementation.
|
||||
# https://huggingface.co/docs/peft/v0.15.0/en/developer_guides/lora#initialization
|
||||
peft_init_lora_weights:
|
||||
|
||||
# If you added new tokens to the tokenizer, you may need to save some LoRA modules because they need to know the new tokens.
|
||||
# For LLaMA and Mistral, you need to save `embed_tokens` and `lm_head`. It may vary for other models.
|
||||
|
||||
Reference in New Issue
Block a user