diff --git a/.gitignore b/.gitignore
index e1cd555b9..9d6a103da 100644
--- a/.gitignore
+++ b/.gitignore
@@ -167,3 +167,8 @@ cython_debug/
 # WandB
 # wandb creates a folder to store logs for training runs
 wandb
+
+# Runs
+lora-out/*
+qlora-out/*
+mlruns/*
diff --git a/README.md b/README.md
index f5058db3b..2d03968fd 100644
--- a/README.md
+++ b/README.md
@@ -99,7 +99,23 @@ Get started with Axolotl in just a few steps! This quickstart guide will walk yo
 
 **Requirements**: Python >=3.9 and Pytorch >=2.1.1.
 
-`pip3 install "axolotl[flash-attn,deepspeed] @ git+https://github.com/OpenAccess-AI-Collective/axolotl"`
+### For developers
+```bash
+git clone https://github.com/OpenAccess-AI-Collective/axolotl
+cd axolotl
+
+pip3 install packaging
+```
+
+General case:
+```bash
+pip3 install -e '.[flash-attn,deepspeed]'
+```
+
+Mac: see https://github.com/OpenAccess-AI-Collective/axolotl/blob/13199f678b9aab39e92961323bdbce3234ee4b2b/docs/mac.md
+```bash
+pip3 install -e '.'
+```
 
 ### Usage
 ```bash
diff --git a/docs/mac.md b/docs/mac.md
new file mode 100644
index 000000000..59eacce6d
--- /dev/null
+++ b/docs/mac.md
@@ -0,0 +1,18 @@
+# Mac M series support
+
+Axolotl is currently only partially usable on Mac: many of its dependencies, including PyTorch, either do not support MPS or support it incompletely.
+
+Current support:
+- [x] Support for all models
+- [x] Full training of models
+- [x] LoRA training
+- [x] Sample packing
+- [ ] FP16 and BF16 (awaiting AMP support for MPS in PyTorch)
+- [ ] Tri Dao's flash-attn (until it is supported, use sdp_attention as an alternative)
+- [ ] xformers
+- [ ] bitsandbytes (meaning no 4-bit/8-bit loading and no bnb optimizers)
+- [ ] QLoRA
+- [ ] DeepSpeed
+
+Untested:
+- FSDP
diff --git a/examples/mistral/lora-mps.yml b/examples/mistral/lora-mps.yml
new file mode 100644
index 000000000..31b0d527e
--- /dev/null
+++ b/examples/mistral/lora-mps.yml
@@ -0,0 +1,79 @@
+base_model: mistralai/Mistral-7B-v0.1
+model_type: MistralForCausalLM
+tokenizer_type: LlamaTokenizer
+
+load_in_8bit: false
+load_in_4bit: false
+strict: false
+
+datasets:
+  - path: mhenrichsen/alpaca_2k_test
+    type: alpaca
+dataset_prepared_path: last_run_prepared
+val_set_size: 0
+output_dir: ./lora-out
+eval_sample_packing: false
+
+adapter: lora
+lora_model_dir:
+
+sequence_len: 4096
+sample_packing: true
+pad_to_sequence_len: true
+
+lora_r: 32
+lora_alpha: 16
+lora_dropout: 0.05
+lora_target_linear: true
+lora_fan_in_fan_out:
+lora_target_modules:
+  - gate_proj
+  - down_proj
+  - up_proj
+  - q_proj
+  - v_proj
+  - k_proj
+  - o_proj
+
+wandb_project:
+wandb_entity:
+wandb_watch:
+wandb_name:
+wandb_log_model:
+
+gradient_accumulation_steps: 8
+micro_batch_size: 1
+num_epochs: 2
+optimizer: adamw_torch
+lr_scheduler: cosine
+learning_rate: 0.0002
+
+train_on_inputs: false
+group_by_length: false
+bf16: auto
+fp16: false
+tf32: true
+
+gradient_checkpointing: true
+early_stopping_patience:
+resume_from_checkpoint:
+local_rank:
+logging_steps: 1
+xformers_attention:
+flash_attention: false
+sdp_attention: true
+
+loss_watchdog_threshold: 5.0
+loss_watchdog_patience: 3
+
+warmup_steps: 10
+evals_per_epoch: 4
+eval_table_size:
+eval_table_max_new_tokens: 128
+saves_per_epoch: 1
+debug:
+deepspeed:
+weight_decay: 0.0
+fsdp:
+fsdp_config:
+special_tokens:
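Before trying the patch on Apple silicon, it may help to confirm that the installed PyTorch build actually exposes the MPS backend, since most of the limitations listed in docs/mac.md trace back to it. This check is not part of the patch, just a suggested sanity test:

```bash
# Check that this PyTorch build was compiled with MPS support
# and that an MPS device is actually available on this machine.
python3 -c "import torch; print(torch.backends.mps.is_built(), torch.backends.mps.is_available())"
```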
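For reference, a sketch of how the new example config would be exercised end to end, assuming the repo's usual accelerate-based CLI entry point (`axolotl.cli.train`) behaves the same on MPS as elsewhere:

```bash
# Editable install without the CUDA-only extras (flash-attn, deepspeed
# and bitsandbytes are unsupported on MPS, per docs/mac.md).
pip3 install packaging
pip3 install -e '.'

# LoRA fine-tune Mistral-7B with the MPS-friendly example config
# (flash_attention: false, sdp_attention: true, no 4/8-bit loading).
accelerate launch -m axolotl.cli.train examples/mistral/lora-mps.yml
```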