diff --git a/README.md b/README.md
index c70abf648..9afc37c43 100644
--- a/README.md
+++ b/README.md
@@ -96,7 +96,7 @@ accelerate launch -m axolotl.cli.train examples/openllama-3b/lora.yml
 
 # inference
 accelerate launch -m axolotl.cli.inference examples/openllama-3b/lora.yml \
-    --lora_model_dir="./lora-out"
+    --peft_model_dir="./lora-out"
 ```
 
 ## Installation
@@ -531,7 +531,7 @@ total_num_tokens:
 adapter: lora
 # If you already have a lora model trained that you want to load, put that here.
 # This means after training, if you want to test the model, you should set this to the value of `lora_out_dir`.
-lora_model_dir:
+peft_model_dir:
 
 # LoRA hyperparameters
 # For more details about the following options, see:
@@ -869,7 +869,7 @@ Pass the appropriate flag to the train command:
 
 - Pretrained LORA:
  ```bash
- python -m axolotl.cli.inference examples/your_config.yml --lora_model_dir="./lora-output-dir"
+ python -m axolotl.cli.inference examples/your_config.yml --peft_model_dir="./lora-output-dir"
  ```
 - Full weights finetune:
  ```bash
@@ -890,7 +890,7 @@ Please use `--sample_packing False` if you have it on and receive the error similar to below
 Add below flag to train command above
 
 ```bash
-python3 -m axolotl.cli.merge_lora examples/your_config.yml --lora_model_dir="./completed-model" --load_in_8bit=False --load_in_4bit=False
+python3 -m axolotl.cli.merge_lora examples/your_config.yml --peft_model_dir="./completed-model" --load_in_8bit=False --load_in_4bit=False
 ```
 
 If you run out of CUDA memory, you can try to merge in system RAM with
diff --git a/examples/cerebras/btlm-ft.yml b/examples/cerebras/btlm-ft.yml
index 4fd34aa5f..6b419072f 100644
--- a/examples/cerebras/btlm-ft.yml
+++ b/examples/cerebras/btlm-ft.yml
@@ -18,7 +18,7 @@ dataset_prepared_path: last_prepared_run
 val_set_size: 0.01
 
 adapter:
-lora_model_dir:
+peft_model_dir:
 sequence_len: 2048
 max_packed_sequence_len:
 sample_packing: false
diff --git a/examples/cerebras/qlora.yml b/examples/cerebras/qlora.yml
index a13517f3e..bab3d04d5 100644
--- a/examples/cerebras/qlora.yml
+++ b/examples/cerebras/qlora.yml
@@ -10,7 +10,7 @@ datasets:
 dataset_prepared_path:
 val_set_size: 0.01
 adapter: qlora
-lora_model_dir:
+peft_model_dir:
 sequence_len: 2048
 max_packed_sequence_len: 2048
 lora_r: 16
diff --git a/examples/code-llama/13b/lora.yml b/examples/code-llama/13b/lora.yml
index 91807846b..74aab34f9 100644
--- a/examples/code-llama/13b/lora.yml
+++ b/examples/code-llama/13b/lora.yml
@@ -20,7 +20,7 @@ sample_packing: true
 pad_to_sequence_len: true
 
 adapter: lora
-lora_model_dir:
+peft_model_dir:
 lora_r: 32
 lora_alpha: 16
 lora_dropout: 0.05
diff --git a/examples/code-llama/13b/qlora.yml b/examples/code-llama/13b/qlora.yml
index 9fa05ffab..a6b941089 100644
--- a/examples/code-llama/13b/qlora.yml
+++ b/examples/code-llama/13b/qlora.yml
@@ -16,7 +16,7 @@ val_set_size: 0.01
 output_dir: ./qlora-out
 
 adapter: qlora
-lora_model_dir:
+peft_model_dir:
 
 sequence_len: 4096
 sample_packing: true
diff --git a/examples/code-llama/34b/lora.yml b/examples/code-llama/34b/lora.yml
index a342b6ebc..9623c9c2c 100644
--- a/examples/code-llama/34b/lora.yml
+++ b/examples/code-llama/34b/lora.yml
@@ -20,7 +20,7 @@ sample_packing: true
 pad_to_sequence_len: true
 
 adapter: lora
-lora_model_dir:
+peft_model_dir:
 lora_r: 32
 lora_alpha: 16
 lora_dropout: 0.05
diff --git a/examples/code-llama/34b/qlora.yml b/examples/code-llama/34b/qlora.yml
index 1501dd9a3..6a980e407 100644
--- a/examples/code-llama/34b/qlora.yml
+++ b/examples/code-llama/34b/qlora.yml
@@ -16,7 +16,7 @@ val_set_size: 0.01
 output_dir: ./qlora-out
 
 adapter: qlora
-lora_model_dir:
+peft_model_dir:
 
 sequence_len: 4096
 sample_packing: true
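For context on what the renamed key points at: `peft_model_dir` names the directory where PEFT saved the trained adapter (`adapter_config.json` plus the adapter weights), and inference re-attaches it with `PeftModel.from_pretrained`, as the `load_lora` hunk near the end of this diff does. A minimal standalone sketch of that step; the base model name and the `./lora-out` path are illustrative, not taken from this PR:

```python
import torch
from peft import PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer

# Load the base model the adapter was trained on (name is illustrative).
base = AutoModelForCausalLM.from_pretrained(
    "openlm-research/open_llama_3b_v2", torch_dtype=torch.float16
)
tokenizer = AutoTokenizer.from_pretrained("openlm-research/open_llama_3b_v2")

# Re-attach the trained adapter from the directory peft_model_dir points to.
# is_trainable=False freezes the adapter weights, matching inference-time use.
model = PeftModel.from_pretrained(base, "./lora-out", is_trainable=False)
model.eval()
```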
diff --git a/examples/code-llama/7b/lora.yml b/examples/code-llama/7b/lora.yml
index 638dddc43..4546d737b 100644
--- a/examples/code-llama/7b/lora.yml
+++ b/examples/code-llama/7b/lora.yml
@@ -20,7 +20,7 @@ sample_packing: true
 pad_to_sequence_len: true
 
 adapter: lora
-lora_model_dir:
+peft_model_dir:
 lora_r: 32
 lora_alpha: 16
 lora_dropout: 0.05
diff --git a/examples/code-llama/7b/qlora.yml b/examples/code-llama/7b/qlora.yml
index 5b3b33822..0b6d1c02d 100644
--- a/examples/code-llama/7b/qlora.yml
+++ b/examples/code-llama/7b/qlora.yml
@@ -16,7 +16,7 @@ val_set_size: 0.01
 output_dir: ./qlora-out
 
 adapter: qlora
-lora_model_dir:
+peft_model_dir:
 
 sequence_len: 4096
 sample_packing: true
diff --git a/examples/falcon/config-7b-lora.yml b/examples/falcon/config-7b-lora.yml
index f45deb643..79b7b62a3 100644
--- a/examples/falcon/config-7b-lora.yml
+++ b/examples/falcon/config-7b-lora.yml
@@ -15,7 +15,7 @@ datasets:
 dataset_prepared_path:
 val_set_size: 0.01
 adapter: lora
-lora_model_dir:
+peft_model_dir:
 sequence_len: 2048
 max_packed_sequence_len:
 lora_r: 16
diff --git a/examples/falcon/config-7b-qlora.yml b/examples/falcon/config-7b-qlora.yml
index f59341965..8fa914820 100644
--- a/examples/falcon/config-7b-qlora.yml
+++ b/examples/falcon/config-7b-qlora.yml
@@ -22,7 +22,7 @@ dataset_prepared_path:
 val_set_size: 0.01
 
 # enable QLoRA
 adapter: qlora
-lora_model_dir:
+peft_model_dir:
 sequence_len: 2048
 max_packed_sequence_len:
diff --git a/examples/falcon/config-7b.yml b/examples/falcon/config-7b.yml
index 777a97b31..adaea40ed 100644
--- a/examples/falcon/config-7b.yml
+++ b/examples/falcon/config-7b.yml
@@ -15,7 +15,7 @@ datasets:
 dataset_prepared_path:
 val_set_size: 0.01
 adapter:
-lora_model_dir:
+peft_model_dir:
 sequence_len: 2048
 max_packed_sequence_len:
 lora_r: 64
diff --git a/examples/gptj/qlora.yml b/examples/gptj/qlora.yml
index 696747dfe..b1b575155 100644
--- a/examples/gptj/qlora.yml
+++ b/examples/gptj/qlora.yml
@@ -10,7 +10,7 @@ datasets:
 dataset_prepared_path:
 val_set_size: 0.01
 adapter: qlora
-lora_model_dir:
+peft_model_dir:
 sequence_len: 2048
 max_packed_sequence_len:
 lora_r: 8
diff --git a/examples/jeopardy-bot/config.yml b/examples/jeopardy-bot/config.yml
index 32e7a34ee..946308d36 100644
--- a/examples/jeopardy-bot/config.yml
+++ b/examples/jeopardy-bot/config.yml
@@ -9,7 +9,7 @@ datasets:
 dataset_prepared_path:
 val_set_size: 0.02
 adapter:
-lora_model_dir:
+peft_model_dir:
 sequence_len: 512
 max_packed_sequence_len:
 lora_r:
diff --git a/examples/llama-2/gptq-lora.yml b/examples/llama-2/gptq-lora.yml
index 257433f26..48ac43a96 100644
--- a/examples/llama-2/gptq-lora.yml
+++ b/examples/llama-2/gptq-lora.yml
@@ -18,7 +18,7 @@ datasets:
 dataset_prepared_path:
 val_set_size: 0.01
 adapter: lora
-lora_model_dir:
+peft_model_dir:
 sequence_len: 4096
 sample_packing:
 lora_r: 8
diff --git a/examples/llama-2/ia3.yml b/examples/llama-2/ia3.yml
index a914a9179..48d81d5f0 100644
--- a/examples/llama-2/ia3.yml
+++ b/examples/llama-2/ia3.yml
@@ -20,7 +20,7 @@ sample_packing: true
 pad_to_sequence_len: true
 
 adapter: ia3
-ia3_model_dir:
+peft_model_dir:
 ia3_target_modules:
   - k_proj
   - v_proj
diff --git a/examples/llama-2/lora.yml b/examples/llama-2/lora.yml
index 8c0e3e910..e196183ca 100644
--- a/examples/llama-2/lora.yml
+++ b/examples/llama-2/lora.yml
@@ -20,7 +20,7 @@ sample_packing: true
 pad_to_sequence_len: true
 
 adapter: lora
-lora_model_dir:
+peft_model_dir:
 lora_r: 32
 lora_alpha: 16
 lora_dropout: 0.05
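The README's `merge_lora` command above likewise switches to `--peft_model_dir`. For orientation, the merge it drives amounts to folding the adapter deltas back into the base weights. A rough standalone sketch using PEFT's `merge_and_unload`, not axolotl's actual implementation; the model name and paths are illustrative:

```python
import torch
from peft import PeftModel
from transformers import AutoModelForCausalLM

# Base model and paths are illustrative placeholders.
base = AutoModelForCausalLM.from_pretrained(
    "NousResearch/Llama-2-7b-hf", torch_dtype=torch.float16
)
model = PeftModel.from_pretrained(base, "./completed-model")

# Fold the LoRA deltas into the base weights and drop the adapter wrappers,
# leaving a plain transformers model that can be saved and shipped.
merged = model.merge_and_unload()
merged.save_pretrained("./merged-model")
```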
diff --git a/examples/llama-2/qlora.yml b/examples/llama-2/qlora.yml
index b8209934c..bdf40e1a8 100644
--- a/examples/llama-2/qlora.yml
+++ b/examples/llama-2/qlora.yml
@@ -16,7 +16,7 @@ val_set_size: 0.01
 output_dir: ./qlora-out
 
 adapter: qlora
-lora_model_dir:
+peft_model_dir:
 
 sequence_len: 4096
 sample_packing: true
diff --git a/examples/llama-2/relora.yml b/examples/llama-2/relora.yml
index 9f27cafea..be3b38efc 100644
--- a/examples/llama-2/relora.yml
+++ b/examples/llama-2/relora.yml
@@ -16,7 +16,7 @@ val_set_size: 0.01
 output_dir: ./relora-out
 
 adapter: qlora
-lora_model_dir:
+peft_model_dir:
 
 sequence_len: 4096
 sample_packing: true
diff --git a/examples/llama-2/tiny-llama.yml b/examples/llama-2/tiny-llama.yml
index 0b56ea7d3..3526333b1 100644
--- a/examples/llama-2/tiny-llama.yml
+++ b/examples/llama-2/tiny-llama.yml
@@ -20,7 +20,7 @@ sequence_len: 4096
 sample_packing: true
 
 adapter: lora
-lora_model_dir:
+peft_model_dir:
 lora_r: 32
 lora_alpha: 16
 lora_dropout: 0.05
diff --git a/examples/mpt-7b/config.yml b/examples/mpt-7b/config.yml
index 8d9b429b1..b57a83cd6 100644
--- a/examples/mpt-7b/config.yml
+++ b/examples/mpt-7b/config.yml
@@ -9,7 +9,7 @@ datasets:
 dataset_prepared_path:
 val_set_size: 0.02
 adapter:
-lora_model_dir:
+peft_model_dir:
 sequence_len: 2048
 max_packed_sequence_len:
 lora_r: 8
diff --git a/examples/openllama-3b/config.yml b/examples/openllama-3b/config.yml
index dd11d53b0..1f4338ca3 100644
--- a/examples/openllama-3b/config.yml
+++ b/examples/openllama-3b/config.yml
@@ -12,7 +12,7 @@ datasets:
 dataset_prepared_path:
 val_set_size: 0.02
 adapter:
-lora_model_dir:
+peft_model_dir:
 sequence_len: 1024
 sample_packing: true
 lora_r:
diff --git a/examples/openllama-3b/lora.yml b/examples/openllama-3b/lora.yml
index fad3fb551..a59b5883f 100644
--- a/examples/openllama-3b/lora.yml
+++ b/examples/openllama-3b/lora.yml
@@ -12,7 +12,7 @@ datasets:
 dataset_prepared_path:
 val_set_size: 0.02
 adapter: lora
-lora_model_dir:
+peft_model_dir:
 sequence_len: 1024
 sample_packing: true
 lora_r: 8
diff --git a/examples/openllama-3b/qlora.yml b/examples/openllama-3b/qlora.yml
index 80d4d727b..9ebd774fb 100644
--- a/examples/openllama-3b/qlora.yml
+++ b/examples/openllama-3b/qlora.yml
@@ -12,7 +12,7 @@ datasets:
 dataset_prepared_path:
 val_set_size: 0.01
 adapter: qlora
-lora_model_dir:
+peft_model_dir:
 sequence_len: 1024
 sample_packing: true
 lora_r: 8
diff --git a/examples/phi/phi-ft.yml b/examples/phi/phi-ft.yml
index 668eea317..30052252b 100644
--- a/examples/phi/phi-ft.yml
+++ b/examples/phi/phi-ft.yml
@@ -22,7 +22,7 @@ sample_packing: true
 pad_to_sequence_len:
 
 adapter:
-lora_model_dir:
+peft_model_dir:
 lora_r:
 lora_alpha:
 lora_dropout:
diff --git a/examples/phi/phi-qlora.yml b/examples/phi/phi-qlora.yml
index a548b3f05..d3116e207 100644
--- a/examples/phi/phi-qlora.yml
+++ b/examples/phi/phi-qlora.yml
@@ -22,7 +22,7 @@ sample_packing: false  # not CURRENTLY compatible with LoRAs
 pad_to_sequence_len:
 
 adapter: qlora
-lora_model_dir:
+peft_model_dir:
 lora_r: 64
 lora_alpha: 32
 lora_dropout: 0.05
diff --git a/examples/pythia-12b/config.yml b/examples/pythia-12b/config.yml
index 4e0e1523a..30040efb0 100644
--- a/examples/pythia-12b/config.yml
+++ b/examples/pythia-12b/config.yml
@@ -13,7 +13,7 @@ datasets:
 dataset_prepared_path:
 val_set_size: 0.05
 adapter:
-lora_model_dir:
+peft_model_dir:
 sequence_len: 2048
 max_packed_sequence_len: 2048
 lora_r: 64
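With this many example files repeating the same one-line change, a quick sweep is an easy way to confirm nothing under `examples/` still carries a legacy key. A throwaway check script, assuming PyYAML is installed; this script is not part of the PR:

```python
import pathlib

import yaml

LEGACY_KEYS = {"lora_model_dir", "ia3_model_dir"}

# Flag any example config that still uses a pre-rename adapter-dir key.
for path in sorted(pathlib.Path("examples").rglob("*.yml")):
    cfg = yaml.safe_load(path.read_text()) or {}
    stale = LEGACY_KEYS & set(cfg)
    if stale:
        print(f"{path}: still uses {sorted(stale)}")
```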
diff --git a/examples/pythia/lora.yml b/examples/pythia/lora.yml
index 6ff036621..fe20da563 100644
--- a/examples/pythia/lora.yml
+++ b/examples/pythia/lora.yml
@@ -7,7 +7,7 @@ datasets:
 dataset_prepared_path:
 val_set_size: 0.05
 adapter: lora
-lora_model_dir:
+peft_model_dir:
 sequence_len: 512
 lora_r: 16
 lora_alpha: 32
diff --git a/examples/redpajama/config-3b.yml b/examples/redpajama/config-3b.yml
index 97f31c87a..3dc3a1674 100644
--- a/examples/redpajama/config-3b.yml
+++ b/examples/redpajama/config-3b.yml
@@ -10,7 +10,7 @@ datasets:
 dataset_prepared_path:
 val_set_size: 0.02
 adapter:
-lora_model_dir:
+peft_model_dir:
 sequence_len: 2048
 max_packed_sequence_len:
 lora_r: 8
diff --git a/examples/replit-3b/config-lora.yml b/examples/replit-3b/config-lora.yml
index d345e25a0..09174e6ba 100644
--- a/examples/replit-3b/config-lora.yml
+++ b/examples/replit-3b/config-lora.yml
@@ -8,7 +8,7 @@ datasets:
 dataset_prepared_path:
 val_set_size: 0.05
 adapter: lora
-lora_model_dir:
+peft_model_dir:
 sequence_len: 2048
 max_packed_sequence_len:
 lora_r: 8
diff --git a/examples/xgen-7b/xgen-7b-8k-qlora.yml b/examples/xgen-7b/xgen-7b-8k-qlora.yml
index 352dcb610..1fb61118d 100644
--- a/examples/xgen-7b/xgen-7b-8k-qlora.yml
+++ b/examples/xgen-7b/xgen-7b-8k-qlora.yml
@@ -20,7 +20,7 @@ dataset_prepared_path:
 val_set_size: 0.01
 
 # enable QLoRA
 adapter: qlora
-lora_model_dir:
+peft_model_dir:
 sequence_len: 8192
 max_packed_sequence_len:
diff --git a/src/axolotl/utils/config.py b/src/axolotl/utils/config.py
index 9503d838c..1c0a15d67 100644
--- a/src/axolotl/utils/config.py
+++ b/src/axolotl/utils/config.py
@@ -190,7 +190,10 @@ def validate_config(cfg):
             raise ValueError("Require cfg.load_in_4bit to be True for qlora")
 
     if not cfg.load_in_8bit and cfg.adapter == "lora":
-        LOG.warning("We recommend setting `load_in_8bit: true` for LORA finetuning")
+        LOG.warning("We recommend setting `load_in_8bit: true` for LoRA finetuning")
+
+    if not cfg.load_in_8bit and cfg.adapter == "ia3":
+        LOG.warning("We recommend setting `load_in_8bit: true` for IA3 finetuning")
 
     if cfg.relora_steps:
         if cfg.adapter not in ("lora", "qlora"):
diff --git a/src/axolotl/utils/models.py b/src/axolotl/utils/models.py
index 5a11c1711..b99c95158 100644
--- a/src/axolotl/utils/models.py
+++ b/src/axolotl/utils/models.py
@@ -406,23 +406,21 @@ def load_model(
             if hasattr(module, "weight"):
                 module.to(torch.float32)
 
-    needs_fa2_dtype = cfg.adapter or cfg.fsdp
-    if (
-        (cfg.adapter == "lora" and cfg.load_in_8bit)
-        or (cfg.adapter == "qlora" and cfg.load_in_4bit)
-        or (cfg.adapter == "ia3" and cfg.load_in_8bit)
-    ):
+    require_peft: bool = False
+    if cfg.adapter in ["lora", "qlora", "ia3"]:
+        require_peft = True
+
+    if require_peft:
         LOG.info("converting PEFT model w/ prepare_model_for_kbit_training")
         if cfg.gradient_checkpointing:
             model.gradient_checkpointing_enable()
         model = prepare_model_for_kbit_training(
            model, use_gradient_checkpointing=cfg.gradient_checkpointing
         )
-        needs_fa2_dtype = True
 
     # LlamaRMSNorm layers are in fp32 after kbit_training or full finetune, so we need to
     # convert them back to fp16/bf16 for flash-attn compatibility.
-    if needs_fa2_dtype or (cfg.flash_attention and cfg.is_llama_derived_model):
+    if require_peft or cfg.fsdp or (cfg.flash_attention and cfg.is_llama_derived_model):
         LOG.info("converting modules to %s for flash attention", cfg.torch_dtype)
         for name, module in model.named_modules():
             if "norm" in name:
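Two behavioral notes on this hunk: the old guard only ran `prepare_model_for_kbit_training` when the adapter was paired with its matching quantization flag, while the new `require_peft` fires for any lora/qlora/ia3 run, quantized or not; and the dtype reconversion no longer triggers on a bare truthy `cfg.adapter`. A condensed sketch of the resulting control flow, wrapped in a hypothetical helper (`prepare_for_peft` is not a function in this PR):

```python
from peft import prepare_model_for_kbit_training


def prepare_for_peft(model, cfg):
    """Hypothetical condensation of the load_model() logic above."""
    # Any PEFT adapter now triggers k-bit preparation, regardless of
    # load_in_8bit/load_in_4bit (the old guard required a matching flag).
    require_peft = cfg.adapter in ["lora", "qlora", "ia3"]
    if require_peft:
        if cfg.gradient_checkpointing:
            model.gradient_checkpointing_enable()
        model = prepare_model_for_kbit_training(
            model, use_gradient_checkpointing=cfg.gradient_checkpointing
        )

    # k-bit preparation leaves norm layers in fp32; cast them back down so
    # flash-attn sees a uniform dtype.
    if require_peft or cfg.fsdp or (cfg.flash_attention and cfg.is_llama_derived_model):
        for name, module in model.named_modules():
            if "norm" in name:
                module.to(cfg.torch_dtype)
    return model
```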
@@ -492,11 +490,11 @@ def load_llama_adapter(model, cfg):
         task_type="CAUSAL_LM",
     )
 
-    if cfg.lora_model_dir:
+    if cfg.peft_model_dir or cfg.lora_model_dir:
         LOG.debug("Loading pretained PEFT - llama_adapter")
         model = PeftModel.from_pretrained(
             model,
-            cfg.lora_model_dir,
+            cfg.peft_model_dir or cfg.lora_model_dir,
             torch_dtype=torch.float16,
         )
     else:
@@ -548,11 +546,11 @@ def load_lora(model, cfg, inference=False):
         task_type="CAUSAL_LM",
     )
 
-    if cfg.lora_model_dir:
+    if cfg.peft_model_dir:
         LOG.debug("Loading pretained PEFT - LoRA")
         model = PeftModel.from_pretrained(
             model,
-            cfg.lora_model_dir,
+            cfg.peft_model_dir,
             is_trainable=(not inference),
         )
     else:
@@ -581,11 +579,11 @@ def load_ia3(model, cfg, inference=False):
         **ia3_config_kwargs,
     )
 
-    if cfg.ia3_model_dir:
+    if cfg.peft_model_dir:
         LOG.debug("Loading pretained PEFT - IA3")
         model = PeftModel.from_pretrained(
             model,
-            cfg.ia3_model_dir,
+            cfg.peft_model_dir,
             is_trainable=(not inference),
         )
     else:
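One inconsistency worth flagging: after this diff, `load_llama_adapter` still falls back to the legacy `cfg.lora_model_dir`, but `load_lora` and `load_ia3` read only `cfg.peft_model_dir`, so an old config with `lora_model_dir` set would silently take the `else` branch there. A sketch of one way to smooth the migration; `normalize_peft_cfg` is a hypothetical helper, not code from this PR, and it assumes `cfg` allows attribute assignment:

```python
import logging

LOG = logging.getLogger(__name__)


def normalize_peft_cfg(cfg):
    """Hypothetical shim: map legacy adapter-dir keys onto peft_model_dir
    once, up front, so every loader sees the new key."""
    for legacy in ("lora_model_dir", "ia3_model_dir"):
        value = getattr(cfg, legacy, None)
        if value and not getattr(cfg, "peft_model_dir", None):
            LOG.warning("`%s` is deprecated; use `peft_model_dir` instead", legacy)
            cfg.peft_model_dir = value
    return cfg
```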