consolidate as peft_model_dir

This commit is contained in:
Wing Lian
2023-09-19 19:02:14 -04:00
parent ba85308720
commit 203369411e
33 changed files with 50 additions and 49 deletions

View File

@@ -96,7 +96,7 @@ accelerate launch -m axolotl.cli.train examples/openllama-3b/lora.yml
# inference # inference
accelerate launch -m axolotl.cli.inference examples/openllama-3b/lora.yml \ accelerate launch -m axolotl.cli.inference examples/openllama-3b/lora.yml \
--lora_model_dir="./lora-out" --peft_model_dir="./lora-out"
``` ```
## Installation ## Installation
@@ -531,7 +531,7 @@ total_num_tokens:
adapter: lora adapter: lora
# If you already have a lora model trained that you want to load, put that here. # If you already have a lora model trained that you want to load, put that here.
# This means after training, if you want to test the model, you should set this to the value of `output_dir`. # This means after training, if you want to test the model, you should set this to the value of `output_dir`.
lora_model_dir: peft_model_dir:
# LoRA hyperparameters # LoRA hyperparameters
# For more details about the following options, see: # For more details about the following options, see:
@@ -869,7 +869,7 @@ Pass the appropriate flag to the train command:
- Pretrained LORA: - Pretrained LORA:
```bash ```bash
python -m axolotl.cli.inference examples/your_config.yml --lora_model_dir="./lora-output-dir" python -m axolotl.cli.inference examples/your_config.yml --peft_model_dir="./lora-output-dir"
``` ```
- Full weights finetune: - Full weights finetune:
```bash ```bash
@@ -890,7 +890,7 @@ Please use `--sample_packing False` if you have it on and receive the error simi
Add the flag below to the train command above Add the flag below to the train command above
```bash ```bash
python3 -m axolotl.cli.merge_lora examples/your_config.yml --lora_model_dir="./completed-model" --load_in_8bit=False --load_in_4bit=False python3 -m axolotl.cli.merge_lora examples/your_config.yml --peft_model_dir="./completed-model" --load_in_8bit=False --load_in_4bit=False
``` ```
If you run out of CUDA memory, you can try to merge in system RAM with If you run out of CUDA memory, you can try to merge in system RAM with

View File

@@ -18,7 +18,7 @@ dataset_prepared_path: last_prepared_run
val_set_size: 0.01 val_set_size: 0.01
adapter: adapter:
lora_model_dir: peft_model_dir:
sequence_len: 2048 sequence_len: 2048
max_packed_sequence_len: max_packed_sequence_len:
sample_packing: false sample_packing: false

View File

@@ -10,7 +10,7 @@ datasets:
dataset_prepared_path: dataset_prepared_path:
val_set_size: 0.01 val_set_size: 0.01
adapter: qlora adapter: qlora
lora_model_dir: peft_model_dir:
sequence_len: 2048 sequence_len: 2048
max_packed_sequence_len: 2048 max_packed_sequence_len: 2048
lora_r: 16 lora_r: 16

View File

@@ -20,7 +20,7 @@ sample_packing: true
pad_to_sequence_len: true pad_to_sequence_len: true
adapter: lora adapter: lora
lora_model_dir: peft_model_dir:
lora_r: 32 lora_r: 32
lora_alpha: 16 lora_alpha: 16
lora_dropout: 0.05 lora_dropout: 0.05

View File

@@ -16,7 +16,7 @@ val_set_size: 0.01
output_dir: ./qlora-out output_dir: ./qlora-out
adapter: qlora adapter: qlora
lora_model_dir: peft_model_dir:
sequence_len: 4096 sequence_len: 4096
sample_packing: true sample_packing: true

View File

@@ -20,7 +20,7 @@ sample_packing: true
pad_to_sequence_len: true pad_to_sequence_len: true
adapter: lora adapter: lora
lora_model_dir: peft_model_dir:
lora_r: 32 lora_r: 32
lora_alpha: 16 lora_alpha: 16
lora_dropout: 0.05 lora_dropout: 0.05

View File

@@ -16,7 +16,7 @@ val_set_size: 0.01
output_dir: ./qlora-out output_dir: ./qlora-out
adapter: qlora adapter: qlora
lora_model_dir: peft_model_dir:
sequence_len: 4096 sequence_len: 4096
sample_packing: true sample_packing: true

View File

@@ -20,7 +20,7 @@ sample_packing: true
pad_to_sequence_len: true pad_to_sequence_len: true
adapter: lora adapter: lora
lora_model_dir: peft_model_dir:
lora_r: 32 lora_r: 32
lora_alpha: 16 lora_alpha: 16
lora_dropout: 0.05 lora_dropout: 0.05

View File

@@ -16,7 +16,7 @@ val_set_size: 0.01
output_dir: ./qlora-out output_dir: ./qlora-out
adapter: qlora adapter: qlora
lora_model_dir: peft_model_dir:
sequence_len: 4096 sequence_len: 4096
sample_packing: true sample_packing: true

View File

@@ -15,7 +15,7 @@ datasets:
dataset_prepared_path: dataset_prepared_path:
val_set_size: 0.01 val_set_size: 0.01
adapter: lora adapter: lora
lora_model_dir: peft_model_dir:
sequence_len: 2048 sequence_len: 2048
max_packed_sequence_len: max_packed_sequence_len:
lora_r: 16 lora_r: 16

View File

@@ -22,7 +22,7 @@ dataset_prepared_path:
val_set_size: 0.01 val_set_size: 0.01
# enable QLoRA # enable QLoRA
adapter: qlora adapter: qlora
lora_model_dir: peft_model_dir:
sequence_len: 2048 sequence_len: 2048
max_packed_sequence_len: max_packed_sequence_len:

View File

@@ -15,7 +15,7 @@ datasets:
dataset_prepared_path: dataset_prepared_path:
val_set_size: 0.01 val_set_size: 0.01
adapter: adapter:
lora_model_dir: peft_model_dir:
sequence_len: 2048 sequence_len: 2048
max_packed_sequence_len: max_packed_sequence_len:
lora_r: 64 lora_r: 64

View File

@@ -10,7 +10,7 @@ datasets:
dataset_prepared_path: dataset_prepared_path:
val_set_size: 0.01 val_set_size: 0.01
adapter: qlora adapter: qlora
lora_model_dir: peft_model_dir:
sequence_len: 2048 sequence_len: 2048
max_packed_sequence_len: max_packed_sequence_len:
lora_r: 8 lora_r: 8

View File

@@ -9,7 +9,7 @@ datasets:
dataset_prepared_path: dataset_prepared_path:
val_set_size: 0.02 val_set_size: 0.02
adapter: adapter:
lora_model_dir: peft_model_dir:
sequence_len: 512 sequence_len: 512
max_packed_sequence_len: max_packed_sequence_len:
lora_r: lora_r:

View File

@@ -18,7 +18,7 @@ datasets:
dataset_prepared_path: dataset_prepared_path:
val_set_size: 0.01 val_set_size: 0.01
adapter: lora adapter: lora
lora_model_dir: peft_model_dir:
sequence_len: 4096 sequence_len: 4096
sample_packing: sample_packing:
lora_r: 8 lora_r: 8

View File

@@ -20,7 +20,7 @@ sample_packing: true
pad_to_sequence_len: true pad_to_sequence_len: true
adapter: ia3 adapter: ia3
ia3_model_dir: peft_model_dir:
ia3_target_modules: ia3_target_modules:
- k_proj - k_proj
- v_proj - v_proj

View File

@@ -20,7 +20,7 @@ sample_packing: true
pad_to_sequence_len: true pad_to_sequence_len: true
adapter: lora adapter: lora
lora_model_dir: peft_model_dir:
lora_r: 32 lora_r: 32
lora_alpha: 16 lora_alpha: 16
lora_dropout: 0.05 lora_dropout: 0.05

View File

@@ -16,7 +16,7 @@ val_set_size: 0.01
output_dir: ./qlora-out output_dir: ./qlora-out
adapter: qlora adapter: qlora
lora_model_dir: peft_model_dir:
sequence_len: 4096 sequence_len: 4096
sample_packing: true sample_packing: true

View File

@@ -16,7 +16,7 @@ val_set_size: 0.01
output_dir: ./relora-out output_dir: ./relora-out
adapter: qlora adapter: qlora
lora_model_dir: peft_model_dir:
sequence_len: 4096 sequence_len: 4096
sample_packing: true sample_packing: true

View File

@@ -20,7 +20,7 @@ sequence_len: 4096
sample_packing: true sample_packing: true
adapter: lora adapter: lora
lora_model_dir: peft_model_dir:
lora_r: 32 lora_r: 32
lora_alpha: 16 lora_alpha: 16
lora_dropout: 0.05 lora_dropout: 0.05

View File

@@ -9,7 +9,7 @@ datasets:
dataset_prepared_path: dataset_prepared_path:
val_set_size: 0.02 val_set_size: 0.02
adapter: adapter:
lora_model_dir: peft_model_dir:
sequence_len: 2048 sequence_len: 2048
max_packed_sequence_len: max_packed_sequence_len:
lora_r: 8 lora_r: 8

View File

@@ -12,7 +12,7 @@ datasets:
dataset_prepared_path: dataset_prepared_path:
val_set_size: 0.02 val_set_size: 0.02
adapter: adapter:
lora_model_dir: peft_model_dir:
sequence_len: 1024 sequence_len: 1024
sample_packing: true sample_packing: true
lora_r: lora_r:

View File

@@ -12,7 +12,7 @@ datasets:
dataset_prepared_path: dataset_prepared_path:
val_set_size: 0.02 val_set_size: 0.02
adapter: lora adapter: lora
lora_model_dir: peft_model_dir:
sequence_len: 1024 sequence_len: 1024
sample_packing: true sample_packing: true
lora_r: 8 lora_r: 8

View File

@@ -12,7 +12,7 @@ datasets:
dataset_prepared_path: dataset_prepared_path:
val_set_size: 0.01 val_set_size: 0.01
adapter: qlora adapter: qlora
lora_model_dir: peft_model_dir:
sequence_len: 1024 sequence_len: 1024
sample_packing: true sample_packing: true
lora_r: 8 lora_r: 8

View File

@@ -22,7 +22,7 @@ sample_packing: true
pad_to_sequence_len: pad_to_sequence_len:
adapter: adapter:
lora_model_dir: peft_model_dir:
lora_r: lora_r:
lora_alpha: lora_alpha:
lora_dropout: lora_dropout:

View File

@@ -22,7 +22,7 @@ sample_packing: false # not CURRENTLY compatible with LoRAs
pad_to_sequence_len: pad_to_sequence_len:
adapter: qlora adapter: qlora
lora_model_dir: peft_model_dir:
lora_r: 64 lora_r: 64
lora_alpha: 32 lora_alpha: 32
lora_dropout: 0.05 lora_dropout: 0.05

View File

@@ -13,7 +13,7 @@ datasets:
dataset_prepared_path: dataset_prepared_path:
val_set_size: 0.05 val_set_size: 0.05
adapter: adapter:
lora_model_dir: peft_model_dir:
sequence_len: 2048 sequence_len: 2048
max_packed_sequence_len: 2048 max_packed_sequence_len: 2048
lora_r: 64 lora_r: 64

View File

@@ -7,7 +7,7 @@ datasets:
dataset_prepared_path: dataset_prepared_path:
val_set_size: 0.05 val_set_size: 0.05
adapter: lora adapter: lora
lora_model_dir: peft_model_dir:
sequence_len: 512 sequence_len: 512
lora_r: 16 lora_r: 16
lora_alpha: 32 lora_alpha: 32

View File

@@ -10,7 +10,7 @@ datasets:
dataset_prepared_path: dataset_prepared_path:
val_set_size: 0.02 val_set_size: 0.02
adapter: adapter:
lora_model_dir: peft_model_dir:
sequence_len: 2048 sequence_len: 2048
max_packed_sequence_len: max_packed_sequence_len:
lora_r: 8 lora_r: 8

View File

@@ -8,7 +8,7 @@ datasets:
dataset_prepared_path: dataset_prepared_path:
val_set_size: 0.05 val_set_size: 0.05
adapter: lora adapter: lora
lora_model_dir: peft_model_dir:
sequence_len: 2048 sequence_len: 2048
max_packed_sequence_len: max_packed_sequence_len:
lora_r: 8 lora_r: 8

View File

@@ -20,7 +20,7 @@ dataset_prepared_path:
val_set_size: 0.01 val_set_size: 0.01
# enable QLoRA # enable QLoRA
adapter: qlora adapter: qlora
lora_model_dir: peft_model_dir:
sequence_len: 8192 sequence_len: 8192
max_packed_sequence_len: max_packed_sequence_len:

View File

@@ -190,7 +190,10 @@ def validate_config(cfg):
raise ValueError("Require cfg.load_in_4bit to be True for qlora") raise ValueError("Require cfg.load_in_4bit to be True for qlora")
if not cfg.load_in_8bit and cfg.adapter == "lora": if not cfg.load_in_8bit and cfg.adapter == "lora":
LOG.warning("We recommend setting `load_in_8bit: true` for LORA finetuning") LOG.warning("We recommend setting `load_in_8bit: true` for LoRA finetuning")
if not cfg.load_in_8bit and cfg.adapter == "ia3":
LOG.warning("We recommend setting `load_in_8bit: true` for IA3 finetuning")
if cfg.relora_steps: if cfg.relora_steps:
if cfg.adapter not in ("lora", "qlora"): if cfg.adapter not in ("lora", "qlora"):

View File

@@ -406,23 +406,21 @@ def load_model(
if hasattr(module, "weight"): if hasattr(module, "weight"):
module.to(torch.float32) module.to(torch.float32)
needs_fa2_dtype = cfg.adapter or cfg.fsdp require_peft: bool = False
if ( if cfg.adapter in ["lora", "qlora", "ia3"]:
(cfg.adapter == "lora" and cfg.load_in_8bit) require_peft = True
or (cfg.adapter == "qlora" and cfg.load_in_4bit)
or (cfg.adapter == "ia3" and cfg.load_in_8bit) if require_peft:
):
LOG.info("converting PEFT model w/ prepare_model_for_kbit_training") LOG.info("converting PEFT model w/ prepare_model_for_kbit_training")
if cfg.gradient_checkpointing: if cfg.gradient_checkpointing:
model.gradient_checkpointing_enable() model.gradient_checkpointing_enable()
model = prepare_model_for_kbit_training( model = prepare_model_for_kbit_training(
model, use_gradient_checkpointing=cfg.gradient_checkpointing model, use_gradient_checkpointing=cfg.gradient_checkpointing
) )
needs_fa2_dtype = True
# LlamaRMSNorm layers are in fp32 after kbit_training or full finetune, so we need to # LlamaRMSNorm layers are in fp32 after kbit_training or full finetune, so we need to
# convert them back to fp16/bf16 for flash-attn compatibility. # convert them back to fp16/bf16 for flash-attn compatibility.
if needs_fa2_dtype or (cfg.flash_attention and cfg.is_llama_derived_model): if require_peft or cfg.fsdp or (cfg.flash_attention and cfg.is_llama_derived_model):
LOG.info("converting modules to %s for flash attention", cfg.torch_dtype) LOG.info("converting modules to %s for flash attention", cfg.torch_dtype)
for name, module in model.named_modules(): for name, module in model.named_modules():
if "norm" in name: if "norm" in name:
@@ -492,11 +490,11 @@ def load_llama_adapter(model, cfg):
task_type="CAUSAL_LM", task_type="CAUSAL_LM",
) )
if cfg.lora_model_dir: if cfg.peft_model_dir or cfg.lora_model_dir:
LOG.debug("Loading pretained PEFT - llama_adapter") LOG.debug("Loading pretained PEFT - llama_adapter")
model = PeftModel.from_pretrained( model = PeftModel.from_pretrained(
model, model,
cfg.lora_model_dir, cfg.peft_model_dir or cfg.lora_model_dir,
torch_dtype=torch.float16, torch_dtype=torch.float16,
) )
else: else:
@@ -548,11 +546,11 @@ def load_lora(model, cfg, inference=False):
task_type="CAUSAL_LM", task_type="CAUSAL_LM",
) )
if cfg.lora_model_dir: if cfg.peft_model_dir:
LOG.debug("Loading pretained PEFT - LoRA") LOG.debug("Loading pretained PEFT - LoRA")
model = PeftModel.from_pretrained( model = PeftModel.from_pretrained(
model, model,
cfg.lora_model_dir, cfg.peft_model_dir,
is_trainable=(not inference), is_trainable=(not inference),
) )
else: else:
@@ -581,11 +579,11 @@ def load_ia3(model, cfg, inference=False):
**ia3_config_kwargs, **ia3_config_kwargs,
) )
if cfg.ia3_model_dir: if cfg.peft_model_dir:
LOG.debug("Loading pretained PEFT - IA3") LOG.debug("Loading pretained PEFT - IA3")
model = PeftModel.from_pretrained( model = PeftModel.from_pretrained(
model, model,
cfg.ia3_model_dir, cfg.peft_model_dir,
is_trainable=(not inference), is_trainable=(not inference),
) )
else: else: