consolidate as peft_model_dir
This commit is contained in:
@@ -96,7 +96,7 @@ accelerate launch -m axolotl.cli.train examples/openllama-3b/lora.yml
|
|||||||
|
|
||||||
# inference
|
# inference
|
||||||
accelerate launch -m axolotl.cli.inference examples/openllama-3b/lora.yml \
|
accelerate launch -m axolotl.cli.inference examples/openllama-3b/lora.yml \
|
||||||
--lora_model_dir="./lora-out"
|
--peft_model_dir="./lora-out"
|
||||||
```
|
```
|
||||||
|
|
||||||
## Installation
|
## Installation
|
||||||
@@ -531,7 +531,7 @@ total_num_tokens:
|
|||||||
adapter: lora
|
adapter: lora
|
||||||
# If you already have a lora model trained that you want to load, put that here.
|
# If you already have a trained PEFT adapter (LoRA, QLoRA, or IA3) that you want to load, put its directory here.
|
||||||
# This means after training, if you want to test the model, you should set this to the value of `lora_out_dir`.
|
# This means after training, if you want to test the model, you should set this to the value of `output_dir`.
|
||||||
lora_model_dir:
|
peft_model_dir:
|
||||||
|
|
||||||
# LoRA hyperparameters
|
# LoRA hyperparameters
|
||||||
# For more details about the following options, see:
|
# For more details about the following options, see:
|
||||||
@@ -869,7 +869,7 @@ Pass the appropriate flag to the train command:
|
|||||||
|
|
||||||
- Pretrained LORA:
|
- Pretrained LoRA:
|
||||||
```bash
|
```bash
|
||||||
python -m axolotl.cli.inference examples/your_config.yml --lora_model_dir="./lora-output-dir"
|
python -m axolotl.cli.inference examples/your_config.yml --peft_model_dir="./lora-output-dir"
|
||||||
```
|
```
|
||||||
- Full weights finetune:
|
- Full weights finetune:
|
||||||
```bash
|
```bash
|
||||||
@@ -890,7 +890,7 @@ Please use `--sample_packing False` if you have it on and receive the error simi
|
|||||||
Add below flag to train command above
|
Add the flag below to the train command above
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
python3 -m axolotl.cli.merge_lora examples/your_config.yml --lora_model_dir="./completed-model" --load_in_8bit=False --load_in_4bit=False
|
python3 -m axolotl.cli.merge_lora examples/your_config.yml --peft_model_dir="./completed-model" --load_in_8bit=False --load_in_4bit=False
|
||||||
```
|
```
|
||||||
|
|
||||||
If you run out of CUDA memory, you can try to merge in system RAM with
|
If you run out of CUDA memory, you can try to merge in system RAM with
|
||||||
|
|||||||
@@ -18,7 +18,7 @@ dataset_prepared_path: last_prepared_run
|
|||||||
val_set_size: 0.01
|
val_set_size: 0.01
|
||||||
|
|
||||||
adapter:
|
adapter:
|
||||||
lora_model_dir:
|
peft_model_dir:
|
||||||
sequence_len: 2048
|
sequence_len: 2048
|
||||||
max_packed_sequence_len:
|
max_packed_sequence_len:
|
||||||
sample_packing: false
|
sample_packing: false
|
||||||
|
|||||||
@@ -10,7 +10,7 @@ datasets:
|
|||||||
dataset_prepared_path:
|
dataset_prepared_path:
|
||||||
val_set_size: 0.01
|
val_set_size: 0.01
|
||||||
adapter: qlora
|
adapter: qlora
|
||||||
lora_model_dir:
|
peft_model_dir:
|
||||||
sequence_len: 2048
|
sequence_len: 2048
|
||||||
max_packed_sequence_len: 2048
|
max_packed_sequence_len: 2048
|
||||||
lora_r: 16
|
lora_r: 16
|
||||||
|
|||||||
@@ -20,7 +20,7 @@ sample_packing: true
|
|||||||
pad_to_sequence_len: true
|
pad_to_sequence_len: true
|
||||||
|
|
||||||
adapter: lora
|
adapter: lora
|
||||||
lora_model_dir:
|
peft_model_dir:
|
||||||
lora_r: 32
|
lora_r: 32
|
||||||
lora_alpha: 16
|
lora_alpha: 16
|
||||||
lora_dropout: 0.05
|
lora_dropout: 0.05
|
||||||
|
|||||||
@@ -16,7 +16,7 @@ val_set_size: 0.01
|
|||||||
output_dir: ./qlora-out
|
output_dir: ./qlora-out
|
||||||
|
|
||||||
adapter: qlora
|
adapter: qlora
|
||||||
lora_model_dir:
|
peft_model_dir:
|
||||||
|
|
||||||
sequence_len: 4096
|
sequence_len: 4096
|
||||||
sample_packing: true
|
sample_packing: true
|
||||||
|
|||||||
@@ -20,7 +20,7 @@ sample_packing: true
|
|||||||
pad_to_sequence_len: true
|
pad_to_sequence_len: true
|
||||||
|
|
||||||
adapter: lora
|
adapter: lora
|
||||||
lora_model_dir:
|
peft_model_dir:
|
||||||
lora_r: 32
|
lora_r: 32
|
||||||
lora_alpha: 16
|
lora_alpha: 16
|
||||||
lora_dropout: 0.05
|
lora_dropout: 0.05
|
||||||
|
|||||||
@@ -16,7 +16,7 @@ val_set_size: 0.01
|
|||||||
output_dir: ./qlora-out
|
output_dir: ./qlora-out
|
||||||
|
|
||||||
adapter: qlora
|
adapter: qlora
|
||||||
lora_model_dir:
|
peft_model_dir:
|
||||||
|
|
||||||
sequence_len: 4096
|
sequence_len: 4096
|
||||||
sample_packing: true
|
sample_packing: true
|
||||||
|
|||||||
@@ -20,7 +20,7 @@ sample_packing: true
|
|||||||
pad_to_sequence_len: true
|
pad_to_sequence_len: true
|
||||||
|
|
||||||
adapter: lora
|
adapter: lora
|
||||||
lora_model_dir:
|
peft_model_dir:
|
||||||
lora_r: 32
|
lora_r: 32
|
||||||
lora_alpha: 16
|
lora_alpha: 16
|
||||||
lora_dropout: 0.05
|
lora_dropout: 0.05
|
||||||
|
|||||||
@@ -16,7 +16,7 @@ val_set_size: 0.01
|
|||||||
output_dir: ./qlora-out
|
output_dir: ./qlora-out
|
||||||
|
|
||||||
adapter: qlora
|
adapter: qlora
|
||||||
lora_model_dir:
|
peft_model_dir:
|
||||||
|
|
||||||
sequence_len: 4096
|
sequence_len: 4096
|
||||||
sample_packing: true
|
sample_packing: true
|
||||||
|
|||||||
@@ -15,7 +15,7 @@ datasets:
|
|||||||
dataset_prepared_path:
|
dataset_prepared_path:
|
||||||
val_set_size: 0.01
|
val_set_size: 0.01
|
||||||
adapter: lora
|
adapter: lora
|
||||||
lora_model_dir:
|
peft_model_dir:
|
||||||
sequence_len: 2048
|
sequence_len: 2048
|
||||||
max_packed_sequence_len:
|
max_packed_sequence_len:
|
||||||
lora_r: 16
|
lora_r: 16
|
||||||
|
|||||||
@@ -22,7 +22,7 @@ dataset_prepared_path:
|
|||||||
val_set_size: 0.01
|
val_set_size: 0.01
|
||||||
# enable QLoRA
|
# enable QLoRA
|
||||||
adapter: qlora
|
adapter: qlora
|
||||||
lora_model_dir:
|
peft_model_dir:
|
||||||
sequence_len: 2048
|
sequence_len: 2048
|
||||||
max_packed_sequence_len:
|
max_packed_sequence_len:
|
||||||
|
|
||||||
|
|||||||
@@ -15,7 +15,7 @@ datasets:
|
|||||||
dataset_prepared_path:
|
dataset_prepared_path:
|
||||||
val_set_size: 0.01
|
val_set_size: 0.01
|
||||||
adapter:
|
adapter:
|
||||||
lora_model_dir:
|
peft_model_dir:
|
||||||
sequence_len: 2048
|
sequence_len: 2048
|
||||||
max_packed_sequence_len:
|
max_packed_sequence_len:
|
||||||
lora_r: 64
|
lora_r: 64
|
||||||
|
|||||||
@@ -10,7 +10,7 @@ datasets:
|
|||||||
dataset_prepared_path:
|
dataset_prepared_path:
|
||||||
val_set_size: 0.01
|
val_set_size: 0.01
|
||||||
adapter: qlora
|
adapter: qlora
|
||||||
lora_model_dir:
|
peft_model_dir:
|
||||||
sequence_len: 2048
|
sequence_len: 2048
|
||||||
max_packed_sequence_len:
|
max_packed_sequence_len:
|
||||||
lora_r: 8
|
lora_r: 8
|
||||||
|
|||||||
@@ -9,7 +9,7 @@ datasets:
|
|||||||
dataset_prepared_path:
|
dataset_prepared_path:
|
||||||
val_set_size: 0.02
|
val_set_size: 0.02
|
||||||
adapter:
|
adapter:
|
||||||
lora_model_dir:
|
peft_model_dir:
|
||||||
sequence_len: 512
|
sequence_len: 512
|
||||||
max_packed_sequence_len:
|
max_packed_sequence_len:
|
||||||
lora_r:
|
lora_r:
|
||||||
|
|||||||
@@ -18,7 +18,7 @@ datasets:
|
|||||||
dataset_prepared_path:
|
dataset_prepared_path:
|
||||||
val_set_size: 0.01
|
val_set_size: 0.01
|
||||||
adapter: lora
|
adapter: lora
|
||||||
lora_model_dir:
|
peft_model_dir:
|
||||||
sequence_len: 4096
|
sequence_len: 4096
|
||||||
sample_packing:
|
sample_packing:
|
||||||
lora_r: 8
|
lora_r: 8
|
||||||
|
|||||||
@@ -20,7 +20,7 @@ sample_packing: true
|
|||||||
pad_to_sequence_len: true
|
pad_to_sequence_len: true
|
||||||
|
|
||||||
adapter: ia3
|
adapter: ia3
|
||||||
ia3_model_dir:
|
peft_model_dir:
|
||||||
ia3_target_modules:
|
ia3_target_modules:
|
||||||
- k_proj
|
- k_proj
|
||||||
- v_proj
|
- v_proj
|
||||||
|
|||||||
@@ -20,7 +20,7 @@ sample_packing: true
|
|||||||
pad_to_sequence_len: true
|
pad_to_sequence_len: true
|
||||||
|
|
||||||
adapter: lora
|
adapter: lora
|
||||||
lora_model_dir:
|
peft_model_dir:
|
||||||
lora_r: 32
|
lora_r: 32
|
||||||
lora_alpha: 16
|
lora_alpha: 16
|
||||||
lora_dropout: 0.05
|
lora_dropout: 0.05
|
||||||
|
|||||||
@@ -16,7 +16,7 @@ val_set_size: 0.01
|
|||||||
output_dir: ./qlora-out
|
output_dir: ./qlora-out
|
||||||
|
|
||||||
adapter: qlora
|
adapter: qlora
|
||||||
lora_model_dir:
|
peft_model_dir:
|
||||||
|
|
||||||
sequence_len: 4096
|
sequence_len: 4096
|
||||||
sample_packing: true
|
sample_packing: true
|
||||||
|
|||||||
@@ -16,7 +16,7 @@ val_set_size: 0.01
|
|||||||
output_dir: ./relora-out
|
output_dir: ./relora-out
|
||||||
|
|
||||||
adapter: qlora
|
adapter: qlora
|
||||||
lora_model_dir:
|
peft_model_dir:
|
||||||
|
|
||||||
sequence_len: 4096
|
sequence_len: 4096
|
||||||
sample_packing: true
|
sample_packing: true
|
||||||
|
|||||||
@@ -20,7 +20,7 @@ sequence_len: 4096
|
|||||||
sample_packing: true
|
sample_packing: true
|
||||||
|
|
||||||
adapter: lora
|
adapter: lora
|
||||||
lora_model_dir:
|
peft_model_dir:
|
||||||
lora_r: 32
|
lora_r: 32
|
||||||
lora_alpha: 16
|
lora_alpha: 16
|
||||||
lora_dropout: 0.05
|
lora_dropout: 0.05
|
||||||
|
|||||||
@@ -9,7 +9,7 @@ datasets:
|
|||||||
dataset_prepared_path:
|
dataset_prepared_path:
|
||||||
val_set_size: 0.02
|
val_set_size: 0.02
|
||||||
adapter:
|
adapter:
|
||||||
lora_model_dir:
|
peft_model_dir:
|
||||||
sequence_len: 2048
|
sequence_len: 2048
|
||||||
max_packed_sequence_len:
|
max_packed_sequence_len:
|
||||||
lora_r: 8
|
lora_r: 8
|
||||||
|
|||||||
@@ -12,7 +12,7 @@ datasets:
|
|||||||
dataset_prepared_path:
|
dataset_prepared_path:
|
||||||
val_set_size: 0.02
|
val_set_size: 0.02
|
||||||
adapter:
|
adapter:
|
||||||
lora_model_dir:
|
peft_model_dir:
|
||||||
sequence_len: 1024
|
sequence_len: 1024
|
||||||
sample_packing: true
|
sample_packing: true
|
||||||
lora_r:
|
lora_r:
|
||||||
|
|||||||
@@ -12,7 +12,7 @@ datasets:
|
|||||||
dataset_prepared_path:
|
dataset_prepared_path:
|
||||||
val_set_size: 0.02
|
val_set_size: 0.02
|
||||||
adapter: lora
|
adapter: lora
|
||||||
lora_model_dir:
|
peft_model_dir:
|
||||||
sequence_len: 1024
|
sequence_len: 1024
|
||||||
sample_packing: true
|
sample_packing: true
|
||||||
lora_r: 8
|
lora_r: 8
|
||||||
|
|||||||
@@ -12,7 +12,7 @@ datasets:
|
|||||||
dataset_prepared_path:
|
dataset_prepared_path:
|
||||||
val_set_size: 0.01
|
val_set_size: 0.01
|
||||||
adapter: qlora
|
adapter: qlora
|
||||||
lora_model_dir:
|
peft_model_dir:
|
||||||
sequence_len: 1024
|
sequence_len: 1024
|
||||||
sample_packing: true
|
sample_packing: true
|
||||||
lora_r: 8
|
lora_r: 8
|
||||||
|
|||||||
@@ -22,7 +22,7 @@ sample_packing: true
|
|||||||
pad_to_sequence_len:
|
pad_to_sequence_len:
|
||||||
|
|
||||||
adapter:
|
adapter:
|
||||||
lora_model_dir:
|
peft_model_dir:
|
||||||
lora_r:
|
lora_r:
|
||||||
lora_alpha:
|
lora_alpha:
|
||||||
lora_dropout:
|
lora_dropout:
|
||||||
|
|||||||
@@ -22,7 +22,7 @@ sample_packing: false # not CURRENTLY compatible with LoRAs
|
|||||||
pad_to_sequence_len:
|
pad_to_sequence_len:
|
||||||
|
|
||||||
adapter: qlora
|
adapter: qlora
|
||||||
lora_model_dir:
|
peft_model_dir:
|
||||||
lora_r: 64
|
lora_r: 64
|
||||||
lora_alpha: 32
|
lora_alpha: 32
|
||||||
lora_dropout: 0.05
|
lora_dropout: 0.05
|
||||||
|
|||||||
@@ -13,7 +13,7 @@ datasets:
|
|||||||
dataset_prepared_path:
|
dataset_prepared_path:
|
||||||
val_set_size: 0.05
|
val_set_size: 0.05
|
||||||
adapter:
|
adapter:
|
||||||
lora_model_dir:
|
peft_model_dir:
|
||||||
sequence_len: 2048
|
sequence_len: 2048
|
||||||
max_packed_sequence_len: 2048
|
max_packed_sequence_len: 2048
|
||||||
lora_r: 64
|
lora_r: 64
|
||||||
|
|||||||
@@ -7,7 +7,7 @@ datasets:
|
|||||||
dataset_prepared_path:
|
dataset_prepared_path:
|
||||||
val_set_size: 0.05
|
val_set_size: 0.05
|
||||||
adapter: lora
|
adapter: lora
|
||||||
lora_model_dir:
|
peft_model_dir:
|
||||||
sequence_len: 512
|
sequence_len: 512
|
||||||
lora_r: 16
|
lora_r: 16
|
||||||
lora_alpha: 32
|
lora_alpha: 32
|
||||||
|
|||||||
@@ -10,7 +10,7 @@ datasets:
|
|||||||
dataset_prepared_path:
|
dataset_prepared_path:
|
||||||
val_set_size: 0.02
|
val_set_size: 0.02
|
||||||
adapter:
|
adapter:
|
||||||
lora_model_dir:
|
peft_model_dir:
|
||||||
sequence_len: 2048
|
sequence_len: 2048
|
||||||
max_packed_sequence_len:
|
max_packed_sequence_len:
|
||||||
lora_r: 8
|
lora_r: 8
|
||||||
|
|||||||
@@ -8,7 +8,7 @@ datasets:
|
|||||||
dataset_prepared_path:
|
dataset_prepared_path:
|
||||||
val_set_size: 0.05
|
val_set_size: 0.05
|
||||||
adapter: lora
|
adapter: lora
|
||||||
lora_model_dir:
|
peft_model_dir:
|
||||||
sequence_len: 2048
|
sequence_len: 2048
|
||||||
max_packed_sequence_len:
|
max_packed_sequence_len:
|
||||||
lora_r: 8
|
lora_r: 8
|
||||||
|
|||||||
@@ -20,7 +20,7 @@ dataset_prepared_path:
|
|||||||
val_set_size: 0.01
|
val_set_size: 0.01
|
||||||
# enable QLoRA
|
# enable QLoRA
|
||||||
adapter: qlora
|
adapter: qlora
|
||||||
lora_model_dir:
|
peft_model_dir:
|
||||||
sequence_len: 8192
|
sequence_len: 8192
|
||||||
max_packed_sequence_len:
|
max_packed_sequence_len:
|
||||||
|
|
||||||
|
|||||||
@@ -190,7 +190,10 @@ def validate_config(cfg):
|
|||||||
raise ValueError("Require cfg.load_in_4bit to be True for qlora")
|
raise ValueError("Require cfg.load_in_4bit to be True for qlora")
|
||||||
|
|
||||||
if not cfg.load_in_8bit and cfg.adapter == "lora":
|
if not cfg.load_in_8bit and cfg.adapter == "lora":
|
||||||
LOG.warning("We recommend setting `load_in_8bit: true` for LORA finetuning")
|
LOG.warning("We recommend setting `load_in_8bit: true` for LoRA finetuning")
|
||||||
|
|
||||||
|
if not cfg.load_in_8bit and cfg.adapter == "ia3":
|
||||||
|
LOG.warning("We recommend setting `load_in_8bit: true` for IA3 finetuning")
|
||||||
|
|
||||||
if cfg.relora_steps:
|
if cfg.relora_steps:
|
||||||
if cfg.adapter not in ("lora", "qlora"):
|
if cfg.adapter not in ("lora", "qlora"):
|
||||||
|
|||||||
@@ -406,23 +406,21 @@ def load_model(
|
|||||||
if hasattr(module, "weight"):
|
if hasattr(module, "weight"):
|
||||||
module.to(torch.float32)
|
module.to(torch.float32)
|
||||||
|
|
||||||
needs_fa2_dtype = cfg.adapter or cfg.fsdp
|
require_peft: bool = False
|
||||||
if (
|
if cfg.adapter in ["lora", "qlora", "ia3"]:
|
||||||
(cfg.adapter == "lora" and cfg.load_in_8bit)
|
require_peft = True
|
||||||
or (cfg.adapter == "qlora" and cfg.load_in_4bit)
|
|
||||||
or (cfg.adapter == "ia3" and cfg.load_in_8bit)
|
if require_peft:
|
||||||
):
|
|
||||||
LOG.info("converting PEFT model w/ prepare_model_for_kbit_training")
|
LOG.info("converting PEFT model w/ prepare_model_for_kbit_training")
|
||||||
if cfg.gradient_checkpointing:
|
if cfg.gradient_checkpointing:
|
||||||
model.gradient_checkpointing_enable()
|
model.gradient_checkpointing_enable()
|
||||||
model = prepare_model_for_kbit_training(
|
model = prepare_model_for_kbit_training(
|
||||||
model, use_gradient_checkpointing=cfg.gradient_checkpointing
|
model, use_gradient_checkpointing=cfg.gradient_checkpointing
|
||||||
)
|
)
|
||||||
needs_fa2_dtype = True
|
|
||||||
|
|
||||||
# LlamaRMSNorm layers are in fp32 after kbit_training or full finetune, so we need to
|
# LlamaRMSNorm layers are in fp32 after kbit_training or full finetune, so we need to
|
||||||
# convert them back to fp16/bf16 for flash-attn compatibility.
|
# convert them back to fp16/bf16 for flash-attn compatibility.
|
||||||
if needs_fa2_dtype or (cfg.flash_attention and cfg.is_llama_derived_model):
|
if require_peft or cfg.fsdp or (cfg.flash_attention and cfg.is_llama_derived_model):
|
||||||
LOG.info("converting modules to %s for flash attention", cfg.torch_dtype)
|
LOG.info("converting modules to %s for flash attention", cfg.torch_dtype)
|
||||||
for name, module in model.named_modules():
|
for name, module in model.named_modules():
|
||||||
if "norm" in name:
|
if "norm" in name:
|
||||||
@@ -492,11 +490,11 @@ def load_llama_adapter(model, cfg):
|
|||||||
task_type="CAUSAL_LM",
|
task_type="CAUSAL_LM",
|
||||||
)
|
)
|
||||||
|
|
||||||
if cfg.lora_model_dir:
|
if cfg.peft_model_dir or cfg.lora_model_dir:
|
||||||
LOG.debug("Loading pretained PEFT - llama_adapter")
|
LOG.debug("Loading pretained PEFT - llama_adapter")
|
||||||
model = PeftModel.from_pretrained(
|
model = PeftModel.from_pretrained(
|
||||||
model,
|
model,
|
||||||
cfg.lora_model_dir,
|
cfg.peft_model_dir or cfg.lora_model_dir,
|
||||||
torch_dtype=torch.float16,
|
torch_dtype=torch.float16,
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
@@ -548,11 +546,11 @@ def load_lora(model, cfg, inference=False):
|
|||||||
task_type="CAUSAL_LM",
|
task_type="CAUSAL_LM",
|
||||||
)
|
)
|
||||||
|
|
||||||
if cfg.lora_model_dir:
|
if cfg.peft_model_dir:
|
||||||
LOG.debug("Loading pretained PEFT - LoRA")
|
LOG.debug("Loading pretained PEFT - LoRA")
|
||||||
model = PeftModel.from_pretrained(
|
model = PeftModel.from_pretrained(
|
||||||
model,
|
model,
|
||||||
cfg.lora_model_dir,
|
cfg.peft_model_dir,
|
||||||
is_trainable=(not inference),
|
is_trainable=(not inference),
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
@@ -581,11 +579,11 @@ def load_ia3(model, cfg, inference=False):
|
|||||||
**ia3_config_kwargs,
|
**ia3_config_kwargs,
|
||||||
)
|
)
|
||||||
|
|
||||||
if cfg.ia3_model_dir:
|
if cfg.peft_model_dir:
|
||||||
LOG.debug("Loading pretained PEFT - IA3")
|
LOG.debug("Loading pretained PEFT - IA3")
|
||||||
model = PeftModel.from_pretrained(
|
model = PeftModel.from_pretrained(
|
||||||
model,
|
model,
|
||||||
cfg.ia3_model_dir,
|
cfg.peft_model_dir,
|
||||||
is_trainable=(not inference),
|
is_trainable=(not inference),
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
|
|||||||
Reference in New Issue
Block a user