consolidate as peft_model_dir

Wing Lian
2023-09-19 19:02:14 -04:00
parent ba85308720
commit 203369411e
33 changed files with 50 additions and 49 deletions

View File

@@ -96,7 +96,7 @@ accelerate launch -m axolotl.cli.train examples/openllama-3b/lora.yml
# inference
accelerate launch -m axolotl.cli.inference examples/openllama-3b/lora.yml \
-    --lora_model_dir="./lora-out"
+    --peft_model_dir="./lora-out"
```
## Installation
@@ -531,7 +531,7 @@ total_num_tokens:
adapter: lora
# If you already have a lora model trained that you want to load, put that here.
# This means after training, if you want to test the model, you should set this to the value of `lora_out_dir`.
-lora_model_dir:
+peft_model_dir:
# LoRA hyperparameters
# For more details about the following options, see:
@@ -869,7 +869,7 @@ Pass the appropriate flag to the train command:
- Pretrained LORA:
```bash
-python -m axolotl.cli.inference examples/your_config.yml --lora_model_dir="./lora-output-dir"
+python -m axolotl.cli.inference examples/your_config.yml --peft_model_dir="./lora-output-dir"
```
- Full weights finetune:
```bash
@@ -890,7 +890,7 @@ Please use `--sample_packing False` if you have it on and receive the error simi
Add the flag below to the command above
```bash
-python3 -m axolotl.cli.merge_lora examples/your_config.yml --lora_model_dir="./completed-model" --load_in_8bit=False --load_in_4bit=False
+python3 -m axolotl.cli.merge_lora examples/your_config.yml --peft_model_dir="./completed-model" --load_in_8bit=False --load_in_4bit=False
```
If you run out of CUDA memory, you can try to merge in system RAM with
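For intuition, the merge that `merge_lora` performs corresponds roughly to this PEFT pattern (a sketch only, not the axolotl CLI; the base model name and paths are illustrative assumptions):

```python
# Sketch of a LoRA merge via PEFT; model name and paths are assumptions.
from peft import PeftModel
from transformers import AutoModelForCausalLM

base = AutoModelForCausalLM.from_pretrained("openlm-research/open_llama_3b")
model = PeftModel.from_pretrained(base, "./completed-model")
merged = model.merge_and_unload()  # fold the LoRA deltas into the base weights
merged.save_pretrained("./merged-model")
```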

View File

@@ -18,7 +18,7 @@ dataset_prepared_path: last_prepared_run
val_set_size: 0.01
adapter:
-lora_model_dir:
+peft_model_dir:
sequence_len: 2048
max_packed_sequence_len:
sample_packing: false

View File

@@ -10,7 +10,7 @@ datasets:
dataset_prepared_path:
val_set_size: 0.01
adapter: qlora
-lora_model_dir:
+peft_model_dir:
sequence_len: 2048
max_packed_sequence_len: 2048
lora_r: 16

View File

@@ -20,7 +20,7 @@ sample_packing: true
pad_to_sequence_len: true
adapter: lora
-lora_model_dir:
+peft_model_dir:
lora_r: 32
lora_alpha: 16
lora_dropout: 0.05

View File

@@ -16,7 +16,7 @@ val_set_size: 0.01
output_dir: ./qlora-out
adapter: qlora
-lora_model_dir:
+peft_model_dir:
sequence_len: 4096
sample_packing: true

View File

@@ -20,7 +20,7 @@ sample_packing: true
pad_to_sequence_len: true
adapter: lora
-lora_model_dir:
+peft_model_dir:
lora_r: 32
lora_alpha: 16
lora_dropout: 0.05

View File

@@ -16,7 +16,7 @@ val_set_size: 0.01
output_dir: ./qlora-out
adapter: qlora
-lora_model_dir:
+peft_model_dir:
sequence_len: 4096
sample_packing: true

View File

@@ -20,7 +20,7 @@ sample_packing: true
pad_to_sequence_len: true
adapter: lora
-lora_model_dir:
+peft_model_dir:
lora_r: 32
lora_alpha: 16
lora_dropout: 0.05

View File

@@ -16,7 +16,7 @@ val_set_size: 0.01
output_dir: ./qlora-out
adapter: qlora
-lora_model_dir:
+peft_model_dir:
sequence_len: 4096
sample_packing: true

View File

@@ -15,7 +15,7 @@ datasets:
dataset_prepared_path:
val_set_size: 0.01
adapter: lora
-lora_model_dir:
+peft_model_dir:
sequence_len: 2048
max_packed_sequence_len:
lora_r: 16

View File

@@ -22,7 +22,7 @@ dataset_prepared_path:
val_set_size: 0.01
# enable QLoRA
adapter: qlora
-lora_model_dir:
+peft_model_dir:
sequence_len: 2048
max_packed_sequence_len:

View File

@@ -15,7 +15,7 @@ datasets:
dataset_prepared_path:
val_set_size: 0.01
adapter:
-lora_model_dir:
+peft_model_dir:
sequence_len: 2048
max_packed_sequence_len:
lora_r: 64

View File

@@ -10,7 +10,7 @@ datasets:
dataset_prepared_path:
val_set_size: 0.01
adapter: qlora
-lora_model_dir:
+peft_model_dir:
sequence_len: 2048
max_packed_sequence_len:
lora_r: 8

View File

@@ -9,7 +9,7 @@ datasets:
dataset_prepared_path:
val_set_size: 0.02
adapter:
-lora_model_dir:
+peft_model_dir:
sequence_len: 512
max_packed_sequence_len:
lora_r:

View File

@@ -18,7 +18,7 @@ datasets:
dataset_prepared_path:
val_set_size: 0.01
adapter: lora
-lora_model_dir:
+peft_model_dir:
sequence_len: 4096
sample_packing:
lora_r: 8

View File

@@ -20,7 +20,7 @@ sample_packing: true
pad_to_sequence_len: true
adapter: ia3
-ia3_model_dir:
+peft_model_dir:
ia3_target_modules:
- k_proj
- v_proj

View File

@@ -20,7 +20,7 @@ sample_packing: true
pad_to_sequence_len: true
adapter: lora
-lora_model_dir:
+peft_model_dir:
lora_r: 32
lora_alpha: 16
lora_dropout: 0.05

View File

@@ -16,7 +16,7 @@ val_set_size: 0.01
output_dir: ./qlora-out
adapter: qlora
-lora_model_dir:
+peft_model_dir:
sequence_len: 4096
sample_packing: true

View File

@@ -16,7 +16,7 @@ val_set_size: 0.01
output_dir: ./relora-out
adapter: qlora
-lora_model_dir:
+peft_model_dir:
sequence_len: 4096
sample_packing: true

View File

@@ -20,7 +20,7 @@ sequence_len: 4096
sample_packing: true
adapter: lora
-lora_model_dir:
+peft_model_dir:
lora_r: 32
lora_alpha: 16
lora_dropout: 0.05

View File

@@ -9,7 +9,7 @@ datasets:
dataset_prepared_path:
val_set_size: 0.02
adapter:
-lora_model_dir:
+peft_model_dir:
sequence_len: 2048
max_packed_sequence_len:
lora_r: 8

View File

@@ -12,7 +12,7 @@ datasets:
dataset_prepared_path:
val_set_size: 0.02
adapter:
-lora_model_dir:
+peft_model_dir:
sequence_len: 1024
sample_packing: true
lora_r:

View File

@@ -12,7 +12,7 @@ datasets:
dataset_prepared_path:
val_set_size: 0.02
adapter: lora
-lora_model_dir:
+peft_model_dir:
sequence_len: 1024
sample_packing: true
lora_r: 8

View File

@@ -12,7 +12,7 @@ datasets:
dataset_prepared_path:
val_set_size: 0.01
adapter: qlora
-lora_model_dir:
+peft_model_dir:
sequence_len: 1024
sample_packing: true
lora_r: 8

View File

@@ -22,7 +22,7 @@ sample_packing: true
pad_to_sequence_len:
adapter:
-lora_model_dir:
+peft_model_dir:
lora_r:
lora_alpha:
lora_dropout:

View File

@@ -22,7 +22,7 @@ sample_packing: false # not CURRENTLY compatible with LoRAs
pad_to_sequence_len:
adapter: qlora
-lora_model_dir:
+peft_model_dir:
lora_r: 64
lora_alpha: 32
lora_dropout: 0.05

View File

@@ -13,7 +13,7 @@ datasets:
dataset_prepared_path:
val_set_size: 0.05
adapter:
-lora_model_dir:
+peft_model_dir:
sequence_len: 2048
max_packed_sequence_len: 2048
lora_r: 64

View File

@@ -7,7 +7,7 @@ datasets:
dataset_prepared_path:
val_set_size: 0.05
adapter: lora
-lora_model_dir:
+peft_model_dir:
sequence_len: 512
lora_r: 16
lora_alpha: 32

View File

@@ -10,7 +10,7 @@ datasets:
dataset_prepared_path:
val_set_size: 0.02
adapter:
-lora_model_dir:
+peft_model_dir:
sequence_len: 2048
max_packed_sequence_len:
lora_r: 8

View File

@@ -8,7 +8,7 @@ datasets:
dataset_prepared_path:
val_set_size: 0.05
adapter: lora
-lora_model_dir:
+peft_model_dir:
sequence_len: 2048
max_packed_sequence_len:
lora_r: 8

View File

@@ -20,7 +20,7 @@ dataset_prepared_path:
val_set_size: 0.01
# enable QLoRA
adapter: qlora
-lora_model_dir:
+peft_model_dir:
sequence_len: 8192
max_packed_sequence_len:

View File

@@ -190,7 +190,10 @@ def validate_config(cfg):
        raise ValueError("Require cfg.load_in_4bit to be True for qlora")
    if not cfg.load_in_8bit and cfg.adapter == "lora":
-        LOG.warning("We recommend setting `load_in_8bit: true` for LORA finetuning")
+        LOG.warning("We recommend setting `load_in_8bit: true` for LoRA finetuning")
+    if not cfg.load_in_8bit and cfg.adapter == "ia3":
+        LOG.warning("We recommend setting `load_in_8bit: true` for IA3 finetuning")
    if cfg.relora_steps:
        if cfg.adapter not in ("lora", "qlora"):
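Pulled out for readability, the checks this hunk settles on behave like the sketch below (`check_adapter_quantization` is a hypothetical standalone wrapper; in the diff the logic sits inline in `validate_config`):

```python
import logging

LOG = logging.getLogger(__name__)

def check_adapter_quantization(cfg):
    # Hypothetical wrapper around the inline checks shown in the hunk above.
    # QLoRA hard-requires 4-bit loading; LoRA and IA3 only warn.
    if cfg.adapter == "qlora" and not cfg.load_in_4bit:
        raise ValueError("Require cfg.load_in_4bit to be True for qlora")
    if not cfg.load_in_8bit and cfg.adapter == "lora":
        LOG.warning("We recommend setting `load_in_8bit: true` for LoRA finetuning")
    if not cfg.load_in_8bit and cfg.adapter == "ia3":
        LOG.warning("We recommend setting `load_in_8bit: true` for IA3 finetuning")
```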

View File

@@ -406,23 +406,21 @@ def load_model(
        if hasattr(module, "weight"):
            module.to(torch.float32)
    needs_fa2_dtype = cfg.adapter or cfg.fsdp
-    if (
-        (cfg.adapter == "lora" and cfg.load_in_8bit)
-        or (cfg.adapter == "qlora" and cfg.load_in_4bit)
-        or (cfg.adapter == "ia3" and cfg.load_in_8bit)
-    ):
+    require_peft: bool = False
+    if cfg.adapter in ["lora", "qlora", "ia3"]:
+        require_peft = True
+    if require_peft:
        LOG.info("converting PEFT model w/ prepare_model_for_kbit_training")
        if cfg.gradient_checkpointing:
            model.gradient_checkpointing_enable()
        model = prepare_model_for_kbit_training(
            model, use_gradient_checkpointing=cfg.gradient_checkpointing
        )
-        needs_fa2_dtype = True
    # LlamaRMSNorm layers are in fp32 after kbit_training or full finetune, so we need to
    # convert them back to fp16/bf16 for flash-attn compatibility.
-    if needs_fa2_dtype or (cfg.flash_attention and cfg.is_llama_derived_model):
+    if require_peft or cfg.fsdp or (cfg.flash_attention and cfg.is_llama_derived_model):
        LOG.info("converting modules to %s for flash attention", cfg.torch_dtype)
        for name, module in model.named_modules():
            if "norm" in name:
@@ -492,11 +490,11 @@ def load_llama_adapter(model, cfg):
        task_type="CAUSAL_LM",
    )
-    if cfg.lora_model_dir:
+    if cfg.peft_model_dir or cfg.lora_model_dir:
        LOG.debug("Loading pretrained PEFT - llama_adapter")
        model = PeftModel.from_pretrained(
            model,
-            cfg.lora_model_dir,
+            cfg.peft_model_dir or cfg.lora_model_dir,
            torch_dtype=torch.float16,
        )
    else:
@@ -548,11 +546,11 @@ def load_lora(model, cfg, inference=False):
        task_type="CAUSAL_LM",
    )
-    if cfg.lora_model_dir:
+    if cfg.peft_model_dir:
        LOG.debug("Loading pretrained PEFT - LoRA")
        model = PeftModel.from_pretrained(
            model,
-            cfg.lora_model_dir,
+            cfg.peft_model_dir,
            is_trainable=(not inference),
        )
    else:
@@ -581,11 +579,11 @@ def load_ia3(model, cfg, inference=False):
        **ia3_config_kwargs,
    )
-    if cfg.ia3_model_dir:
+    if cfg.peft_model_dir:
        LOG.debug("Loading pretrained PEFT - IA3")
        model = PeftModel.from_pretrained(
            model,
-            cfg.ia3_model_dir,
+            cfg.peft_model_dir,
            is_trainable=(not inference),
        )
    else:
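After this change the three loaders share one shape, distilled below (a sketch; `load_pretrained_peft` is hypothetical and not part of the commit — only `load_llama_adapter` keeps the legacy `lora_model_dir` fallback):

```python
from peft import PeftModel

def load_pretrained_peft(model, cfg, inference=False):
    # Hypothetical distillation of load_llama_adapter/load_lora/load_ia3:
    # all three now read the consolidated cfg.peft_model_dir key.
    if cfg.peft_model_dir:
        model = PeftModel.from_pretrained(
            model,
            cfg.peft_model_dir,
            is_trainable=(not inference),
        )
    return model
```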