migrate lora_ to peft_

Author: Wing Lian
Date: 2023-09-28 11:58:23 -04:00
parent 481ef187a5
commit 0bd89b38c6
5 changed files with 106 additions and 48 deletions

View File

@@ -384,10 +384,10 @@ See [examples](examples) for quick start. It is recommended to duplicate and mod
 - lora
 ```yaml
 adapter: lora # qlora or leave blank for full finetune
-lora_r: 8
-lora_alpha: 16
-lora_dropout: 0.05
-lora_target_modules:
+peft_r: 8
+peft_alpha: 16
+peft_dropout: 0.05
+peft_target_modules:
   - q_proj
   - v_proj
 ```
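As a refresher on what the renamed `peft_r` and `peft_alpha` knobs control, here is a toy sketch (not axolotl code) of the LoRA update itself: the frozen weight W is augmented with a low-rank product scaled by alpha/r.

```python
# Toy illustration of the r/alpha hyperparameters above: LoRA computes
# W + (alpha / r) * B @ A, with A (r x in) and B (out x r) trainable.
import torch

in_features, out_features, r, alpha = 16, 16, 8, 16
W = torch.randn(out_features, in_features)   # frozen base weight
A = torch.randn(r, in_features) * 0.01       # trainable down-projection
B = torch.zeros(out_features, r)             # trainable up-projection, zero-initialized
W_effective = W + (alpha / r) * (B @ A)      # what the adapted q_proj/v_proj compute
print(W_effective.shape)                     # torch.Size([16, 16])
```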
@@ -536,10 +536,10 @@ peft_model_dir:
 # LoRA hyperparameters
 # For more details about the following options, see:
 # https://www.anyscale.com/blog/fine-tuning-llms-lora-or-full-parameter-an-in-depth-analysis-with-llama-2
-lora_r: 8
-lora_alpha: 16
-lora_dropout: 0.05
-lora_target_modules:
+peft_r: 8
+peft_alpha: 16
+peft_dropout: 0.05
+peft_target_modules:
   - q_proj
   - v_proj
 # - k_proj
@@ -547,13 +547,13 @@ lora_target_modules:
 # - gate_proj
 # - down_proj
 # - up_proj
-lora_target_linear: # If true, will target all linear layers
+peft_target_linear: # if true, will target all linear layers
 # If you added new tokens to the tokenizer, you may need to save some LoRA modules because they need to know the new tokens.
 # For LLaMA and Mistral, you need to save `embed_tokens` and `lm_head`. It may vary for other models.
 # `embed_tokens` converts tokens to embeddings, and `lm_head` converts embeddings to token probabilities.
 # https://github.com/huggingface/peft/issues/334#issuecomment-1561727994
-lora_modules_to_save:
+peft_modules_to_save:
 # - embed_tokens
 # - lm_head
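Per the comment block above, a minimal sketch of a LoRA config that also saves the embedding and head modules (values illustrative; the module names apply to LLaMA/Mistral-style models):

```python
# Minimal sketch, assuming a LLaMA-style model with new tokens added: listing
# embed_tokens and lm_head in modules_to_save stores full copies of those
# modules alongside the LoRA weights so the adapter knows the new tokens.
from peft import LoraConfig

lora_config = LoraConfig(
    r=8,
    lora_alpha=16,
    lora_dropout=0.05,
    target_modules=["q_proj", "v_proj"],
    modules_to_save=["embed_tokens", "lm_head"],
    task_type="CAUSAL_LM",
)
```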
@@ -561,10 +561,8 @@ lora_modules_to_save:
 # If you merge the adapter to the base model, a subdirectory `merged` will be created under this directory.
 # Make sure `lora_model_dir` points to this directory if you want to use the trained model.
 lora_out_dir:
-lora_fan_in_fan_out: false
-ia3_target_modules: # target modules for IA3, for llama, k, v, and down projections
-ia3_feedforward_modules: # ffn modules for IA3, for llama down projection
-ia3_fan_in_fan_out:
+peft_fan_in_fan_out: false
+peft_feedforward_modules: # ffn modules for IA3, for llama down projection
 
 # ReLoRA configuration
 # Must use either 'lora' or 'qlora' adapter, and does not support fsdp or deepspeed

View File

@@ -21,13 +21,13 @@ pad_to_sequence_len: true
 adapter: ia3
 peft_model_dir:
-ia3_target_modules:
+peft_target_modules:
   - k_proj
   - v_proj
   - down_proj
-ia3_feedforward_modules:
+peft_feedforward_modules:
   - down_proj
-ia3_fan_in_fan_out: false
+peft_fan_in_fan_out: false
 
 wandb_project:
 wandb_entity:
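For context, these example values map straight onto `peft.IA3Config`; a minimal sketch (config construction only):

```python
# Sketch of the IA3 config the example yaml above describes. Note that peft
# requires feedforward_modules to be a subset of target_modules.
from peft import IA3Config

ia3_config = IA3Config(
    target_modules=["k_proj", "v_proj", "down_proj"],
    feedforward_modules=["down_proj"],
    fan_in_fan_out=False,
    task_type="CAUSAL_LM",
)
```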

View File

@@ -121,6 +121,18 @@ def normalize_config(cfg):
         log_gpu_memory_usage(LOG, "baseline", cfg.device)
 
+    if cfg.adapter is not None:
+        for key in list(cfg.keys()):
+            if key.startswith("lora_"):
+                new_key = key.replace("lora_", "peft_")
+                LOG.warning(
+                    PendingDeprecationWarning(
+                        f"{key} soon to be deprecated. please use {new_key}"
+                    )
+                )
+                cfg[new_key] = cfg[key]
+                del cfg[key]
+
 
 def validate_config(cfg):
     if is_torch_bf16_gpu_available():
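The shim above can be exercised standalone; a self-contained sketch of the same loop, with a plain dict and `warnings` standing in for axolotl's `DictDefault` and `LOG`:

```python
# Standalone re-statement of the normalize_config migration above: every
# lora_-prefixed key is copied to its peft_ equivalent and the old key dropped.
import warnings

def migrate_lora_keys(cfg: dict) -> dict:
    for key in list(cfg.keys()):  # list() so we can mutate while iterating
        if key.startswith("lora_"):
            new_key = key.replace("lora_", "peft_")
            warnings.warn(
                f"{key} soon to be deprecated. please use {new_key}",
                PendingDeprecationWarning,
            )
            cfg[new_key] = cfg[key]
            del cfg[key]
    return cfg

print(migrate_lora_keys({"adapter": "lora", "lora_r": 8, "lora_alpha": 16}))
# {'adapter': 'lora', 'peft_r': 8, 'peft_alpha': 16}
```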

View File

@@ -490,11 +490,11 @@ def load_llama_adapter(model, cfg):
         task_type="CAUSAL_LM",
     )
 
-    if cfg.peft_model_dir or cfg.lora_model_dir:
+    if cfg.peft_model_dir:
         LOG.debug("Loading pretrained PEFT - llama_adapter")
         model = PeftModel.from_pretrained(
             model,
-            cfg.peft_model_dir or cfg.lora_model_dir,
+            cfg.peft_model_dir,
             torch_dtype=torch.float16,
         )
     else:
@@ -507,7 +507,7 @@ def load_llama_adapter(model, cfg):
 
 def find_all_linear_names(model):
     cls = (bnb.nn.Linear4bit, bnb.nn.Linear8bitLt, torch.nn.Linear, QuantLinear)
-    lora_module_names = set()
+    peft_module_names = set()
     for name, module in model.named_modules():
         if (
             isinstance(module, cls)
@@ -515,12 +515,12 @@ def find_all_linear_names(model):
             and module.__class__.__name__ not in ("LlamaLinearScalingRotaryEmbedding",)
         ):
             names = name.split(".")
-            lora_module_names.add(names[0] if len(names) == 1 else names[-1])
+            peft_module_names.add(names[0] if len(names) == 1 else names[-1])
 
-    if "lm_head" in lora_module_names:  # needed for 16-bit
-        lora_module_names.remove("lm_head")
+    if "lm_head" in peft_module_names:  # needed for 16-bit
+        peft_module_names.remove("lm_head")
 
-    return list(lora_module_names)
+    return list(peft_module_names)
 
 
 def load_lora(model, cfg, inference=False):
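A toy run of the renamed `find_all_linear_names` logic, simplified to plain `torch.nn.Linear` (the real function also matches bitsandbytes and GPTQ linear classes):

```python
# The last path segment of each matching module name is collected, and lm_head
# is excluded (as the diff notes, needed for 16-bit training).
import torch

class TinyModel(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.q_proj = torch.nn.Linear(8, 8)
        self.down_proj = torch.nn.Linear(8, 8)
        self.lm_head = torch.nn.Linear(8, 8)

peft_module_names = set()
for name, module in TinyModel().named_modules():
    if isinstance(module, torch.nn.Linear):
        names = name.split(".")
        peft_module_names.add(names[0] if len(names) == 1 else names[-1])
if "lm_head" in peft_module_names:
    peft_module_names.remove("lm_head")
print(sorted(peft_module_names))  # ['down_proj', 'q_proj']
```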
@@ -528,20 +528,20 @@ def load_lora(model, cfg, inference=False):
     from peft import LoraConfig, PeftModel, get_peft_model
 
-    lora_target_modules = list(cfg.lora_target_modules or [])
+    peft_target_modules = list(cfg.peft_target_modules or [])
 
-    if cfg.lora_target_linear:
+    if cfg.peft_target_linear:
         linear_names = find_all_linear_names(model)
         LOG.info(f"found linear modules: {repr(linear_names)}")
-        lora_target_modules = list(set(lora_target_modules + linear_names))
+        peft_target_modules = list(set(peft_target_modules + linear_names))
 
-    lora_config = LoraConfig(
-        r=cfg.lora_r,
-        lora_alpha=cfg.lora_alpha,
-        target_modules=lora_target_modules,
-        lora_dropout=cfg.lora_dropout,
-        fan_in_fan_out=cfg.lora_fan_in_fan_out,
-        modules_to_save=cfg.lora_modules_to_save if cfg.lora_modules_to_save else None,
+    peft_config = LoraConfig(
+        r=cfg.peft_r,
+        lora_alpha=cfg.peft_alpha,
+        target_modules=peft_target_modules,
+        lora_dropout=cfg.peft_dropout,
+        fan_in_fan_out=cfg.peft_fan_in_fan_out,
+        modules_to_save=cfg.peft_modules_to_save if cfg.peft_modules_to_save else None,
         bias="none",
         task_type="CAUSAL_LM",
     )
@@ -554,11 +554,11 @@ def load_lora(model, cfg, inference=False):
             is_trainable=(not inference),
         )
     else:
-        model = get_peft_model(model, lora_config)
+        model = get_peft_model(model, peft_config)
 
     model.print_trainable_parameters()
 
-    return model, lora_config
+    return model, peft_config
 
 
 def load_ia3(model, cfg, inference=False):
@@ -566,18 +566,18 @@ def load_ia3(model, cfg, inference=False):
     from peft import IA3Config, PeftModel, get_peft_model
 
-    ia3_config_kwargs = {}
-    if cfg.ia3_init_ia3_weights is not None:
-        ia3_config_kwargs["init_ia3_weights"] = cfg.ia3_init_ia3_weights
-    if cfg.ia3_fan_in_fan_out is not None:
-        ia3_config_kwargs["fan_in_fan_out"] = cfg.ia3_fan_in_fan_out
+    peft_config_kwargs = {}
+    if cfg.peft_init_ia3_weights is not None:
+        peft_config_kwargs["init_ia3_weights"] = cfg.peft_init_ia3_weights
+    if cfg.peft_fan_in_fan_out is not None:
+        peft_config_kwargs["fan_in_fan_out"] = cfg.peft_fan_in_fan_out
 
-    ia3_config = IA3Config(
-        target_modules=cfg.ia3_target_modules,
-        feedforward_modules=cfg.ia3_feedforward_modules,
-        modules_to_save=cfg.ia3_modules_to_save,
+    peft_config = IA3Config(
+        target_modules=cfg.peft_target_modules,
+        feedforward_modules=cfg.peft_feedforward_modules,
+        modules_to_save=cfg.peft_modules_to_save,
         task_type="CAUSAL_LM",
-        **ia3_config_kwargs,
+        **peft_config_kwargs,
     )
 
     if cfg.peft_model_dir:
@@ -588,8 +588,8 @@ def load_ia3(model, cfg, inference=False):
             is_trainable=(not inference),
         )
     else:
-        model = get_peft_model(model, ia3_config)
+        model = get_peft_model(model, peft_config)
 
     model.print_trainable_parameters()
 
-    return model, ia3_config
+    return model, peft_config
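The `cfg.peft_model_dir` branch shared by these loaders resumes from a previously trained adapter instead of creating a new one; a hedged sketch with placeholder names:

```python
# Sketch of the resume path, assuming a trained adapter exists on disk. The
# model id and adapter path are placeholders, not values from this commit.
from transformers import AutoModelForCausalLM
from peft import PeftModel

base = AutoModelForCausalLM.from_pretrained("NousResearch/Llama-2-7b-hf")
model = PeftModel.from_pretrained(
    base,
    "path/to/peft_model_dir",  # axolotl passes cfg.peft_model_dir here
    is_trainable=True,         # mirrors is_trainable=(not inference)
)
model.print_trainable_parameters()
```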

View File

@@ -0,0 +1,48 @@
+"""Module for testing the validation module"""
+
+import logging
+import unittest
+from typing import Optional
+
+import pytest
+
+from axolotl.utils.config import normalize_config
+from axolotl.utils.dict import DictDefault
+
+
+class NormalizationTest(unittest.TestCase):
+    """
+    Test the cfg normalization module
+    """
+
+    _caplog: Optional[pytest.LogCaptureFixture] = None
+
+    @pytest.fixture(autouse=True)
+    def inject_fixtures(self, caplog):
+        self._caplog = caplog
+
+    def test_lora_to_peft(self):
+        base_cfg = DictDefault(
+            {
+                "gradient_accumulation_steps": 1,
+                "micro_batch_size": 1,
+                "base_model": "NousResearch/Llama-2-7b-hf",
+                "base_model_config": "NousResearch/Llama-2-7b-hf",
+            }
+        )
+
+        cfg = base_cfg | DictDefault(
+            {
+                "adapter": "lora",
+                "lora_r": 128,
+                "lora_alpha": 64,
+            }
+        )
+
+        with self._caplog.at_level(logging.WARNING):
+            normalize_config(cfg)
+            assert any(
+                "soon to be deprecated. please use peft_" in record.message
+                for record in self._caplog.records
+            )
+
+        assert cfg.peft_r == 128
+        assert cfg.peft_alpha == 64
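Note the test drives `normalize_config` directly with `DictDefault` (axolotl's attribute-access dict), which is why `cfg.peft_r` works after the migration; assuming pytest is installed, `pytest -k test_lora_to_peft` runs just this case.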