Merge pull request #159 from AngainorDev/patch-1
Fix training over existing lora
@@ -77,15 +77,9 @@ def load_tokenizer(
 
 
 def load_model(
-    base_model,
-    base_model_config,
-    model_type,
-    tokenizer,
-    cfg,
-    adapter="lora",
-    inference=False,
+    base_model, base_model_config, model_type, tokenizer, cfg, adapter="lora"
 ):
-    # type: (str, str, str, AutoTokenizer, DictDefault, Optional[str], bool) -> Tuple[PreTrainedModel, Optional[PeftConfig]]
+    # type: (str, str, str, AutoTokenizer, DictDefault, Optional[str]) -> Tuple[PreTrainedModel, Optional[PeftConfig]]
     """
     Load a model from a base model and a model type.
     """
@@ -98,7 +92,7 @@ def load_model(
     )
 
     if cfg.is_llama_derived_model and cfg.flash_attention:
-        if cfg.device not in ["mps", "cpu"] and inference is False:
+        if cfg.device not in ["mps", "cpu"] and not cfg.inference:
             from axolotl.flash_attn import replace_llama_attn_with_flash_attn
 
             logging.info("patching with flash attention")
@@ -439,6 +433,7 @@ def load_lora(model, cfg):
     model = PeftModel.from_pretrained(
         model,
         cfg.lora_model_dir,
+        is_trainable=not cfg.inference,
         device_map=cfg.device_map,
         # torch_dtype=torch.float16,
     )
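For context on the load_lora change: peft's PeftModel.from_pretrained defaults to is_trainable=False, which loads the adapter in inference mode with its weights frozen, so a new training run over an existing LoRA would not actually update it. A minimal sketch of that idea (not axolotl code; the base model id and adapter path below are placeholder assumptions):

# Minimal sketch (not from this commit): continuing training over an existing
# LoRA adapter with peft. The base model id and adapter path are placeholders.
from peft import PeftModel
from transformers import AutoModelForCausalLM

base = AutoModelForCausalLM.from_pretrained("huggyllama/llama-7b")

# PeftModel.from_pretrained defaults to is_trainable=False, which freezes the
# adapter weights (inference mode). Passing is_trainable=True keeps them
# trainable so a further fine-tuning run actually updates the existing LoRA.
model = PeftModel.from_pretrained(
    base,
    "path/to/existing-lora-adapter",  # placeholder adapter directory
    is_trainable=True,
)
model.print_trainable_parameters()  # should report nonzero trainable params

The commit drives the same flag from the config, is_trainable=not cfg.inference, so inference runs keep the adapter frozen while training over an existing LoRA keeps it updatable.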