Merge pull request #159 from AngainorDev/patch-1
Fix training over existing lora
@@ -77,15 +77,9 @@ def load_tokenizer(
 
 
 def load_model(
-    base_model,
-    base_model_config,
-    model_type,
-    tokenizer,
-    cfg,
-    adapter="lora",
-    inference=False,
+    base_model, base_model_config, model_type, tokenizer, cfg, adapter="lora"
 ):
-    # type: (str, str, str, AutoTokenizer, DictDefault, Optional[str], bool) -> Tuple[PreTrainedModel, Optional[PeftConfig]]
+    # type: (str, str, str, AutoTokenizer, DictDefault, Optional[str]) -> Tuple[PreTrainedModel, Optional[PeftConfig]]
     """
     Load a model from a base model and a model type.
     """
@@ -98,7 +92,7 @@ def load_model(
     )
 
     if cfg.is_llama_derived_model and cfg.flash_attention:
-        if cfg.device not in ["mps", "cpu"] and inference is False:
+        if cfg.device not in ["mps", "cpu"] and not cfg.inference:
             from axolotl.flash_attn import replace_llama_attn_with_flash_attn
 
             logging.info("patching with flash attention")
@@ -439,6 +433,7 @@ def load_lora(model, cfg):
     model = PeftModel.from_pretrained(
         model,
         cfg.lora_model_dir,
+        is_trainable=not cfg.inference,
         device_map=cfg.device_map,
         # torch_dtype=torch.float16,
     )
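For context on the load_lora change: peft's PeftModel.from_pretrained defaults to is_trainable=False, which loads the adapter in inference mode with its weights frozen, so a new training run over an existing LoRA would not actually update it. A minimal sketch of that idea (not axolotl code; the base model id and adapter path below are placeholder assumptions):

# Minimal sketch (not from this commit): continuing training over an existing
# LoRA adapter with peft. The base model id and adapter path are placeholders.
from peft import PeftModel
from transformers import AutoModelForCausalLM

base = AutoModelForCausalLM.from_pretrained("huggyllama/llama-7b")

# PeftModel.from_pretrained defaults to is_trainable=False, which freezes the
# adapter weights (inference mode). Passing is_trainable=True keeps them
# trainable so a further fine-tuning run actually updates the existing LoRA.
model = PeftModel.from_pretrained(
    base,
    "path/to/existing-lora-adapter",  # placeholder adapter directory
    is_trainable=True,
)
model.print_trainable_parameters()  # should report nonzero trainable params

The commit drives the same flag from the config, is_trainable=not cfg.inference, so inference runs keep the adapter frozen while training over an existing LoRA keeps it updatable.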