Flash attn hotfix (#951)
* use previous arg
* use eager to use legacy attention that can be patched
@@ -324,6 +324,10 @@ def load_model(
         model_config._attn_implementation = (  # pylint: disable=protected-access
             "flash_attention_2"
         )
+    else:
+        model_config._attn_implementation = (  # pylint: disable=protected-access
+            "eager"
+        )
 
     try:
         if cfg.is_llama_derived_model and not cfg.trust_remote_code and not cfg.gptq:
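The change is small but the intent matters: transformers dispatches attention based on the config's `_attn_implementation` field, and only the "eager" path runs the legacy Python attention modules that monkey-patches can wrap; leaving the field unset lets recent transformers versions pick a fused backend such as "sdpa", which bypasses those classes. Below is a minimal sketch of the same selection logic, assuming a boolean flash-attention flag; the helper name and flag are illustrative, and only the two config assignments come from the diff itself.

from transformers import AutoConfig

def pick_attn_implementation(model_name: str, flash_attention: bool):
    """Illustrative helper mirroring the branch added in this hotfix."""
    model_config = AutoConfig.from_pretrained(model_name)
    if flash_attention:
        # Route attention through the fused flash-attention-2 kernels.
        model_config._attn_implementation = (  # pylint: disable=protected-access
            "flash_attention_2"
        )
    else:
        # Explicitly fall back to eager attention: the legacy attention
        # classes stay in the forward path, so existing patches still apply.
        model_config._attn_implementation = (  # pylint: disable=protected-access
            "eager"
        )
    return model_config

Before this fix only the flash-attention branch set the field, so non-flash runs could silently land on a default backend whose attention could not be patched; forcing "eager" restores the previous, patchable behavior.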