From f1f60cb5b2f826f6108233e54af4dab06dfb2993 Mon Sep 17 00:00:00 2001
From: Wing Lian
Date: Wed, 13 Dec 2023 13:42:23 -0500
Subject: [PATCH] Flash attn hotfix (#951)

* use previous arg

* use eager to use legacy attention that can be patched
---
 src/axolotl/utils/models.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/src/axolotl/utils/models.py b/src/axolotl/utils/models.py
index 41a3582ea..8f148a342 100644
--- a/src/axolotl/utils/models.py
+++ b/src/axolotl/utils/models.py
@@ -324,6 +324,10 @@ def load_model(
             model_config._attn_implementation = (  # pylint: disable=protected-access
                 "flash_attention_2"
             )
+        else:
+            model_config._attn_implementation = (  # pylint: disable=protected-access
+                "eager"
+            )
 
     try:
         if cfg.is_llama_derived_model and not cfg.trust_remote_code and not cfg.gptq:
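
Note (editorial, not part of the patch): since transformers 4.36, the attention class
instantiated for each decoder layer is chosen from `config._attn_implementation` at model
construction time. Axolotl's sample-packing support works by monkey-patching the legacy
("eager") attention forward, so when sample packing is enabled the native
"flash_attention_2" class must not be selected, otherwise the patched code would never
run; that is what the added `else:` branch guarantees. The sketch below is a minimal
standalone illustration of that dispatch behavior under transformers 4.36-era semantics,
not axolotl's actual code: `use_flash_attention` and `use_sample_packing` are hypothetical
stand-ins for `cfg.flash_attention` and `cfg.sample_packing`, and a tiny randomly
initialized LlamaConfig is used so the example runs without downloading weights. The
"flash_attention_2" branch additionally requires the flash-attn package and a supported
GPU.

# Minimal sketch (assumes transformers 4.36-era behavior); mirrors the
# selection logic in the hunk above, with hypothetical stand-in flags.
from transformers import AutoModelForCausalLM, LlamaConfig

use_flash_attention = True  # stand-in for cfg.flash_attention
use_sample_packing = True   # stand-in for cfg.sample_packing

# Small random-weight config so the example runs on CPU without downloads.
config = LlamaConfig(
    hidden_size=256,
    intermediate_size=512,
    num_hidden_layers=2,
    num_attention_heads=4,
)

if use_flash_attention:
    if not use_sample_packing:
        # transformers instantiates its native FlashAttention-2 class
        # (requires flash-attn and a supported GPU at load time).
        config._attn_implementation = "flash_attention_2"  # pylint: disable=protected-access
    else:
        # Force the legacy attention class: monkey-patches such as axolotl's
        # packed flash-attention patch replace the eager forward(), so the
        # eager class must be the one actually instantiated.
        config._attn_implementation = "eager"  # pylint: disable=protected-access

model = AutoModelForCausalLM.from_config(config)
print(type(model.model.layers[0].self_attn).__name__)
# -> "LlamaAttention" on the eager path, "LlamaFlashAttention2" on the FA2 path

The choice of "eager" rather than "sdpa" matters here: the sample-packing patch replaces
the forward method of the legacy attention class, while the SDPA and FlashAttention-2
classes define their own forward methods that would silently bypass the patch.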