From f1f60cb5b2f826f6108233e54af4dab06dfb2993 Mon Sep 17 00:00:00 2001
From: Wing Lian
Date: Wed, 13 Dec 2023 13:42:23 -0500
Subject: [PATCH] Flash attn hotfix (#951)

* use previous arg

* use eager to use legacy attention that can be patched
---
 src/axolotl/utils/models.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/src/axolotl/utils/models.py b/src/axolotl/utils/models.py
index 41a3582ea..8f148a342 100644
--- a/src/axolotl/utils/models.py
+++ b/src/axolotl/utils/models.py
@@ -324,6 +324,10 @@ def load_model(
             model_config._attn_implementation = (  # pylint: disable=protected-access
                 "flash_attention_2"
             )
+        else:
+            model_config._attn_implementation = (  # pylint: disable=protected-access
+                "eager"
+            )
 
     try:
         if cfg.is_llama_derived_model and not cfg.trust_remote_code and not cfg.gptq:
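
Note (editorial, not part of the patch): since transformers 4.36, the attention class
instantiated for each decoder layer is chosen from `config._attn_implementation` at model
construction time. Axolotl's sample-packing support works by monkey-patching the legacy
("eager") attention forward, so when sample packing is enabled the native
"flash_attention_2" class must not be selected, otherwise the patched code would never
run; that is what the added `else:` branch guarantees. The sketch below is a minimal
standalone illustration of that dispatch behavior under transformers 4.36-era semantics,
not axolotl's actual code: `use_flash_attention` and `use_sample_packing` are hypothetical
stand-ins for `cfg.flash_attention` and `cfg.sample_packing`, and a tiny randomly
initialized LlamaConfig is used so the example runs without downloading weights. The
"flash_attention_2" branch additionally requires the flash-attn package and a supported
GPU.

# Minimal sketch (assumes transformers 4.36-era behavior); mirrors the
# selection logic in the hunk above, with hypothetical stand-in flags.
from transformers import AutoModelForCausalLM, LlamaConfig

use_flash_attention = True  # stand-in for cfg.flash_attention
use_sample_packing = True   # stand-in for cfg.sample_packing

# Small random-weight config so the example runs on CPU without downloads.
config = LlamaConfig(
    hidden_size=256,
    intermediate_size=512,
    num_hidden_layers=2,
    num_attention_heads=4,
)

if use_flash_attention:
    if not use_sample_packing:
        # transformers instantiates its native FlashAttention-2 class
        # (requires flash-attn and a supported GPU at load time).
        config._attn_implementation = "flash_attention_2"  # pylint: disable=protected-access
    else:
        # Force the legacy attention class: monkey-patches such as axolotl's
        # packed flash-attention patch replace the eager forward(), so the
        # eager class must be the one actually instantiated.
        config._attn_implementation = "eager"  # pylint: disable=protected-access

model = AutoModelForCausalLM.from_config(config)
print(type(model.model.layers[0].self_attn).__name__)
# -> "LlamaAttention" on the eager path, "LlamaFlashAttention2" on the FA2 path

The choice of "eager" rather than "sdpa" matters here: the sample-packing patch replaces
the forward method of the legacy attention class, while the SDPA and FlashAttention-2
classes define their own forward methods that would silently bypass the patch.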