most model types now support flash attention 2 regardless of multipack support (#1854)

2024-08-22 16:39:23 -04:00
parent b33dc07a77
commit fefa95e350
2 changed files with 5 additions and 10 deletions
--- a/src/axolotl/monkeypatch/multipack.py
+++ b/src/axolotl/monkeypatch/multipack.py
@@ -17,6 +17,7 @@ SUPPORTED_MULTIPACK_MODEL_TYPES = [
    "qwen2_moe",
    "falcon",
    "phi",
+    "phi3",
    "gemma",
    "gemma2",
    "gemmoe",
--- a/src/axolotl/utils/models.py
+++ b/src/axolotl/utils/models.py
@@ -591,16 +591,10 @@ def load_model(
                "flash_attention_2"
            )
        else:
-            if model_config.model_type in SUPPORTED_MULTIPACK_MODEL_TYPES:
+            model_kwargs["attn_implementation"] = "flash_attention_2"
-                model_kwargs["attn_implementation"] = "flash_attention_2"
+            model_config._attn_implementation = (  # pylint: disable=protected-access
-                model_config._attn_implementation = (  # pylint: disable=protected-access
+                "flash_attention_2"
-                    "flash_attention_2"
+            )
-                )
-            else:
-                model_kwargs["attn_implementation"] = "eager"
-                model_config._attn_implementation = (  # pylint: disable=protected-access
-                    "eager"
-                )
    elif cfg.sdp_attention:
        model_kwargs["attn_implementation"] = "sdpa"
        model_config._attn_implementation = "sdpa"  # pylint: disable=protected-access