Mistral flash attn packing (#646)

* add mistral monkeypatch * add arg for decoder attention mask * fix lint for duplicate code * make sure to update transformers too * tweak install for e2e * move mistral patch to conditional
2023-09-27 18:41:00 -04:00
parent 85b0be2ba7
commit b6ab8aad62
4 changed files with 412 additions and 4 deletions
--- a/src/axolotl/utils/models.py
+++ b/src/axolotl/utils/models.py
@@ -150,6 +150,14 @@ def load_model(
        # Note: This might overwrite previous additional_special_tokens
        tokenizer.add_special_tokens({"additional_special_tokens": [MEM_TOKEN]})

+    if cfg.is_mistral_derived_model and cfg.flash_attention:
+        from axolotl.monkeypatch.mistral_attn_hijack_flash import (
+            replace_mistral_attn_with_flash_attn,
+        )
+
+        LOG.info("patching with flash attention")
+        replace_mistral_attn_with_flash_attn(packed=cfg.sample_packing)
+
    if cfg.is_llama_derived_model and cfg.xpos_rope:
        from axolotl.monkeypatch.xpos_rope_llama_monkey_patch import (
            replace_llama_rope_with_xpos_rope,