skip some flash attn patches unless explicitly enabled (#643)

* skip some flash attn patches if explicitly disabled
* make the other patches optional
2023-09-27 12:11:07 -04:00
parent e7d3e2dbb6
commit 895f0a0723
3 changed files with 38 additions and 24 deletions
--- a/src/axolotl/utils/models.py
+++ b/src/axolotl/utils/models.py
@@ -121,7 +121,11 @@ def load_model(
            )

            LOG.info("patching with flash attention for sample packing")
-            replace_llama_attn_with_flash_attn(packed=cfg.sample_packing)
+            replace_llama_attn_with_flash_attn(
+                packed=cfg.sample_packing,
+                cross_entropy=cfg.flash_attn_cross_entropy,
+                rms_norm=cfg.flash_attn_rms_norm,
+            )
    elif cfg.is_llama_derived_model and cfg.xformers_attention:
        from axolotl.monkeypatch.llama_attn_hijack_xformers import (
            hijack_llama_attention,