From a213d9972aeca545e9176c45f0a0bdab04ace277 Mon Sep 17 00:00:00 2001
From: Aman Karmani <aman@tmm1.net>
Date: Mon, 21 Aug 2023 10:40:06 -0700
Subject: [PATCH] fix eval regression introduced in
 13f7efaf74fcd3c4514277ccb71914c589873f6a

---
 src/axolotl/monkeypatch/llama_attn_hijack_flash.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/src/axolotl/monkeypatch/llama_attn_hijack_flash.py b/src/axolotl/monkeypatch/llama_attn_hijack_flash.py
index 79199e34c..cb0aa3fe6 100644
--- a/src/axolotl/monkeypatch/llama_attn_hijack_flash.py
+++ b/src/axolotl/monkeypatch/llama_attn_hijack_flash.py
@@ -155,12 +155,10 @@ def flashattn_forward(
         # during training q,k,v always have same seqlen
         assert key_states.shape == query_states.shape
         is_causal = True
-    elif past_key_value is None:
-        is_causal = True
     else:
         # turn off FA causal mask after first inference autoregressive iteration
         # only on first autoregressive step q,k,v have same seqlen
-        is_causal = past_key_value is not None
+        is_causal = key_states.shape == query_states.shape
 
     if cu_seqlens is not None and max_seqlen is not None:
         # special handling using sample packing