diff --git a/src/axolotl/monkeypatch/llama_patch_multipack.py b/src/axolotl/monkeypatch/llama_patch_multipack.py index 43b11d918..ad72af385 100644 --- a/src/axolotl/monkeypatch/llama_patch_multipack.py +++ b/src/axolotl/monkeypatch/llama_patch_multipack.py @@ -35,7 +35,7 @@ def hijack_llama_prepare_4d_mask(): # return LlamaModel._prepare_4d_causal_attention_mask_with_cache_position( # mask_2d_to_4d(attention_mask, dtype=dtype), **kwargs # ) - return mask_2d_to_4d(attention_mask, dtype=dtype) + return mask_2d_to_4d(attention_mask, dtype=dtype).bool() LlamaModel._prepare_4d_causal_attention_mask_with_cache_position = ( # pylint: disable=protected-access llama_patched_prepare_4d_causal_attention_mask_with_cache_position