From 06f83a54a5e273de4a4b17efcceb1e83ef1a5e53 Mon Sep 17 00:00:00 2001 From: Sunny Liu Date: Wed, 22 Jan 2025 20:45:44 -0500 Subject: [PATCH] llama sdpa patching WIP - static class function import --- src/axolotl/monkeypatch/llama_patch_multipack.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/axolotl/monkeypatch/llama_patch_multipack.py b/src/axolotl/monkeypatch/llama_patch_multipack.py index 9f03f551d..bc844af25 100644 --- a/src/axolotl/monkeypatch/llama_patch_multipack.py +++ b/src/axolotl/monkeypatch/llama_patch_multipack.py @@ -28,11 +28,11 @@ def hijack_llama_prepare_4d_mask(): # ) def llama_patched_prepare_4d_causal_attention_mask_with_cache_position( - attention_mask: Optional[torch.Tensor], *args, **kwargs + self, attention_mask: Optional[torch.Tensor], *args, **kwargs ): dtype = torch.bfloat16 if is_torch_bf16_gpu_available() else torch.float32 return LlamaModel._prepare_4d_causal_attention_mask_with_cache_position( - mask_2d_to_4d(attention_mask, dtype=dtype), *args, **kwargs + self, mask_2d_to_4d(attention_mask, dtype=dtype), *args, **kwargs ) LlamaModel._prepare_4d_causal_attention_mask_with_cache_position = ( # pylint: disable=protected-access