From fb88269dcb0c92fb244b1c5e86b3ab130aef26ff Mon Sep 17 00:00:00 2001
From: NanoCode012
Date: Tue, 4 Feb 2025 02:01:05 +0700
Subject: [PATCH] fix: set model_accepts_loss_kwargs=False

---
 .../integrations/lolcats/trainer/distill_attention_xent_mse.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/axolotl/integrations/lolcats/trainer/distill_attention_xent_mse.py b/src/axolotl/integrations/lolcats/trainer/distill_attention_xent_mse.py
index 453e141d8..62a95ed7b 100644
--- a/src/axolotl/integrations/lolcats/trainer/distill_attention_xent_mse.py
+++ b/src/axolotl/integrations/lolcats/trainer/distill_attention_xent_mse.py
@@ -33,6 +33,8 @@ class DistillAttentionXentMSETrainer(AxolotlTrainer):
         self.xent_factor = xent_factor
         # self.compute_loss_backprop = False  # Whether we backprop in self.compute_loss
         # NOTE: this config seems unnecessary
+        self.model_accepts_loss_kwargs = False  # added to combat explosive loss
+
     def compute_loss(
         self,
         model: nn.Module,