From de451f99a594fbde02fbac952f402caeb28171cf Mon Sep 17 00:00:00 2001 From: NanoCode012 Date: Sat, 5 Apr 2025 00:47:44 +0700 Subject: [PATCH] fix: cohere cce scaling wrong tensor (#2483) --- .../integrations/cut_cross_entropy/monkeypatch/cohere.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/axolotl/integrations/cut_cross_entropy/monkeypatch/cohere.py b/src/axolotl/integrations/cut_cross_entropy/monkeypatch/cohere.py index 5cdc53b0c..99e17910e 100644 --- a/src/axolotl/integrations/cut_cross_entropy/monkeypatch/cohere.py +++ b/src/axolotl/integrations/cut_cross_entropy/monkeypatch/cohere.py @@ -128,10 +128,10 @@ def cce_forward( if _PATCH_OPTS is not None and _PATCH_OPTS.use_lce(labels, self.training): assert labels is not None - # scale weight by logit_scale in-place of logits + # scale hidden_states by logit_scale in place of the logits loss = apply_lce( - hidden_states[:, slice_indices, :], - self.lm_head.weight * self.logit_scale, + hidden_states[:, slice_indices, :] * self.logit_scale, + self.lm_head.weight, labels, _PATCH_OPTS, **kwargs,