fix: cohere cce scaling wrong tensor (#2483)
This commit is contained in:
@@ -128,10 +128,10 @@ def cce_forward(
|
|||||||
|
|
||||||
if _PATCH_OPTS is not None and _PATCH_OPTS.use_lce(labels, self.training):
|
if _PATCH_OPTS is not None and _PATCH_OPTS.use_lce(labels, self.training):
|
||||||
assert labels is not None
|
assert labels is not None
|
||||||
# scale weight by logit_scale in-place of logits
|
# scale hidden_states by logit_scale instead of scaling the logits
|
||||||
loss = apply_lce(
|
loss = apply_lce(
|
||||||
hidden_states[:, slice_indices, :],
|
hidden_states[:, slice_indices, :] * self.logit_scale,
|
||||||
self.lm_head.weight * self.logit_scale,
|
self.lm_head.weight,
|
||||||
labels,
|
labels,
|
||||||
_PATCH_OPTS,
|
_PATCH_OPTS,
|
||||||
**kwargs,
|
**kwargs,
|
||||||
|
|||||||
Reference in New Issue
Block a user