From 1c603da96a67b7be6556cd82cf1ffebe2960e326 Mon Sep 17 00:00:00 2001
From: Wing Lian
Date: Thu, 19 Dec 2024 01:21:48 -0500
Subject: [PATCH] flipped the slice

---
 src/axolotl/core/trainers/kd.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/axolotl/core/trainers/kd.py b/src/axolotl/core/trainers/kd.py
index f1b47f50c..bec18821d 100644
--- a/src/axolotl/core/trainers/kd.py
+++ b/src/axolotl/core/trainers/kd.py
@@ -23,7 +23,7 @@ def kd_loss_function(
 
     # Slice student logits to match the teacher-provided sequence length
     student_logits_for_kd = student_logits[
-        :, -teacher_seq_len:, :
+        :, :teacher_seq_len, :
     ]  # [B, teacher_seq_len, vocab_size]
 
     # Gather student logits for teacher's top-K tokens