This commit is contained in:
Dan Saunders
2025-03-17 01:21:22 +00:00
parent d187f1f8e2
commit 7d7042f602
2 changed files with 1 addition and 2 deletions

View File

@@ -977,7 +977,6 @@ class AxolotlTrainer(SchedulerMixin, OptimizerMixin, Trainer):
packed_seq_lens=[seq_len] * batch_size, total_seq_len=total_seq_len
)
# Get the loss from the parent implementation
loss = super().training_step(model, inputs, num_items_in_batch)
return loss

View File

@@ -552,7 +552,7 @@ class ModelLoader:
patch_self_attn_lora(self.cfg)
if self.cfg.sequence_parallel_degree > 1:
if self.cfg.sequence_parallel_degree and self.cfg.sequence_parallel_degree > 1:
from axolotl.monkeypatch.attention.ring_attn import register_ring_attn
# Initialize ring attn for sequence parallelism. This must be done after