This commit is contained in:
Dan Saunders
2025-03-17 01:21:22 +00:00
parent d187f1f8e2
commit 7d7042f602
2 changed files with 1 addition and 2 deletions

View File

@@ -977,7 +977,6 @@ class AxolotlTrainer(SchedulerMixin, OptimizerMixin, Trainer):
packed_seq_lens=[seq_len] * batch_size, total_seq_len=total_seq_len
)
# Get the loss from the parent implementation
loss = super().training_step(model, inputs, num_items_in_batch)
return loss

View File

@@ -552,7 +552,7 @@ class ModelLoader:
patch_self_attn_lora(self.cfg)
if self.cfg.sequence_parallel_degree > 1:
if self.cfg.sequence_parallel_degree and self.cfg.sequence_parallel_degree > 1:
from axolotl.monkeypatch.attention.ring_attn import register_ring_attn
# Initialize ring attn for sequence parallelism. This must be done after