upgrade trl and accelerate (#3161)

* upgrade trl==0.23.0 * upgrade accelerate patch fix * add hints when using gradient_checkpointing with DPO * set gradient-checpointing properly
2025-09-16 14:53:01 -04:00
parent d4cff1b7bb
commit 86d6ee7c05
4 changed files with 20 additions and 5 deletions
--- a/src/axolotl/core/builders/base.py
+++ b/src/axolotl/core/builders/base.py
@@ -435,7 +435,7 @@ class TrainerBuilderBase(abc.ABC):
            # don't use the HF gradient checkpointing, manually wrap
            training_args_kwargs["gradient_checkpointing"] = False
            training_args_kwargs["activation_offloading"] = True
-        elif self.cfg.gradient_checkpointing:
+        elif self.cfg.gradient_checkpointing is not None:
            training_args_kwargs["gradient_checkpointing"] = (
                self.cfg.gradient_checkpointing
            )
--- a/src/axolotl/utils/schemas/validation.py
+++ b/src/axolotl/utils/schemas/validation.py
@@ -1378,6 +1378,21 @@ class ComplexValidationMixin:

        return self

+    def hint_gradient_checkpointing_dpo_lora_ddp(self):
+        if (
+            (self.gradient_checkpointing is True or self.gradient_checkpointing is None)
+            and self.capabilities
+            and self.capabilities.get("n_gpu", 1) > 1
+            and self.adapter in ("lora", "qlora")
+            and self.rl == RLType.DPO
+            and not self.fsdp
+            and not self.deepspeed
+        ):
+            LOG.warning(
+                "gradient_checkpointing with DPO + DDP + LoRA is not recommended."
+            )
+        return self
+

 class DistributedValidationMixin:
    """validation for distributed training."""