upgrade trl and accelerate (#3161)

* upgrade trl==0.23.0

* upgrade accelerate patch fix

* add hints when using gradient_checkpointing with DPO

* set gradient-checkpointing properly
This commit is contained in:
Wing Lian
2025-09-16 14:53:01 -04:00
committed by GitHub
parent d4cff1b7bb
commit 86d6ee7c05
4 changed files with 20 additions and 5 deletions

View File

@@ -15,10 +15,10 @@ huggingface_hub>=0.33.0
peft>=0.17.0 peft>=0.17.0
transformers==4.56.1 transformers==4.56.1
tokenizers>=0.21.1 tokenizers>=0.21.1
accelerate==1.10.0 accelerate==1.10.1
datasets==4.0.0 datasets==4.0.0
deepspeed>=0.17.0 deepspeed>=0.17.0
trl==0.21.0 trl==0.23.0
hf_xet==1.1.5 hf_xet==1.1.5
kernels==0.9.0 kernels==0.9.0
trackio trackio

View File

@@ -435,7 +435,7 @@ class TrainerBuilderBase(abc.ABC):
# don't use the HF gradient checkpointing, manually wrap # don't use the HF gradient checkpointing, manually wrap
training_args_kwargs["gradient_checkpointing"] = False training_args_kwargs["gradient_checkpointing"] = False
training_args_kwargs["activation_offloading"] = True training_args_kwargs["activation_offloading"] = True
elif self.cfg.gradient_checkpointing: elif self.cfg.gradient_checkpointing is not None:
training_args_kwargs["gradient_checkpointing"] = ( training_args_kwargs["gradient_checkpointing"] = (
self.cfg.gradient_checkpointing self.cfg.gradient_checkpointing
) )

View File

@@ -1378,6 +1378,21 @@ class ComplexValidationMixin:
return self return self
def hint_gradient_checkpointing_dpo_lora_ddp(self):
if (
(self.gradient_checkpointing is True or self.gradient_checkpointing is None)
and self.capabilities
and self.capabilities.get("n_gpu", 1) > 1
and self.adapter in ("lora", "qlora")
and self.rl == RLType.DPO
and not self.fsdp
and not self.deepspeed
):
LOG.warning(
"gradient_checkpointing with DPO + DDP + LoRA is not recommended."
)
return self
class DistributedValidationMixin: class DistributedValidationMixin:
"""validation for distributed training.""" """validation for distributed training."""

View File

@@ -199,7 +199,7 @@ class TestMultiGPULlama:
"max_steps": 2, "max_steps": 2,
"micro_batch_size": 2, "micro_batch_size": 2,
"gradient_accumulation_steps": 2, "gradient_accumulation_steps": 2,
# "gradient_checkpointing": True, "gradient_checkpointing": False,
"output_dir": temp_dir, "output_dir": temp_dir,
"dataset_prepared_path": temp_dir + "/last_run_prepared", "dataset_prepared_path": temp_dir + "/last_run_prepared",
"warmup_steps": 0, "warmup_steps": 0,
@@ -278,7 +278,7 @@ class TestMultiGPULlama:
"max_steps": 2, "max_steps": 2,
"micro_batch_size": 2, "micro_batch_size": 2,
"gradient_accumulation_steps": 2, "gradient_accumulation_steps": 2,
# "gradient_checkpointing": True, "gradient_checkpointing": False,
"output_dir": temp_dir, "output_dir": temp_dir,
"dataset_prepared_path": temp_dir + "/last_run_prepared", "dataset_prepared_path": temp_dir + "/last_run_prepared",
"warmup_steps": 0, "warmup_steps": 0,