bump trl and accelerate for latest releases (#1730)
* bump trl and accelerate for latest releases * ensure that the CI runs on new gh org * drop kto_pair support since removed upstream
This commit is contained in:
@@ -1670,8 +1670,6 @@ class HFRLTrainerBuilder(TrainerBuilderBase):
|
||||
dpo_trainer_kwargs["loss_type"] = "ipo"
|
||||
if self.cfg.dpo_label_smoothing:
|
||||
dpo_trainer_kwargs["label_smoothing"] = self.cfg.dpo_label_smoothing
|
||||
elif self.cfg.rl == "kto_pair":
|
||||
dpo_trainer_kwargs["loss_type"] = "kto_pair"
|
||||
if self.eval_dataset:
|
||||
dpo_trainer_kwargs["eval_dataset"] = self.eval_dataset
|
||||
if self.cfg.adapter and self.peft_config:
|
||||
@@ -1680,7 +1678,7 @@ class HFRLTrainerBuilder(TrainerBuilderBase):
|
||||
dpo_trainer_kwargs[
|
||||
"precompute_ref_log_probs"
|
||||
] = self.cfg.precompute_ref_log_probs
|
||||
if self.cfg.rl in ["dpo", "ipo", "kto_pair"]:
|
||||
if self.cfg.rl in ["dpo", "ipo"]:
|
||||
trainer_cls = AxolotlDPOTrainer
|
||||
dpo_trainer_kwargs["beta"] = self.cfg.rl_beta or 0.1
|
||||
trainer_cls_args = [self.model, self.model_ref]
|
||||
@@ -1695,7 +1693,7 @@ class HFRLTrainerBuilder(TrainerBuilderBase):
|
||||
elif self.cfg.rl == "orpo":
|
||||
trainer_cls = AxolotlORPOTrainer
|
||||
trainer_cls_args = [self.model]
|
||||
elif self.cfg.rl == "kto":
|
||||
elif self.cfg.rl in ["kto"]:
|
||||
trainer_cls = AxolotlKTOTrainer
|
||||
trainer_cls_args = [self.model]
|
||||
else:
|
||||
|
||||
@@ -165,7 +165,6 @@ class RLType(str, Enum):
|
||||
|
||||
dpo = "dpo" # pylint: disable=invalid-name
|
||||
ipo = "ipo" # pylint: disable=invalid-name
|
||||
kto_pair = "kto_pair" # pylint: disable=invalid-name
|
||||
orpo = "orpo" # pylint: disable=invalid-name
|
||||
kto = "kto" # pylint: disable=invalid-name
|
||||
|
||||
|
||||
@@ -805,11 +805,7 @@ def load_model(
|
||||
if not reference_model or cfg.lora_model_dir:
|
||||
# if we're not loading the reference model, then we're loading the model for training
|
||||
# then the dpo trainer doesn't want the peft model loaded over it, it just wants the lora/peft config
|
||||
if (
|
||||
cfg.adapter
|
||||
and cfg.rl in ["dpo", "ipo", "kto_pair", "kto"]
|
||||
and not cfg.merge_lora
|
||||
):
|
||||
if cfg.adapter and cfg.rl in ["dpo", "ipo", "kto"] and not cfg.merge_lora:
|
||||
_, lora_config = load_lora(model, cfg, inference=False, config_only=True)
|
||||
else:
|
||||
model, lora_config = load_adapter(model, cfg, cfg.adapter)
|
||||
|
||||
@@ -427,7 +427,7 @@ def prepare_optim_env(cfg):
|
||||
|
||||
|
||||
def setup_trainer(cfg, train_dataset, eval_dataset, model, tokenizer, total_num_steps):
|
||||
if cfg.rl in ["dpo", "ipo", "kto_pair", "orpo", "kto"]:
|
||||
if cfg.rl in ["dpo", "ipo", "orpo", "kto"]:
|
||||
trainer_builder = HFRLTrainerBuilder(cfg, model[0], tokenizer)
|
||||
trainer_builder.model_ref = model[1]
|
||||
trainer_builder.peft_config = model[2]
|
||||
|
||||
Reference in New Issue
Block a user