diff --git a/src/axolotl/utils/trainer.py b/src/axolotl/utils/trainer.py index baa0ce626..9ef1ac95b 100644 --- a/src/axolotl/utils/trainer.py +++ b/src/axolotl/utils/trainer.py @@ -104,8 +104,8 @@ def setup_trainer(cfg, train_dataset, eval_dataset, model, tokenizer): group_by_length=cfg.group_by_length, report_to="wandb" if cfg.use_wandb else None, run_name=cfg.wandb_run_id if cfg.use_wandb else None, - optim=cfg.optimizer if cfg.optimizer else None, - lr_scheduler_type=cfg.lr_scheduler if cfg.lr_scheduler not in ("one_cycle", "log_sweep") else "cosine", + optim=cfg.optimizer if cfg.optimizer else "adamw_hf", + lr_scheduler_type=cfg.lr_scheduler if cfg.lr_scheduler and cfg.lr_scheduler not in ("one_cycle", "log_sweep") else "cosine", weight_decay=cfg.weight_decay if cfg.weight_decay is not None else 0.0, **training_arguments_kwargs, )