Add weighted optimisation support for trl DPO trainer integration (#2016)

* trl v0.12.0 integration

* update trl version requirements

* linting

* commenting out

* trl version requirement
This commit is contained in:
Sunny Liu
2024-11-08 11:29:11 -05:00
committed by GitHub
parent 3cb2d75de1
commit 3265b7095e
5 changed files with 61 additions and 5 deletions

View File

@@ -183,6 +183,8 @@ test_datasets:
# use RL training: 'dpo', 'ipo', 'kto'
rl:
# whether to perform weighting if doing DPO training. Boolean.
dpo_use_weighting:
# The name of the chat template to use for training, following values are supported:
# - tokenizer_default: Uses the chat template that is available in the tokenizer_config.json. If the chat template is not available in the tokenizer, it will raise an error. This is the default value.