Add weighted optimisation support for trl DPO trainer integration (#2016)

* trl v0.12.0 integration

* update trl version requirements

* linting

* commenting out

* trl version requirement
This commit is contained in:
Sunny Liu
2024-11-08 11:29:11 -05:00
committed by GitHub
parent 3cb2d75de1
commit 3265b7095e
5 changed files with 61 additions and 5 deletions

View File

@@ -183,6 +183,8 @@ test_datasets:
# use RL training: 'dpo', 'ipo', 'kto'
rl:
# whether to perform weighting if doing DPO training. Boolean.
dpo_use_weighting:
# The name of the chat template to use for training, following values are supported:
# - tokenizer_default: Uses the chat template that is available in the tokenizer_config.json. If the chat template is not available in the tokenizer, it will raise an error. This is the default value.