Add support for SPPO

This commit is contained in:
Wing Lian
2024-05-02 08:56:15 -04:00
parent 3367fca732
commit 7fea5822f0
5 changed files with 76 additions and 5 deletions

View File

@@ -138,7 +138,7 @@ test_datasets:
data_files:
- /workspace/data/eval.jsonl
# use RL training: 'dpo', 'ipo', 'kto_pair'
# use RL training: 'dpo', 'ipo', 'kto_pair', 'orpo', 'sppo'
rl:
# Saves the desired chat template to the tokenizer_config.json for easier inferencing