Add support for SPPO

This commit is contained in:
Wing Lian
2024-05-02 08:56:15 -04:00
parent 3367fca732
commit 7fea5822f0
5 changed files with 76 additions and 5 deletions

View File

@@ -138,7 +138,7 @@ test_datasets:
data_files:
- /workspace/data/eval.jsonl
# use RL training: 'dpo', 'ipo', 'kto_pair'
# use RL training: 'dpo', 'ipo', 'kto_pair', 'orpo', 'sppo'
rl:
# Saves the desired chat template to the tokenizer_config.json for easier inferencing