Fix and document test_datasets (#1228)

* Make sure test_datasets are used and handle val_set_size.

* Add test_datasets docs.

* Apply suggestions from code review

---------

Co-authored-by: Wing Lian <wing.lian@gmail.com>
This commit is contained in:
DreamGenX
2024-01-31 12:48:57 +01:00
committed by GitHub
parent 8608d8003e
commit 5787e1a23f
3 changed files with 14 additions and 2 deletions

View File

@@ -607,6 +607,17 @@ datasets:
# For `completion` datasets only, use the provided field instead of the `text` column
field:
# A list of one or more datasets to eval the model with.
# You can use either test_datasets, or val_set_size, but not both.
test_datasets:
  - path: /workspace/data/eval.jsonl
    ds_type: json
    # You need to specify a split. For "json" datasets the default split is called "train".
    split: train
    type: completion
    data_files:
      - /workspace/data/eval.jsonl
# Use RL training. Supported values: dpo, ipo, kto_pair
rl: