Fix and document test_datasets (#1228)
* Make sure test_datasets are used and treat val_set_size. * Add test_datasets docs. * Apply suggestions from code review --------- Co-authored-by: Wing Lian <wing.lian@gmail.com>
This commit is contained in:
11
README.md
11
README.md
@@ -607,6 +607,17 @@ datasets:
|
|||||||
# For `completion` datasets only, uses the provided field instead of `text` column
|
# For `completion` datasets only, uses the provided field instead of `text` column
|
||||||
field:
|
field:
|
||||||
|
|
||||||
|
# A list of one or more datasets to eval the model with.
|
||||||
|
# You can use either test_datasets or val_set_size, but not both.
|
||||||
|
test_datasets:
|
||||||
|
- path: /workspace/data/eval.jsonl
|
||||||
|
ds_type: json
|
||||||
|
# You need to specify a split. For "json" datasets the default split is called "train".
|
||||||
|
split: train
|
||||||
|
type: completion
|
||||||
|
data_files:
|
||||||
|
- /workspace/data/eval.jsonl
|
||||||
|
|
||||||
# use RL training: dpo, ipo, kto_pair
|
# use RL training: dpo, ipo, kto_pair
|
||||||
rl:
|
rl:
|
||||||
|
|
||||||
|
|||||||
@@ -735,7 +735,7 @@ class HFCausalTrainerBuilder(TrainerBuilderBase):
|
|||||||
elif self.cfg.sample_packing and self.cfg.eval_sample_packing is False:
|
elif self.cfg.sample_packing and self.cfg.eval_sample_packing is False:
|
||||||
training_arguments_kwargs["dataloader_drop_last"] = True
|
training_arguments_kwargs["dataloader_drop_last"] = True
|
||||||
|
|
||||||
if self.cfg.val_set_size == 0:
|
if not self.cfg.test_datasets and self.cfg.val_set_size == 0:
|
||||||
# no eval set, so don't eval
|
# no eval set, so don't eval
|
||||||
training_arguments_kwargs["evaluation_strategy"] = "no"
|
training_arguments_kwargs["evaluation_strategy"] = "no"
|
||||||
elif self.cfg.eval_steps:
|
elif self.cfg.eval_steps:
|
||||||
@@ -822,6 +822,7 @@ class HFCausalTrainerBuilder(TrainerBuilderBase):
|
|||||||
self.cfg.load_best_model_at_end is not False
|
self.cfg.load_best_model_at_end is not False
|
||||||
or self.cfg.early_stopping_patience
|
or self.cfg.early_stopping_patience
|
||||||
)
|
)
|
||||||
|
and not self.cfg.test_datasets
|
||||||
and self.cfg.val_set_size > 0
|
and self.cfg.val_set_size > 0
|
||||||
and self.cfg.save_steps
|
and self.cfg.save_steps
|
||||||
and self.cfg.eval_steps
|
and self.cfg.eval_steps
|
||||||
|
|||||||
@@ -440,7 +440,7 @@ def load_prepare_datasets(
|
|||||||
split="train",
|
split="train",
|
||||||
) -> Tuple[Dataset, Dataset, List[Prompter]]:
|
) -> Tuple[Dataset, Dataset, List[Prompter]]:
|
||||||
dataset, prompters = load_tokenized_prepared_datasets(
|
dataset, prompters = load_tokenized_prepared_datasets(
|
||||||
tokenizer, cfg, default_dataset_prepared_path
|
tokenizer, cfg, default_dataset_prepared_path, split=split
|
||||||
)
|
)
|
||||||
|
|
||||||
if cfg.dataset_shard_num and cfg.dataset_shard_idx is not None:
|
if cfg.dataset_shard_num and cfg.dataset_shard_idx is not None:
|
||||||
|
|||||||
Reference in New Issue
Block a user