respect shuffle_merged_datasets for single dataset too (#2866) [skip ci]
* respect shuffle_merged_datasets for single dataset too
* update inline comment for behavior

Co-authored-by: NanoCode012 <nano@axolotl.ai>

---------

Co-authored-by: NanoCode012 <nano@axolotl.ai>
This commit is contained in:
@@ -526,8 +526,9 @@ def merge_datasets(datasets: list[Dataset], cfg: DictDefault) -> Dataset:
|
|||||||
if len(datasets) == 1:
|
if len(datasets) == 1:
|
||||||
ds = datasets[0]
|
ds = datasets[0]
|
||||||
|
|
||||||
# Do not shuffle if curriculum sampling is enabled
|
# Do not shuffle if curriculum sampling is enabled or
|
||||||
if cfg.curriculum_sampling:
|
# shuffle_merged_datasets is disabled
|
||||||
|
if cfg.curriculum_sampling or not cfg.shuffle_merged_datasets:
|
||||||
return ds
|
return ds
|
||||||
|
|
||||||
return ds.shuffle(seed=cfg.seed)
|
return ds.shuffle(seed=cfg.seed)
|
||||||
|
|||||||
Reference in New Issue
Block a user