respect shuffle_merged_datasets for single dataset too (#2866) [skip ci]

* respect shuffle_merged_datasets for a single dataset too (previously a lone dataset was always shuffled unless curriculum sampling was enabled)

* update the inline comment in merge_datasets to describe the new no-shuffle condition

Co-authored-by: NanoCode012 <nano@axolotl.ai>

---------

Co-authored-by: NanoCode012 <nano@axolotl.ai>
This commit is contained in:
Wing Lian
2025-07-06 21:20:41 -04:00
committed by GitHub
parent a5946ff1f0
commit bf38e507fb

View File

@@ -526,8 +526,9 @@ def merge_datasets(datasets: list[Dataset], cfg: DictDefault) -> Dataset:
     if len(datasets) == 1:
         ds = datasets[0]
-        # Do not shuffle if curriculum sampling is enabled
-        if cfg.curriculum_sampling:
+        # Do not shuffle if curriculum sampling is enabled or
+        # shuffle_merged_datasets is disabled
+        if cfg.curriculum_sampling or not cfg.shuffle_merged_datasets:
             return ds
         return ds.shuffle(seed=cfg.seed)