From bf38e507fb124c8081bff71f70b42de474aa50ff Mon Sep 17 00:00:00 2001 From: Wing Lian Date: Sun, 6 Jul 2025 21:20:41 -0400 Subject: [PATCH] respect shuffle_merged_datasets for single dataset too (#2866) [skip ci] * respect shuffle_merged_datasets for single dataset too * update inline comment for behavior Co-authored-by: NanoCode012 --------- Co-authored-by: NanoCode012 --- src/axolotl/utils/data/shared.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/axolotl/utils/data/shared.py b/src/axolotl/utils/data/shared.py index a537c5b65..c3c70545c 100644 --- a/src/axolotl/utils/data/shared.py +++ b/src/axolotl/utils/data/shared.py @@ -526,8 +526,9 @@ def merge_datasets(datasets: list[Dataset], cfg: DictDefault) -> Dataset: if len(datasets) == 1: ds = datasets[0] - # Do not shuffle if curriculum sampling is enabled - if cfg.curriculum_sampling: + # Do not shuffle if curriculum sampling is enabled or + # shuffle_merged_datasets is disabled + if cfg.curriculum_sampling or not cfg.shuffle_merged_datasets: return ds return ds.shuffle(seed=cfg.seed)