From e3c9d541a7b7816abcaa24a12616daa074f3c1dc Mon Sep 17 00:00:00 2001 From: Chiwan Park Date: Sat, 26 Apr 2025 06:15:03 +0900 Subject: [PATCH] fix: crash when pretraining_dataset with dispatch_batches is false (#2558) --- src/axolotl/utils/data/sft.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/axolotl/utils/data/sft.py b/src/axolotl/utils/data/sft.py index 413f6d144..12f0701f0 100644 --- a/src/axolotl/utils/data/sft.py +++ b/src/axolotl/utils/data/sft.py @@ -134,10 +134,9 @@ def prepare_dataset(cfg, tokenizer, processor=None, preprocess_iterable=None): "csv", data_files=f.name, split="train", streaming=True ) else: - if is_local_main_process(): - iter_ds = load_dataset( - path, streaming=True, split=split, name=name, data_files=data_files - ) + iter_ds = load_dataset( + path, streaming=True, split=split, name=name, data_files=data_files + ) if skip: LOG.info(f"Skipping {skip} samples from the dataset")