diff --git a/src/axolotl/utils/data/sft.py b/src/axolotl/utils/data/sft.py index aff047675..de373c06e 100644 --- a/src/axolotl/utils/data/sft.py +++ b/src/axolotl/utils/data/sft.py @@ -109,7 +109,9 @@ def prepare_dataset(cfg, tokenizer, processor=None): cfg.pretraining_dataset[0]["type"] or "pretrain", ) - iter_ds = load_dataset(path, streaming=True, split=split, name=name, data_files=data_files) + iter_ds = load_dataset( + path, streaming=True, split=split, name=name, data_files=data_files + ) if skip: LOG.info(f"Skipping {skip} samples from the dataset") iter_ds = iter_ds.skip(skip)