fix: crash when pretraining_dataset is used with dispatch_batches set to false (#2558)
This commit is contained in:
@@ -134,10 +134,9 @@ def prepare_dataset(cfg, tokenizer, processor=None, preprocess_iterable=None):
|
|||||||
"csv", data_files=f.name, split="train", streaming=True
|
"csv", data_files=f.name, split="train", streaming=True
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
if is_local_main_process():
|
iter_ds = load_dataset(
|
||||||
iter_ds = load_dataset(
|
path, streaming=True, split=split, name=name, data_files=data_files
|
||||||
path, streaming=True, split=split, name=name, data_files=data_files
|
)
|
||||||
)
|
|
||||||
|
|
||||||
if skip:
|
if skip:
|
||||||
LOG.info(f"Skipping {skip} samples from the dataset")
|
LOG.info(f"Skipping {skip} samples from the dataset")
|
||||||
|
|||||||
Reference in New Issue
Block a user