Compare commits

...

1 Commit

Author SHA1 Message Date
Wing Lian
385736fae1 fix linter issue from merge 2025-01-13 12:55:03 -05:00

View File

@@ -109,7 +109,9 @@ def prepare_dataset(cfg, tokenizer, processor=None):
cfg.pretraining_dataset[0]["type"] or "pretrain",
)
-iter_ds = load_dataset(path, streaming=True, split=split, name=name, data_files=data_files)
+iter_ds = load_dataset(
+path, streaming=True, split=split, name=name, data_files=data_files
+)
if skip:
LOG.info(f"Skipping {skip} samples from the dataset")
iter_ds = iter_ds.skip(skip)