fix
This commit is contained in:
@@ -109,7 +109,9 @@ def prepare_dataset(cfg, tokenizer, processor=None):
|
|||||||
cfg.pretraining_dataset[0]["type"] or "pretrain",
|
cfg.pretraining_dataset[0]["type"] or "pretrain",
|
||||||
)
|
)
|
||||||
|
|
||||||
iter_ds = load_dataset(path, streaming=True, split=split, name=name, data_files=data_files)
|
iter_ds = load_dataset(
|
||||||
|
path, streaming=True, split=split, name=name, data_files=data_files
|
||||||
|
)
|
||||||
if skip:
|
if skip:
|
||||||
LOG.info(f"Skipping {skip} samples from the dataset")
|
LOG.info(f"Skipping {skip} samples from the dataset")
|
||||||
iter_ds = iter_ds.skip(skip)
|
iter_ds = iter_ds.skip(skip)
|
||||||
|
|||||||
@@ -63,10 +63,5 @@ class TestMamba(unittest.TestCase):
|
|||||||
cli_args = TrainerCliArgs()
|
cli_args = TrainerCliArgs()
|
||||||
dataset_meta = load_datasets(cfg=cfg, cli_args=cli_args)
|
dataset_meta = load_datasets(cfg=cfg, cli_args=cli_args)
|
||||||
|
|
||||||
<<<<<<< HEAD
|
|
||||||
train(cfg=cfg, cli_args=cli_args, dataset_meta=dataset_meta)
|
|
||||||
check_model_output_exists(temp_dir, cfg)
|
|
||||||
=======
|
|
||||||
train(cfg=cfg, dataset_meta=dataset_meta)
|
train(cfg=cfg, dataset_meta=dataset_meta)
|
||||||
assert (Path(temp_dir) / "pytorch_model.bin").exists()
|
check_model_output_exists(temp_dir, cfg)
|
||||||
>>>>>>> 2a421127 (continued cleanup and documentation)
|
|
||||||
|
|||||||
Reference in New Issue
Block a user