From a030dad65756bdbdbc6a07f3ea7361bbfc44742c Mon Sep 17 00:00:00 2001 From: Dan Saunders Date: Mon, 13 Jan 2025 17:25:12 +0000 Subject: [PATCH] fix --- src/axolotl/utils/data/sft.py | 4 +++- tests/e2e/test_mamba.py | 7 +------ 2 files changed, 4 insertions(+), 7 deletions(-) diff --git a/src/axolotl/utils/data/sft.py b/src/axolotl/utils/data/sft.py index aff047675..de373c06e 100644 --- a/src/axolotl/utils/data/sft.py +++ b/src/axolotl/utils/data/sft.py @@ -109,7 +109,9 @@ def prepare_dataset(cfg, tokenizer, processor=None): cfg.pretraining_dataset[0]["type"] or "pretrain", ) - iter_ds = load_dataset(path, streaming=True, split=split, name=name, data_files=data_files) + iter_ds = load_dataset( + path, streaming=True, split=split, name=name, data_files=data_files + ) if skip: LOG.info(f"Skipping {skip} samples from the dataset") iter_ds = iter_ds.skip(skip) diff --git a/tests/e2e/test_mamba.py b/tests/e2e/test_mamba.py index 0591fb23a..4b4db3058 100644 --- a/tests/e2e/test_mamba.py +++ b/tests/e2e/test_mamba.py @@ -63,10 +63,5 @@ class TestMamba(unittest.TestCase): cli_args = TrainerCliArgs() dataset_meta = load_datasets(cfg=cfg, cli_args=cli_args) -<<<<<<< HEAD - train(cfg=cfg, cli_args=cli_args, dataset_meta=dataset_meta) - check_model_output_exists(temp_dir, cfg) -======= train(cfg=cfg, dataset_meta=dataset_meta) - assert (Path(temp_dir) / "pytorch_model.bin").exists() ->>>>>>> 2a421127 (continued cleanup and documentation) + check_model_output_exists(temp_dir, cfg)