diff --git a/tests/test_datasets.py b/tests/test_datasets.py index 8e2955414..a57b6d83e 100644 --- a/tests/test_datasets.py +++ b/tests/test_datasets.py @@ -306,6 +306,10 @@ class TestDatasetPreparation(unittest.TestCase): """Verify that processing data from the hub works with a specific revision""" with tempfile.TemporaryDirectory() as tmp_dir: prepared_path = Path(tmp_dir) / "prepared" + + # make sure prepared_path is empty + shutil.rmtree(prepared_path, ignore_errors=True) + cfg = DictDefault( { "tokenizer_config": "huggyllama/llama-7b",