Let's not clean up the cached datasets

This commit is contained in:
Wing Lian
2023-08-08 21:27:55 -04:00
parent 26983a1974
commit 57d9bf711c
2 changed files with 1 addition and 6 deletions

View File

@@ -371,12 +371,6 @@ def train(
model = BetterTransformer.reverse(model)
model.save_pretrained(cfg.output_dir)
trainer.accelerator.wait_for_everyone()
if trainer.accelerator.is_main_process:
train_dataset.cleanup_cache_files()
if eval_dataset:
eval_dataset.cleanup_cache_files()
if __name__ == "__main__":
fire.Fire(train)

View File

@@ -377,6 +377,7 @@ def load_prepare_datasets(
dataset = Dataset.from_list(list(constant_len_dataset))
# filter out bad data
# TODO convert to dataset.filter(...)
dataset = Dataset.from_list(
[
d