From 57d9bf711cf3007fc355dec4d088491af006a21f Mon Sep 17 00:00:00 2001 From: Wing Lian Date: Tue, 8 Aug 2023 21:27:55 -0400 Subject: [PATCH] let's not cleanup the cached datasets --- scripts/finetune.py | 6 ------ src/axolotl/utils/data.py | 1 + 2 files changed, 1 insertion(+), 6 deletions(-) diff --git a/scripts/finetune.py b/scripts/finetune.py index 846127d29..ef65576ea 100644 --- a/scripts/finetune.py +++ b/scripts/finetune.py @@ -371,12 +371,6 @@ def train( model = BetterTransformer.reverse(model) model.save_pretrained(cfg.output_dir) - trainer.accelerator.wait_for_everyone() - if trainer.accelerator.is_main_process: - train_dataset.cleanup_cache_files() - if eval_dataset: - eval_dataset.cleanup_cache_files() - if __name__ == "__main__": fire.Fire(train) diff --git a/src/axolotl/utils/data.py b/src/axolotl/utils/data.py index d8053ba15..2ff2978ac 100644 --- a/src/axolotl/utils/data.py +++ b/src/axolotl/utils/data.py @@ -377,6 +377,7 @@ def load_prepare_datasets( dataset = Dataset.from_list(list(constant_len_dataset)) # filter out bad data + # TODO convert to dataset.filter(...) dataset = Dataset.from_list( [ d