Let's not clean up the cached datasets
This commit is contained in:
@@ -371,12 +371,6 @@ def train(
|
||||
model = BetterTransformer.reverse(model)
|
||||
model.save_pretrained(cfg.output_dir)
|
||||
|
||||
trainer.accelerator.wait_for_everyone()
|
||||
if trainer.accelerator.is_main_process:
|
||||
train_dataset.cleanup_cache_files()
|
||||
if eval_dataset:
|
||||
eval_dataset.cleanup_cache_files()
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
fire.Fire(train)
|
||||
|
||||
@@ -377,6 +377,7 @@ def load_prepare_datasets(
|
||||
dataset = Dataset.from_list(list(constant_len_dataset))
|
||||
|
||||
# filter out bad data
|
||||
# TODO convert to dataset.filter(...)
|
||||
dataset = Dataset.from_list(
|
||||
[
|
||||
d
|
||||
|
||||
Reference in New Issue
Block a user