From eea6e8303adb15c3d2d8415e0a8ba479c2d9c67b Mon Sep 17 00:00:00 2001
From: Casper
Date: Mon, 15 Jan 2024 23:48:24 +0100
Subject: [PATCH] Disable datasets caching when preparing dataset for packing

---
 src/axolotl/utils/trainer.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/axolotl/utils/trainer.py b/src/axolotl/utils/trainer.py
index 5588e768f..871ee603c 100644
--- a/src/axolotl/utils/trainer.py
+++ b/src/axolotl/utils/trainer.py
@@ -108,7 +108,7 @@ def disable_datasets_caching():
 
 def process_datasets_for_packing(cfg, train_dataset, eval_dataset, tokenizer):
     drop_long = partial(drop_long_seq, sequence_len=cfg.sequence_len)
-    with zero_first(is_main_process()):
+    with zero_first(is_main_process()), disable_datasets_caching():
         if cfg.group_by_length:
             train_dataset = train_dataset.map(
                 add_length, num_proc=cfg.dataset_processes