From 86ba574698f3ceffb8012b9729d32d15de437946 Mon Sep 17 00:00:00 2001
From: NanoCode012
Date: Fri, 6 Dec 2024 15:26:09 +0700
Subject: [PATCH] feat: add num_proc and load from cache for rl mapping

---
 src/axolotl/utils/data/rl.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/axolotl/utils/data/rl.py b/src/axolotl/utils/data/rl.py
index eaa834822..821d28645 100644
--- a/src/axolotl/utils/data/rl.py
+++ b/src/axolotl/utils/data/rl.py
@@ -71,6 +71,8 @@ def map_dataset(cfg, data_set, ds_transform_fn, tokenizer, **map_kwargs):
 
     data_set = data_set.map(
         ds_transform_fn,
+        num_proc=cfg.dataset_processes,
+        load_from_cache_file=not cfg.is_preprocess,
         desc="Mapping RL Dataset",
         **map_kwargs,
     )