From c9797de6bb208dc95eb7374e76fefaa9f00a58c8 Mon Sep 17 00:00:00 2001 From: michelyang Date: Fri, 16 May 2025 10:06:20 -0700 Subject: [PATCH] Add num_proc to fix data set slow processing issue (#2681) [skip ci] --- src/axolotl/utils/data/rl.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/axolotl/utils/data/rl.py b/src/axolotl/utils/data/rl.py index eaa834822..dc5920099 100644 --- a/src/axolotl/utils/data/rl.py +++ b/src/axolotl/utils/data/rl.py @@ -72,6 +72,7 @@ def map_dataset(cfg, data_set, ds_transform_fn, tokenizer, **map_kwargs): data_set = data_set.map( ds_transform_fn, desc="Mapping RL Dataset", + num_proc=cfg.dataset_processes, **map_kwargs, )