Add num_proc to fix slow dataset processing (#2681) [skip ci]

This commit is contained in:
michelyang
2025-05-16 10:06:20 -07:00
committed by GitHub
parent 8f8a7afb05
commit c9797de6bb

View File

@@ -72,6 +72,7 @@ def map_dataset(cfg, data_set, ds_transform_fn, tokenizer, **map_kwargs):
data_set = data_set.map(
ds_transform_fn,
desc="Mapping RL Dataset",
num_proc=cfg.dataset_processes,
**map_kwargs,
)