feat: add num_proc and load_from_cache_file to RL dataset mapping

This commit is contained in:
NanoCode012
2024-12-06 15:26:09 +07:00
parent 7fa1089cea
commit 86ba574698

View File

@@ -71,6 +71,8 @@ def map_dataset(cfg, data_set, ds_transform_fn, tokenizer, **map_kwargs):
data_set = data_set.map(
ds_transform_fn,
num_proc=cfg.dataset_processes,
load_from_cache_file=not cfg.is_preprocess,
desc="Mapping RL Dataset",
**map_kwargs,
)