Changes from dataset_processes to dataset_num_proc (#3352) [skip ci]

* changes from dataset_processes to dataset_num_proc

* deprecation message improved

---------

Co-authored-by: Juliana Nieto Cárdenas <jnietoca@purdue.edu>
This commit is contained in:
tgoab
2026-02-10 05:44:17 -05:00
committed by GitHub
parent 0343a72cc9
commit 530a0c0bf0
5 changed files with 11 additions and 8 deletions

View File

@@ -79,7 +79,7 @@ def fixture_base_cfg():
"ddp_timeout": 1800,
"ddp_bucket_cap_mb": 25,
"ddp_broadcast_buffers": False,
"dataset_processes": 4,
"dataset_num_proc": 4,
}
)

View File

@@ -30,7 +30,7 @@ class TestStreamingDatasets:
"sample_packing": sample_packing,
"pretrain_multipack_attn": sample_packing,
"streaming_multipack_buffer_size": 10000,
"dataset_processes": 1,
"dataset_num_proc": 1,
"special_tokens": {
"pad_token": "<|endoftext|>",
},