Changes from dataset_processes to dataset_num_proc (#3352) [skip ci]

* changes from dataset_processes to dataset_num_proc

* deprecation message improved

---------

Co-authored-by: Juliana Nieto Cárdenas <jnietoca@purdue.edu>
This commit is contained in:
tgoab
2026-02-10 05:44:17 -05:00
committed by GitHub
parent 0343a72cc9
commit 530a0c0bf0
5 changed files with 11 additions and 8 deletions

View File

@@ -30,7 +30,7 @@ class TestStreamingDatasets:
"sample_packing": sample_packing,
"pretrain_multipack_attn": sample_packing,
"streaming_multipack_buffer_size": 10000,
-"dataset_processes": 1,
+"dataset_num_proc": 1,
"special_tokens": {
"pad_token": "<|endoftext|>",
},