limit num_proc when saving datasets to disk (#2948) [skip ci]
* Limit num_proc when saving datasets to disk.
* Enforce a minimum of 1 in case the computed value rounds down to 0; a sane divisor is at least 8 rows per worker when saving.
* Update the test fixtures to set `dataset_processes`, since that value should never be None.
* Improve reusability for tests.
This commit is contained in:
@@ -141,6 +141,7 @@ class TestDatasetPreparation:
|
||||
"type": "alpaca",
|
||||
},
|
||||
],
|
||||
"dataset_processes": 4,
|
||||
}
|
||||
)
|
||||
|
||||
@@ -179,6 +180,7 @@ class TestDatasetPreparation:
|
||||
"type": "alpaca",
|
||||
},
|
||||
],
|
||||
"dataset_processes": 4,
|
||||
}
|
||||
)
|
||||
|
||||
@@ -217,6 +219,7 @@ class TestDatasetPreparation:
|
||||
"type": "alpaca",
|
||||
},
|
||||
],
|
||||
"dataset_processes": 4,
|
||||
}
|
||||
)
|
||||
|
||||
@@ -249,6 +252,7 @@ class TestDatasetPreparation:
|
||||
"type": "alpaca",
|
||||
},
|
||||
],
|
||||
"dataset_processes": 4,
|
||||
}
|
||||
)
|
||||
|
||||
@@ -281,6 +285,7 @@ class TestDatasetPreparation:
|
||||
"type": "alpaca",
|
||||
},
|
||||
],
|
||||
"dataset_processes": 4,
|
||||
}
|
||||
)
|
||||
|
||||
@@ -365,6 +370,7 @@ class TestDatasetPreparation:
|
||||
"rl": "dpo",
|
||||
"chat_template": "llama3",
|
||||
"datasets": [ALPACA_MESSAGES_CONFIG_REVISION],
|
||||
"dataset_processes": 4,
|
||||
}
|
||||
)
|
||||
|
||||
@@ -466,6 +472,7 @@ class TestDatasetPreparation:
|
||||
"type": "alpaca",
|
||||
},
|
||||
],
|
||||
"dataset_processes": 4,
|
||||
}
|
||||
)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user