feat(dataset): add config to keep processed dataset in memory (#1152)

This commit is contained in:
NanoCode012
2024-01-20 13:19:28 +09:00
committed by GitHub
parent cbecf3e62a
commit 3db5f2fd17
3 changed files with 25 additions and 16 deletions

View File

@@ -618,6 +618,9 @@ push_dataset_to_hub: # repo path
# The maximum number of processes to use while preprocessing your input dataset.
# Defaults to `os.cpu_count()` if not set.
dataset_processes: # defaults to os.cpu_count() if not set
# Keep the processed dataset in memory while preprocessing.
# Only needed if the cached dataset is taking up too much storage.
dataset_keep_in_memory:
# push checkpoints to hub
hub_model_id: # repo path to push finetuned model
# how to push checkpoints to hub