Add dataset_shard_num and dataset_shard_idx

2023-05-27 23:51:17 +09:00
parent 87dffbc451
commit 8626b54aab
1 changed files with 4 additions and 0 deletions
--- a/README.md
+++ b/README.md
@@ -209,6 +209,10 @@ dataset_prepared_path: data/last_run_prepared
 push_dataset_to_hub: # repo path
 # How much of the dataset to set aside as evaluation. 1 = 100%, 0.50 = 50%, etc
 val_set_size: 0.04
+# Number of shards to split the whole dataset into
+dataset_shard_num:
+# Index of the shard (out of dataset_shard_num) to use from the whole dataset
+dataset_shard_idx:

 # The maximum length of an input to train with; this should typically be less than 2048,
 # as most models have a token/context limit of 2048