From fae6ed8092102d3b28d9ab492925c1d2647b2d2c Mon Sep 17 00:00:00 2001 From: NanoCode012 Date: Fri, 11 Aug 2023 12:17:07 +0900 Subject: [PATCH] Update README.md on pretraining_dataset (#360) * Update README.md on pretraining_dataset * Fix message --- README.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/README.md b/README.md index 067ef0d05..8dbb535cd 100644 --- a/README.md +++ b/README.md @@ -505,6 +505,9 @@ torchdistx_path: # Set padding for data collator to 'longest' collator_pad_to_longest: +# Set to an HF dataset for type: 'completion' to stream it instead of pre-tokenizing +pretraining_dataset: + # Debug mode debug: