option to not concatenate during pretraining (#2263)

* option to not concatenate during pretraining

* simplify conditional and add doc to config.qmd
This commit is contained in:
Wing Lian
2025-01-20 14:07:34 -05:00
committed by GitHub
parent 8606093921
commit af727eedf7
4 changed files with 19 additions and 0 deletions

View File

@@ -244,6 +244,8 @@ total_num_tokens:
sample_packing_group_size: 100000
# The number of samples which can be packed into one sequence. Increase if using a large sequence_len with many short samples.
sample_packing_bin_size: 200
# Whether to concatenate samples during pretraining. Set to false to disable concatenation.
pretraining_sample_concatenation:
# Use batch flattening for speedups when not using sample_packing
batch_flattening: