Fix: remove the numerous sequential logs (#2461)

* fix: remove sequential logs

* feat(doc): add docs for sample_pack_sequentially and curriculum_sampling
This commit is contained in:
NanoCode012
2025-04-01 20:20:00 +07:00
committed by GitHub
parent 9b95e06cbb
commit f4ae8816bb
2 changed files with 7 additions and 3 deletions

View File

@@ -320,9 +320,13 @@ total_num_tokens:
sample_packing_group_size: 100000
# The number of samples which can be packed into one sequence. Increase if using a large sequence_len with many short samples.
sample_packing_bin_size: 200
sample_pack_sequentially: # Optional[bool]. Whether to pack samples sequentially.
# Whether to concatenate samples during pretraining
pretraining_sample_concatenation:
curriculum_sampling: # Optional[bool]. Whether to use sequential sampling for curriculum learning.
# Use batch flattening for speedups when not using sample_packing
batch_flattening: