diff --git a/docs/config.qmd b/docs/config.qmd index 2ed365d75..7c41c5126 100644 --- a/docs/config.qmd +++ b/docs/config.qmd @@ -320,9 +320,13 @@ total_num_tokens: sample_packing_group_size: 100000 # The number of samples which can be packed into one sequence. Increase if using a large sequence_len with many short samples. sample_packing_bin_size: 200 +sample_pack_sequentially: # Optional[bool]. Whether to pack samples sequentially. + # whether to concatenate samples during pretraining pretraining_sample_concatenation: +curriculum_sampling: # Optional[bool]. Whether to use sequential sampling for curriculum learning. + # Use batch flattening for speedups when not using sample_packing batch_flattening: diff --git a/src/axolotl/utils/samplers/multipack.py b/src/axolotl/utils/samplers/multipack.py index ef47aca87..0d8806d8b 100644 --- a/src/axolotl/utils/samplers/multipack.py +++ b/src/axolotl/utils/samplers/multipack.py @@ -12,7 +12,9 @@ from torch.utils.data import BatchSampler, Sampler, SequentialSampler from axolotl.utils.distributed import reduce_and_broadcast -LOG = logging.getLogger("axolotl.utils.samplers.multipack") +LOG = logging.getLogger(__name__) + +LOG.setLevel(logging.INFO) @numba.njit @@ -202,7 +204,6 @@ class MultipackBatchSampler(BatchSampler): lengths_cumsum = np.cumsum(lengths) if self.sequential: - LOG.debug("using sequential sample packing algorithm") batches, total_used, total_slots = allocate_sequentially( lengths=lengths, rank=0, @@ -210,7 +211,6 @@ class MultipackBatchSampler(BatchSampler): n=1, ) else: - LOG.debug("using non-sequential sample packing algorithm") batches, total_used, total_slots = allocate( lengths=lengths, lengths_cumsum=lengths_cumsum,