Fix: remove the numerous sequential logs (#2461)

* fix: remove sequential logs

* feat(doc): add docs for sample_pack_sequentially and curriculum_sampling
This commit is contained in:
NanoCode012
2025-04-01 20:20:00 +07:00
committed by GitHub
parent 9b95e06cbb
commit f4ae8816bb
2 changed files with 7 additions and 3 deletions

View File

@@ -320,9 +320,13 @@ total_num_tokens:
sample_packing_group_size: 100000
# The number of samples which can be packed into one sequence. Increase if using a large sequence_len with many short samples.
sample_packing_bin_size: 200
sample_pack_sequentially: # Optional[bool]. Whether to pack samples sequentially.
# Optional[bool]. Whether to concatenate samples during pretraining.
pretraining_sample_concatenation:
curriculum_sampling: # Optional[bool]. Whether to use sequential sampling for curriculum learning
# Use batch flattening for speedups when not using sample_packing
batch_flattening:

View File

@@ -12,7 +12,9 @@ from torch.utils.data import BatchSampler, Sampler, SequentialSampler
from axolotl.utils.distributed import reduce_and_broadcast
LOG = logging.getLogger("axolotl.utils.samplers.multipack")
LOG = logging.getLogger(__name__)
LOG.setLevel(logging.INFO)
@numba.njit
@@ -202,7 +204,6 @@ class MultipackBatchSampler(BatchSampler):
lengths_cumsum = np.cumsum(lengths)
if self.sequential:
LOG.debug("using sequential sample packing algorithm")
batches, total_used, total_slots = allocate_sequentially(
lengths=lengths,
rank=0,
@@ -210,7 +211,6 @@ class MultipackBatchSampler(BatchSampler):
n=1,
)
else:
LOG.debug("using non-sequential sample packing algorithm")
batches, total_used, total_slots = allocate(
lengths=lengths,
lengths_cumsum=lengths_cumsum,