Fix: remove the numerous sequential logs (#2461)
* fix: remove sequential logs
* feat(doc): add docs for sample_pack_sequentially and curriculum_sampling
@@ -320,9 +320,13 @@ total_num_tokens:
 sample_packing_group_size: 100000
 # The number of samples which can be packed into one sequence. Increase if using a large sequence_len with many short samples.
 sample_packing_bin_size: 200
+sample_pack_sequentially: # Optional[bool]. Whether to pack samples sequentially.
+
 # whether to concatenate samples during pretraining
 pretraining_sample_concatenation:
 
+curriculum_sampling: # Optional[bool]. Whether to use sequential sampling for curriculum learning
+
 # Use batch flattening for speedups when not using sample_packing
 batch_flattening:
 
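For reference, a minimal sketch of how the two options documented above would be enabled in an axolotl YAML config. The base_model, datasets, and sequence_len values are placeholders for illustration, not part of this commit; only the last two keys come from the diff.

base_model: meta-llama/Llama-2-7b-hf   # placeholder
datasets:
  - path: data/train.jsonl             # placeholder
    type: alpaca
sequence_len: 2048
sample_packing: true
# Keep dataset order when packing instead of reordering samples for tighter bins.
sample_pack_sequentially: true
# Draw samples sequentially (no shuffling), e.g. for easy-to-hard curricula.
curriculum_sampling: true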
@@ -12,7 +12,9 @@ from torch.utils.data import BatchSampler, Sampler, SequentialSampler
 from axolotl.utils.distributed import reduce_and_broadcast
 
-LOG = logging.getLogger("axolotl.utils.samplers.multipack")
+LOG = logging.getLogger(__name__)
+
+LOG.setLevel(logging.INFO)
 
 
 @numba.njit
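The rename above swaps a hard-coded logger name for the module's import path; since the old string was "axolotl.utils.samplers.multipack", getLogger(__name__) resolves to the same logger when the module lives at that path. A self-contained sketch of the level gating this sets up, using only the standard library:

import logging

# Name the logger after the module, as in the hunk above.
LOG = logging.getLogger(__name__)
LOG.setLevel(logging.INFO)

logging.basicConfig()               # attach a handler so records are printed
LOG.debug("per-iteration detail")   # suppressed: DEBUG is below INFO
LOG.info("one-time summary")        # emitted

With the per-iteration debug calls deleted in the next two hunks, repeatedly iterating the sampler no longer floods the log at all.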
@@ -202,7 +204,6 @@ class MultipackBatchSampler(BatchSampler):
         lengths_cumsum = np.cumsum(lengths)
 
         if self.sequential:
-            LOG.debug("using sequential sample packing algorithm")
             batches, total_used, total_slots = allocate_sequentially(
                 lengths=lengths,
                 rank=0,
@@ -210,7 +211,6 @@ class MultipackBatchSampler(BatchSampler):
                 n=1,
             )
         else:
-            LOG.debug("using non-sequential sample packing algorithm")
             batches, total_used, total_slots = allocate(
                 lengths=lengths,
                 lengths_cumsum=lengths_cumsum,
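allocate_sequentially and allocate are the project's own routines, and their bodies are not part of this diff. As an illustration only, a rough sketch of what a sequential packing pass can look like: walk the samples in dataset order and open a new pack whenever the next one would overflow the token budget. The function name and capacity parameter are hypothetical, and the distributed sharding handled by the rank/n arguments above is ignored here.

import numpy as np

def pack_sequentially(lengths, capacity):
    # Greedy, order-preserving packing: no sorting, no bin search.
    batches, current, used = [], [], 0
    for idx, length in enumerate(lengths):
        if used + length > capacity and current:
            batches.append(current)   # close the current pack
            current, used = [], 0
        current.append(idx)
        used += length
    if current:
        batches.append(current)
    total_used = int(np.sum(lengths))       # tokens actually placed
    total_slots = len(batches) * capacity   # token capacity across packs
    return batches, total_used, total_slots

# Five samples in curriculum order, 2048-token budget per pack:
# batches -> [[0, 1], [2], [3], [4]]
print(pack_sequentially([512, 1024, 768, 2048, 256], capacity=2048))

Preserving order is the point: combined with curriculum_sampling, an easy-to-hard dataset ordering survives packing, whereas a non-sequential allocator typically reorders samples to fill bins more tightly at the cost of that ordering.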