From da97a21bdc32710d074b31ddd297c583da422437 Mon Sep 17 00:00:00 2001 From: Vincenzo di Cicco <112694549+v-dicicco@users.noreply.github.com> Date: Thu, 9 Jan 2025 22:01:22 +0100 Subject: [PATCH] Use SequentialSampler if curriculum_sampling is enabled with sample_packing (#2235) --- src/axolotl/core/trainer_builder.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/axolotl/core/trainer_builder.py b/src/axolotl/core/trainer_builder.py index e81740399..5cc2b2ea9 100755 --- a/src/axolotl/core/trainer_builder.py +++ b/src/axolotl/core/trainer_builder.py @@ -608,8 +608,14 @@ class AxolotlTrainer(SchedulerMixin, Trainer): self.state.train_batch_size or self.args.per_device_train_batch_size ) batch_max_len = train_batch_size * self.args.max_seq_length + + if self.args.curriculum_sampling: + sampler = SequentialSampler(self.train_dataset) + else: + sampler = RandomSampler(self.train_dataset) + return MultipackBatchSampler( - RandomSampler(self.train_dataset), + sampler, lengths=get_dataset_lengths(self.train_dataset), packing_efficiency_estimate=self.args.sample_packing_efficiency, batch_max_len=batch_max_len,