From a10a8265efde4ec61037560e3b8e2e31dab984af Mon Sep 17 00:00:00 2001
From: Wing Lian
Date: Wed, 3 May 2023 15:06:03 -0400
Subject: [PATCH] fix log sweep lr

---
 src/axolotl/utils/schedulers.py | 13 ++++++-------
 src/axolotl/utils/trainer.py    |  1 +
 2 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/src/axolotl/utils/schedulers.py b/src/axolotl/utils/schedulers.py
index 4a90436e7..72916f037 100644
--- a/src/axolotl/utils/schedulers.py
+++ b/src/axolotl/utils/schedulers.py
@@ -19,16 +19,15 @@ class InterpolatingLogScheduler(LRScheduler):
         self.num_steps = num_steps
         self.min_lr = min_lr
         self.max_lr = max_lr
-        self.q = (max_lr / min_lr) ** (1 / num_steps - 1)
+        self.q = (max_lr / min_lr) ** (1 / (num_steps - 1))
         super().__init__(optimizer, last_epoch)
 
     def get_lr(self):
-        if self.last_epoch == 0:
-            lr = self.min_lr
+        if self.last_epoch <= 0:
+            lrs = [self.min_lr for base_lr in self.base_lrs]
         elif self.last_epoch < self.num_steps:
-            # FIXME, not perfect as we need to account for number of steps are in an epoch, etc
-            lr = self.min_lr * (self.q ** self.last_epoch)
+            lrs = [self.min_lr * (self.q ** (self.last_epoch - 1)) for base_lr in self.base_lrs]
         else:
-            lr = self.max_lr
+            lrs = [self.max_lr for base_lr in self.base_lrs]
 
-        return [lr for _ in self.base_lrs]
+        return lrs
diff --git a/src/axolotl/utils/trainer.py b/src/axolotl/utils/trainer.py
index 63c6856b7..90a72a465 100644
--- a/src/axolotl/utils/trainer.py
+++ b/src/axolotl/utils/trainer.py
@@ -86,6 +86,7 @@ def setup_trainer(cfg, train_dataset, eval_dataset, model, tokenizer):
     training_args = transformers.TrainingArguments(
         per_device_train_batch_size=cfg.micro_batch_size,
         gradient_accumulation_steps=cfg.gradient_accumulation_steps,
+        eval_accumulation_steps=cfg.gradient_accumulation_steps,
         num_train_epochs=cfg.num_epochs,
         learning_rate=cfg.learning_rate,
         evaluation_strategy="steps" if cfg.val_set_size > 0 else "no",