fix log sweep lr

This commit is contained in:
Wing Lian
2023-05-03 15:06:03 -04:00
parent 9105935b00
commit a10a8265ef
2 changed files with 7 additions and 7 deletions

View File

@@ -19,16 +19,15 @@ class InterpolatingLogScheduler(LRScheduler):
         self.num_steps = num_steps
         self.min_lr = min_lr
         self.max_lr = max_lr
-        self.q = (max_lr / min_lr) ** (1 / num_steps - 1)
+        self.q = (max_lr / min_lr) ** (1 / (num_steps - 1))
         super().__init__(optimizer, last_epoch)
     def get_lr(self):
-        if self.last_epoch == 0:
-            lr = self.min_lr
+        if self.last_epoch <= 0:
+            lrs = [self.min_lr for base_lr in self.base_lrs]
         elif self.last_epoch < self.num_steps:
-            # FIXME, not perfect as we need to account for number of steps are in an epoch, etc
-            lr = self.min_lr * (self.q ** self.last_epoch)
+            lrs = [self.min_lr * (self.q ** (self.last_epoch - 1)) for base_lr in self.base_lrs]
         else:
-            lr = self.max_lr
-        return [lr for _ in self.base_lrs]
+            lrs = [self.max_lr for base_lr in self.base_lrs]
+        return lrs

View File

@@ -86,6 +86,7 @@ def setup_trainer(cfg, train_dataset, eval_dataset, model, tokenizer):
     training_args = transformers.TrainingArguments(
         per_device_train_batch_size=cfg.micro_batch_size,
         gradient_accumulation_steps=cfg.gradient_accumulation_steps,
+        eval_accumulation_steps=cfg.gradient_accumulation_steps,
         num_train_epochs=cfg.num_epochs,
         learning_rate=cfg.learning_rate,
         evaluation_strategy="steps" if cfg.val_set_size > 0 else "no",