fix: revert changing default optimizer to muon (#2965) [skip ci]

This commit is contained in:
NanoCode012
2025-07-22 21:00:30 +07:00
committed by GitHub
parent 631268a0ca
commit 01d8175d48
6 changed files with 6 additions and 6 deletions

View File

@@ -53,7 +53,7 @@ wandb_log_model:
gradient_accumulation_steps: 1
micro_batch_size: 1
num_epochs: 4
-optimizer: muon
+optimizer: adamw_bnb_8bit
lr_scheduler: cosine
learning_rate: 0.0002

View File

@@ -60,7 +60,7 @@ wandb_log_model:
gradient_accumulation_steps: 4
micro_batch_size: 2
num_epochs: 1
-optimizer: muon
+optimizer: adamw_bnb_8bit
lr_scheduler: cosine
learning_rate: 0.0002

View File

@@ -55,7 +55,7 @@ wandb_log_model:
gradient_accumulation_steps: 4
micro_batch_size: 2
num_epochs: 1
-optimizer: muon
+optimizer: adamw_bnb_8bit
lr_scheduler: cosine
learning_rate: 0.0002

View File

@@ -39,7 +39,7 @@ wandb_log_model:
gradient_accumulation_steps: 4
micro_batch_size: 1
num_epochs: 1
-optimizer: muon
+optimizer: adamw_bnb_8bit
lr_scheduler: cosine
learning_rate: 0.0002

View File

@@ -35,7 +35,7 @@ wandb_log_model:
gradient_accumulation_steps: 4
micro_batch_size: 1
num_epochs: 1
-optimizer: muon
+optimizer: adamw_bnb_8bit
lr_scheduler: cosine
learning_rate: 0.0002

View File

@@ -35,7 +35,7 @@ wandb_log_model:
gradient_accumulation_steps: 4
micro_batch_size: 1
num_epochs: 1
-optimizer: muon
+optimizer: adamw_bnb_8bit
lr_scheduler: cosine
learning_rate: 0.0002