Compare commits

...

2 Commits

Author SHA1 Message Date
NanoCode012
08c8f3f22f fix: total tokens and defaults in config 2025-12-02 21:38:10 +07:00
NanoCode012
76f0fe2621 fix: steps not allowed fractional 2025-12-02 21:30:15 +07:00
2 changed files with 5 additions and 3 deletions

View File

@@ -30,7 +30,7 @@ eval_sample_packing: true
 gradient_accumulation_steps: 4
 micro_batch_size: 4
 num_epochs: 1
-warmup_steps: 0.1
+warmup_ratio: 0.1
 optimizer: adamw_8bit
 lr_scheduler: cosine
@@ -44,7 +44,7 @@ resume_from_checkpoint:
 sdp_attention: true
 logging_steps: 1
-save_strategy: best
+save_strategy: epoch
 eval_strategy: epoch
 special_tokens:

View File

@@ -631,6 +631,8 @@ class AxolotlTrainer(
 logs["tokens_per_second_per_gpu"] = round(
     self.state.last_tokens_per_second.item() / self.args.logging_steps, 2
 )
-logs["total_tokens"] = int(self.state.total_tokens.item())
+if hasattr(self.state, "total_tokens"):
+    logs["total_tokens"] = int(self.state.total_tokens.item())
 del self._stored_metrics[train_eval]