fix: total tokens and defaults in config

fix: fractional warmup_steps is not allowed (use warmup_ratio instead)
2025-12-02 21:38:10 +07:00 · 2025-12-02 21:30:15 +07:00
3 changed files with 6 additions and 4 deletions
--- a/examples/llama-3/diffusion/sft-1b.yaml
+++ b/examples/llama-3/diffusion/sft-1b.yaml
@@ -30,7 +30,7 @@ eval_sample_packing: true
 gradient_accumulation_steps: 4
 micro_batch_size: 4
 num_epochs: 1
-warmup_steps: 0.1
+warmup_ratio: 0.1

 optimizer: adamw_8bit
 lr_scheduler: cosine
@@ -44,7 +44,7 @@ resume_from_checkpoint:
 sdp_attention: true

 logging_steps: 1
-save_strategy: best
+save_strategy: epoch
 eval_strategy: epoch

 special_tokens:
--- a/requirements.txt
+++ b/requirements.txt
@@ -13,7 +13,7 @@ packaging==23.2
 huggingface_hub>=0.36.0
 peft>=0.18.0
 tokenizers>=0.22.1
-transformers==4.57.3
+transformers==4.57.1
 accelerate==1.11.0
 datasets==4.4.1
 deepspeed>=0.17.0
--- a/src/axolotl/core/trainers/base.py
+++ b/src/axolotl/core/trainers/base.py
@@ -631,7 +631,9 @@ class AxolotlTrainer(
            logs["tokens_per_second_per_gpu"] = round(
                self.state.last_tokens_per_second.item() / self.args.logging_steps, 2
            )
-            logs["total_tokens"] = int(self.state.total_tokens.item())
+
+            if hasattr(self.state, "total_tokens"):
+                logs["total_tokens"] = int(self.state.total_tokens.item())

        del self._stored_metrics[train_eval]
Author	SHA1	Message	Date
NanoCode012	08c8f3f22f	fix: total tokens and defaults in config	2025-12-02 21:38:10 +07:00
NanoCode012	76f0fe2621	fix: steps not allowed fractional	2025-12-02 21:30:15 +07:00