diff --git a/examples/llama-3/diffusion/sft-1b.yaml b/examples/llama-3/diffusion/sft-1b.yaml
index a9a84ace4..d34144d92 100644
--- a/examples/llama-3/diffusion/sft-1b.yaml
+++ b/examples/llama-3/diffusion/sft-1b.yaml
@@ -44,7 +44,7 @@ resume_from_checkpoint:

 sdp_attention: true
 logging_steps: 1
-save_strategy: best
+save_strategy: epoch
 eval_strategy: epoch

 special_tokens:
diff --git a/src/axolotl/core/trainers/base.py b/src/axolotl/core/trainers/base.py
index 7896c6088..cd4b86641 100644
--- a/src/axolotl/core/trainers/base.py
+++ b/src/axolotl/core/trainers/base.py
@@ -631,7 +631,9 @@ class AxolotlTrainer(

             logs["tokens_per_second_per_gpu"] = round(
                 self.state.last_tokens_per_second.item() / self.args.logging_steps, 2
             )
-            logs["total_tokens"] = int(self.state.total_tokens.item())
+
+            if hasattr(self.state, "total_tokens"):
+                logs["total_tokens"] = int(self.state.total_tokens.item())

         del self._stored_metrics[train_eval]