Compare commits

..

1 Commit

Author SHA1 Message Date
Wing Lian
dcd916b29b bump transformers 4.57.3 2025-12-02 10:33:44 -05:00
3 changed files with 4 additions and 6 deletions

View File

@@ -30,7 +30,7 @@ eval_sample_packing: true
gradient_accumulation_steps: 4
micro_batch_size: 4
num_epochs: 1
-warmup_ratio: 0.1
+warmup_steps: 0.1
optimizer: adamw_8bit
lr_scheduler: cosine
@@ -44,7 +44,7 @@ resume_from_checkpoint:
sdp_attention: true
logging_steps: 1
-save_strategy: epoch
+save_strategy: best
eval_strategy: epoch
special_tokens:

View File

@@ -13,7 +13,7 @@ packaging==23.2
huggingface_hub>=0.36.0
peft>=0.18.0
tokenizers>=0.22.1
-transformers==4.57.1
+transformers==4.57.3
accelerate==1.11.0
datasets==4.4.1
deepspeed>=0.17.0

View File

@@ -631,9 +631,7 @@ class AxolotlTrainer(
logs["tokens_per_second_per_gpu"] = round(
self.state.last_tokens_per_second.item() / self.args.logging_steps, 2
)
-if hasattr(self.state, "total_tokens"):
-logs["total_tokens"] = int(self.state.total_tokens.item())
+logs["total_tokens"] = int(self.state.total_tokens.item())
del self._stored_metrics[train_eval]