Compare commits

..

1 Commit

Author SHA1 Message Date
Wing Lian
dcd916b29b bump transformers 4.57.3 2025-12-02 10:33:44 -05:00
3 changed files with 4 additions and 6 deletions

View File

@@ -30,7 +30,7 @@ eval_sample_packing: true
gradient_accumulation_steps: 4 gradient_accumulation_steps: 4
micro_batch_size: 4 micro_batch_size: 4
num_epochs: 1 num_epochs: 1
warmup_ratio: 0.1 warmup_steps: 0.1
optimizer: adamw_8bit optimizer: adamw_8bit
lr_scheduler: cosine lr_scheduler: cosine
@@ -44,7 +44,7 @@ resume_from_checkpoint:
sdp_attention: true sdp_attention: true
logging_steps: 1 logging_steps: 1
save_strategy: epoch save_strategy: best
eval_strategy: epoch eval_strategy: epoch
special_tokens: special_tokens:

View File

@@ -13,7 +13,7 @@ packaging==23.2
huggingface_hub>=0.36.0 huggingface_hub>=0.36.0
peft>=0.18.0 peft>=0.18.0
tokenizers>=0.22.1 tokenizers>=0.22.1
transformers==4.57.1 transformers==4.57.3
accelerate==1.11.0 accelerate==1.11.0
datasets==4.4.1 datasets==4.4.1
deepspeed>=0.17.0 deepspeed>=0.17.0

View File

@@ -631,9 +631,7 @@ class AxolotlTrainer(
logs["tokens_per_second_per_gpu"] = round( logs["tokens_per_second_per_gpu"] = round(
self.state.last_tokens_per_second.item() / self.args.logging_steps, 2 self.state.last_tokens_per_second.item() / self.args.logging_steps, 2
) )
logs["total_tokens"] = int(self.state.total_tokens.item())
if hasattr(self.state, "total_tokens"):
logs["total_tokens"] = int(self.state.total_tokens.item())
del self._stored_metrics[train_eval] del self._stored_metrics[train_eval]