diff --git a/src/axolotl/utils/config/models/input/v0_4_1/__init__.py b/src/axolotl/utils/config/models/input/v0_4_1/__init__.py index 44e247886..e5edf8e7b 100644 --- a/src/axolotl/utils/config/models/input/v0_4_1/__init__.py +++ b/src/axolotl/utils/config/models/input/v0_4_1/__init__.py @@ -489,7 +489,7 @@ class HyperparametersConfig(BaseModel): adam_beta1: Optional[float] = None adam_beta2: Optional[float] = None max_grad_norm: Optional[float] = None - num_epochs: int = Field(default=1) + num_epochs: float = Field(default=1.0) @field_validator("batch_size") @classmethod diff --git a/src/axolotl/utils/trainer.py b/src/axolotl/utils/trainer.py index bfd21703d..caa74fccc 100644 --- a/src/axolotl/utils/trainer.py +++ b/src/axolotl/utils/trainer.py @@ -374,7 +374,7 @@ def calculate_total_num_steps(cfg, train_dataset, update=True): if cfg.sample_packing_eff_est: total_num_steps = ( # match count to len est in dataloader - ( + int( math.floor( 0.99 * cfg.total_num_tokens