From fd55bc87e2805e37038c25acc5005f3bcf525da2 Mon Sep 17 00:00:00 2001 From: Aman Karmani Date: Tue, 29 Aug 2023 01:03:41 +0000 Subject: [PATCH] use math.ceil instead of round /cc #498 --- src/axolotl/utils/trainer.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/axolotl/utils/trainer.py b/src/axolotl/utils/trainer.py index 1bc190fe2..fcbdd6d3e 100644 --- a/src/axolotl/utils/trainer.py +++ b/src/axolotl/utils/trainer.py @@ -588,7 +588,9 @@ def setup_trainer(cfg, train_dataset, eval_dataset, model, tokenizer, total_num_ "padding": True, # True/"longest" is the default } if cfg.pad_to_sequence_len: - data_collator_kwargs["pad_to_multiple_of"] = 64 * round(cfg.sequence_len / 64) + data_collator_kwargs["pad_to_multiple_of"] = 64 * math.ceil( + cfg.sequence_len / 64 + ) else: # A100 is best at 64, while others at 8. Let's use the larger so we don't have to check # https://docs.nvidia.com/deeplearning/performance/dl-performance-matrix-multiplication/index.html