use math.ceil instead of round /cc #498

This commit is contained in:
Aman Karmani
2023-08-29 01:03:41 +00:00
parent 8e197f6fb4
commit fd55bc87e2

View File

@@ -588,7 +588,9 @@ def setup_trainer(cfg, train_dataset, eval_dataset, model, tokenizer, total_num_
"padding": True, # True/"longest" is the default
}
if cfg.pad_to_sequence_len:
data_collator_kwargs["pad_to_multiple_of"] = 64 * round(cfg.sequence_len / 64)
data_collator_kwargs["pad_to_multiple_of"] = 64 * math.ceil(
cfg.sequence_len / 64
)
else:
# A100 is best at 64, while others at 8. Let's use the larger so we don't have to check
# https://docs.nvidia.com/deeplearning/performance/dl-performance-matrix-multiplication/index.html