From e7a6a5b529e3ddb3f3239866c48e7025f2e2c6ce Mon Sep 17 00:00:00 2001 From: NanoCode012 Date: Sat, 11 Apr 2026 00:00:47 +0700 Subject: [PATCH] fix: move warning after we've set any overrides (#3589) [skip ci] --- src/axolotl/loaders/tokenizer.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/axolotl/loaders/tokenizer.py b/src/axolotl/loaders/tokenizer.py index cf5c3d27b..52f714604 100644 --- a/src/axolotl/loaders/tokenizer.py +++ b/src/axolotl/loaders/tokenizer.py @@ -221,14 +221,6 @@ def load_tokenizer(cfg: DictDefault) -> PreTrainedTokenizer: if getattr(tokenizer, attr_name) is None: setattr(tokenizer, attr_name, "<|endoftext|>") - # Generic fallback: if tokenizer still has no pad_token, use eos_token - if tokenizer.pad_token is None and tokenizer.eos_token is not None: - tokenizer.pad_token = tokenizer.eos_token - LOG.warning( - "Tokenizer does not have a pad_token, falling back to eos_token: %s", - tokenizer.eos_token, - ) - additional_special_tokens = None if cfg.special_tokens: special_tokens = cfg.special_tokens.to_dict() @@ -303,6 +295,14 @@ def load_tokenizer(cfg: DictDefault) -> PreTrainedTokenizer: {"additional_special_tokens": additional_special_tokens} ) + # Generic fallback: if tokenizer still has no pad_token, use eos_token + if tokenizer.pad_token is None and tokenizer.eos_token is not None: + tokenizer.pad_token = tokenizer.eos_token + LOG.warning( + "Tokenizer does not have a pad_token, falling back to eos_token: %s", + tokenizer.eos_token, + ) + if is_main_process(): LOG.debug(f"EOS: {tokenizer.eos_token_id} / {tokenizer.eos_token}") LOG.debug(f"BOS: {tokenizer.bos_token_id} / {tokenizer.bos_token}")