+    LOG.debug("Loading tokenizer from %s...", tokenizer_path)