chore: add debug logs

This commit is contained in:
NanoCode012
2025-12-23 12:16:30 +07:00
parent b752bac470
commit 97d1de1d7c

View File

@@ -126,6 +126,7 @@ def load_tokenizer(cfg: DictDefault) -> PreTrainedTokenizer:
from axolotl.loaders.patch_manager import PatchManager from axolotl.loaders.patch_manager import PatchManager
PatchManager.apply_pre_tokenizer_load_patches(cfg) PatchManager.apply_pre_tokenizer_load_patches(cfg)
LOG.debug("Kimi tokenizer patches applied, continuing with tokenizer loading...")
def _load_mistral_common_tokenizer(cfg: DictDefault): def _load_mistral_common_tokenizer(cfg: DictDefault):
"""Load mistral-common tokenizer""" """Load mistral-common tokenizer"""
@@ -139,7 +140,10 @@ def load_tokenizer(cfg: DictDefault) -> PreTrainedTokenizer:
if cfg.tokenizer_use_mistral_common: if cfg.tokenizer_use_mistral_common:
return _load_mistral_common_tokenizer(cfg) return _load_mistral_common_tokenizer(cfg)
LOG.debug("Loading model config...")
model_config = load_model_config(cfg) model_config = load_model_config(cfg)
LOG.debug("Model config loaded successfully")
tokenizer_kwargs = {} tokenizer_kwargs = {}
use_fast = True # this is the default use_fast = True # this is the default
@@ -163,12 +167,14 @@ def load_tokenizer(cfg: DictDefault) -> PreTrainedTokenizer:
tokenizer_path, cfg.added_tokens_overrides, output_dir=cfg.output_dir tokenizer_path, cfg.added_tokens_overrides, output_dir=cfg.output_dir
) )
LOG.debug(f"Loading tokenizer from {cfg.tokenizer_config}...")
tokenizer = tokenizer_cls.from_pretrained( tokenizer = tokenizer_cls.from_pretrained(
tokenizer_path, tokenizer_path,
trust_remote_code=cfg.trust_remote_code or False, trust_remote_code=cfg.trust_remote_code or False,
use_fast=use_fast, use_fast=use_fast,
**tokenizer_kwargs, **tokenizer_kwargs,
) )
LOG.debug("Tokenizer loaded successfully")
if ( if (
tokenizer.__class__.__name__ tokenizer.__class__.__name__