Use MistralTokenizer.from_hf_hub instead of from_file to load the Mistral tokenizer

This commit is contained in:
Dan Saunders
2025-06-09 01:42:48 +00:00
parent 65f8988efd
commit aa236a4669
2 changed files with 5 additions and 3 deletions

View File

@@ -67,3 +67,5 @@ schedulefree==1.4.1
axolotl-contribs-lgpl==0.0.6
axolotl-contribs-mit==0.0.3
+mistral-common[hf-hub]

View File

@@ -379,7 +379,7 @@ class TokenizerConfiguration:
"""Load Mistral tokenizer from model configuration."""
# Instantiate Mistral tokenizer
model_id = self.cfg.base_model
-mistral_tokenizer = MistralTokenizer.from_file(model_id)
+mistral_tokenizer = MistralTokenizer.from_hf_hub(model_id)
# Wrap it for compatibility
tokenizer = MistralTokenizerWrapper(mistral_tokenizer, model_id)
@@ -637,9 +637,9 @@ def load_tokenizer(cfg):
config = TokenizerConfiguration(cfg)
# Check if we should use Mistral tokenizer
-if config.detect_by_model_name_mapping():
+try:
tokenizer = config.load_mistral_tokenizer()
-else:
+except:
# Standard tokenizer loading
tokenizer_cls = config.get_tokenizer_class()
tokenizer_path = config.get_tokenizer_path()