diff --git a/requirements.txt b/requirements.txt
index 3af94421d..1ca2770e6 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -67,3 +67,6 @@ schedulefree==1.4.1
 
 axolotl-contribs-lgpl==0.0.6
 axolotl-contribs-mit==0.0.3
+
+# NOTE(review): unpinned, unlike the `==` entries above; pin once the minimum version providing MistralTokenizer.from_hf_hub is confirmed
+mistral-common[hf-hub]
diff --git a/src/axolotl/loaders/tokenizer.py b/src/axolotl/loaders/tokenizer.py
index 7c764941b..8671e69f8 100644
--- a/src/axolotl/loaders/tokenizer.py
+++ b/src/axolotl/loaders/tokenizer.py
@@ -379,7 +379,7 @@ class TokenizerConfiguration:
         """Load Mistral tokenizer from model configuration."""
         # Instantiate Mistral tokenizer
         model_id = self.cfg.base_model
-        mistral_tokenizer = MistralTokenizer.from_file(model_id)
+        mistral_tokenizer = MistralTokenizer.from_hf_hub(model_id)
 
         # Wrap it for compatibility
         tokenizer = MistralTokenizerWrapper(mistral_tokenizer, model_id)
@@ -637,9 +637,9 @@ def load_tokenizer(cfg):
     config = TokenizerConfiguration(cfg)
 
-    # Check if we should use Mistral tokenizer
-    if config.detect_by_model_name_mapping():
+    # Try the Mistral tokenizer first; if loading raises, fall back to the
+    # standard path. Only Exception is caught so Ctrl-C / SystemExit propagate.
+    try:
         tokenizer = config.load_mistral_tokenizer()
-    else:
+    except Exception:
         # Standard tokenizer loading
         tokenizer_cls = config.get_tokenizer_class()
         tokenizer_path = config.get_tokenizer_path()