Update ModelLoader to set a default vocab_size of 50257 (with a logged warning) when the model config does not define one, improving compatibility with tokenizer defaults.

This commit is contained in:
mhenrhcsen
2025-07-17 19:53:41 +02:00
parent 6e71819560
commit 380921ee56

View File

@@ -762,6 +762,10 @@ class ModelLoader:
)
else:
if not hasattr(self.model_config, 'vocab_size'):
LOG.warning("Model config does not have vocab_size attribute, setting to 50257")
self.model_config.vocab_size = 50257
self.model = getattr(transformers, self.model_type).from_pretrained(
self.base_model,
config=self.model_config,