fix: handles AutoTokenizer from untrusted source

Set the trust_remote_code parameter according to cfg.trust_remote_code when calling AutoTokenizer.from_pretrained.
This commit is contained in:
Valentin De Matos
2023-05-24 20:57:10 +02:00
committed by GitHub
parent ed37b2268d
commit 88ad05df54

View File

@@ -167,7 +167,7 @@ def load_model(
else:
tokenizer = getattr(transformers, tokenizer_type).from_pretrained(model)
except:
tokenizer = AutoTokenizer.from_pretrained(base_model_config)
tokenizer = AutoTokenizer.from_pretrained(base_model_config, trust_remote_code=True if cfg.trust_remote_code is True else False)
logging.debug(f"EOS: {tokenizer.eos_token_id} / {tokenizer.eos_token}")
logging.debug(f"BOS: {tokenizer.bos_token_id} / {tokenizer.bos_token}")