From 88ad05df5473d000bd2787b875a231934b3ff435 Mon Sep 17 00:00:00 2001 From: Valentin De Matos <43698357+Thytu@users.noreply.github.com> Date: Wed, 24 May 2023 20:57:10 +0200 Subject: [PATCH] fix: handles AutoTokenizer from untrusted source Set trust_remote_code param depending on cfg.trust_remote_code when calling AutoTokenizer.from_pretrained --- src/axolotl/utils/models.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/axolotl/utils/models.py b/src/axolotl/utils/models.py index 45e771deb..80707ff8a 100644 --- a/src/axolotl/utils/models.py +++ b/src/axolotl/utils/models.py @@ -167,7 +167,7 @@ def load_model( else: tokenizer = getattr(transformers, tokenizer_type).from_pretrained(model) except: - tokenizer = AutoTokenizer.from_pretrained(base_model_config) + tokenizer = AutoTokenizer.from_pretrained(base_model_config, trust_remote_code=True if cfg.trust_remote_code is True else False) logging.debug(f"EOS: {tokenizer.eos_token_id} / {tokenizer.eos_token}") logging.debug(f"BOS: {tokenizer.bos_token_id} / {tokenizer.bos_token}")