From 2520ecd6df3e0eb1d1813f3ad6dcb429d84e61fc Mon Sep 17 00:00:00 2001
From: Wing Lian <wing.lian@gmail.com>
Date: Tue, 30 May 2023 22:32:44 -0400
Subject: [PATCH 1/3] split up llama model loading so config can be loaded from
 base config and models can be loaded from a path

---
 src/axolotl/utils/models.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/axolotl/utils/models.py b/src/axolotl/utils/models.py
index 0737d0f12..952aaaa97 100644
--- a/src/axolotl/utils/models.py
+++ b/src/axolotl/utils/models.py
@@ -10,7 +10,7 @@ from typing import TYPE_CHECKING, Optional, Tuple  # noqa: F401
 import bitsandbytes as bnb
 import torch
 import transformers
-from transformers import AutoModelForCausalLM  # noqa: F401
+from transformers import AutoModelForCausalLM, LlamaConfig  # noqa: F401
 from transformers import PreTrainedModel  # noqa: F401
 from transformers import AutoConfig, AutoTokenizer, BitsAndBytesConfig
 
@@ -172,8 +172,10 @@ def load_model(
             )
             load_in_8bit = False
         elif is_llama_derived_model and "LlamaForCausalLM" in globals():
+            config = LlamaConfig.from_pretrained(base_model_config)
             model = LlamaForCausalLM.from_pretrained(
                 base_model,
+                config=config,
                 load_in_8bit=cfg.load_in_8bit and cfg.adapter is not None,
                 load_in_4bit=cfg.load_in_4bit and cfg.adapter is not None,
                 torch_dtype=torch_dtype,

From 39a208c2bcfc38140f1ec2fe91f6e8f922dd1382 Mon Sep 17 00:00:00 2001
From: Wing Lian <wing.lian@gmail.com>
Date: Tue, 30 May 2023 23:00:02 -0400
Subject: [PATCH 2/3] fix up tokenizer config, isort fix

---
 scripts/finetune.py         |  5 +++--
 src/axolotl/utils/models.py | 15 ++++++++++-----
 2 files changed, 13 insertions(+), 7 deletions(-)

diff --git a/scripts/finetune.py b/scripts/finetune.py
index 6c42b3061..e1b0b2e59 100644
--- a/scripts/finetune.py
+++ b/scripts/finetune.py
@@ -171,8 +171,9 @@ def train(
     validate_config(cfg)
 
     # load the tokenizer first
-    logging.info("loading tokenizer...")
-    tokenizer = load_tokenizer(cfg.base_model_config, cfg.tokenizer_type, cfg)
+    tokenizer_config = cfg.tokenizer_config or cfg.base_model_config
+    logging.info(f"loading tokenizer... {tokenizer_config}")
+    tokenizer = load_tokenizer(tokenizer_config, cfg.tokenizer_type, cfg)
 
     if check_not_in(
         ["inference", "shard", "merge_lora"], kwargs
diff --git a/src/axolotl/utils/models.py b/src/axolotl/utils/models.py
index 952aaaa97..dc303bca6 100644
--- a/src/axolotl/utils/models.py
+++ b/src/axolotl/utils/models.py
@@ -10,9 +10,14 @@ from typing import TYPE_CHECKING, Optional, Tuple  # noqa: F401
 import bitsandbytes as bnb
 import torch
 import transformers
-from transformers import AutoModelForCausalLM, LlamaConfig  # noqa: F401
 from transformers import PreTrainedModel  # noqa: F401
-from transformers import AutoConfig, AutoTokenizer, BitsAndBytesConfig
+from transformers import (  # noqa: F401
+    AutoConfig,
+    AutoModelForCausalLM,
+    AutoTokenizer,
+    BitsAndBytesConfig,
+    LlamaConfig,
+)
 
 try:
     from transformers import LlamaForCausalLM
@@ -31,18 +36,18 @@ if TYPE_CHECKING:
 
 
 def load_tokenizer(
-    base_model_config,
+    tokenizer_config,
     tokenizer_type,
     cfg,
 ):
     if tokenizer_type:
         tokenizer = getattr(transformers, tokenizer_type).from_pretrained(
-            base_model_config,
+            tokenizer_config,
             trust_remote_code=cfg.trust_remote_code or False,
         )
     else:
         tokenizer = AutoTokenizer.from_pretrained(
-            base_model_config,
+            tokenizer_config,
             trust_remote_code=cfg.trust_remote_code or False,
         )
 

From e3c494ca7b8f5332233337298e0c382e39b36a9d Mon Sep 17 00:00:00 2001
From: Wing Lian <wing.lian@gmail.com>
Date: Tue, 30 May 2023 23:55:45 -0400
Subject: [PATCH 3/3] remove unused import and update readme

---
 README.md                   | 3 +++
 src/axolotl/utils/models.py | 1 -
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/README.md b/README.md
index e1391e39b..f8f226963 100644
--- a/README.md
+++ b/README.md
@@ -171,6 +171,9 @@ base_model_ignore_patterns:
 # if the base_model repo on hf hub doesn't include configuration .json files,
 # you can set that here, or leave this empty to default to base_model
 base_model_config: ./llama-7b-hf
+# Optional tokenizer configuration override, in case you want to use a different tokenizer
+# than the one defined in the base model; leave this empty to default to base_model_config
+tokenizer_config:
 # If you want to specify the type of model to load, AutoModelForCausalLM is a good choice too
 model_type: AutoModelForCausalLM
 # Corresponding tokenizer for the model AutoTokenizer is a good choice
diff --git a/src/axolotl/utils/models.py b/src/axolotl/utils/models.py
index dc303bca6..cf351a78d 100644
--- a/src/axolotl/utils/models.py
+++ b/src/axolotl/utils/models.py
@@ -30,7 +30,6 @@ from axolotl.prompt_tokenizers import LLAMA_DEFAULT_PAD_TOKEN
 
 if TYPE_CHECKING:
     from peft import PeftConfig  # noqa: F401
-    from transformers import PreTrainedTokenizer  # noqa: F401
 
     from axolotl.utils.dict import DictDefault  # noqa: F401