From c15ea6b95682e0d425050aad80989361a9a86716 Mon Sep 17 00:00:00 2001
From: NanoCode012
Date: Wed, 5 Feb 2025 23:46:59 +0700
Subject: [PATCH] fix: load vocab_size

---
 .../lolcats/linear_llama/modeling_linear_llama.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/axolotl/integrations/lolcats/linear_llama/modeling_linear_llama.py b/src/axolotl/integrations/lolcats/linear_llama/modeling_linear_llama.py
index fa30ba5ab..f77c796c4 100644
--- a/src/axolotl/integrations/lolcats/linear_llama/modeling_linear_llama.py
+++ b/src/axolotl/integrations/lolcats/linear_llama/modeling_linear_llama.py
@@ -108,13 +108,14 @@ class LinearLlamaForCausalLM(LlamaForCausalLM):
         if config is None:
             raise ValueError("Missing config")
 
-        # initialize the model with prior weights
+        # initialize a new model with config
         new_model = cls(config=config)
 
         # remove the default model and lm_head
         del new_model.model
         del new_model.lm_head
 
+        # load converted model, lm_head, and vocab_size from llama model
         new_model.model = convert_attention(
             model.model,
             attention_config=config.attention_config,
@@ -122,6 +123,7 @@
             remove_base_attn=remove_base_attn,
         )
         new_model.lm_head = model.lm_head
+        new_model.vocab_size = model.vocab_size
 
         return new_model
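
Note (not part of the patch): `cls(config=config)` constructs a fresh
`LlamaForCausalLM`, whose `__init__` sets `self.vocab_size = config.vocab_size`;
transplanting the `model` and `lm_head` submodules from the source model does
not touch that plain attribute, which is why the patch mirrors it explicitly.
Below is a minimal standalone sketch of the same transplant pattern, using a
toy `LlamaConfig`; the names and sizes here are illustrative and not taken
from the axolotl integration:

    from transformers import LlamaConfig, LlamaForCausalLM

    # toy config so the sketch is cheap to instantiate
    config = LlamaConfig(
        vocab_size=128,
        hidden_size=64,
        intermediate_size=128,
        num_hidden_layers=2,
        num_attention_heads=4,
    )

    source = LlamaForCausalLM(config)     # stands in for the source llama model
    new_model = LlamaForCausalLM(config)  # stands in for cls(config=config)

    # drop the freshly initialized submodules, as the patch context does
    del new_model.model
    del new_model.lm_head

    # transplant the source submodules (here unconverted; the real code
    # passes source.model through convert_attention first)
    new_model.model = source.model
    new_model.lm_head = source.lm_head

    # the fix: plain (non-module) attributes are not carried over by the
    # submodule transplant above, so copy them explicitly
    new_model.vocab_size = source.vocab_size

    assert new_model.vocab_size == source.config.vocab_size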