Feat: Add rope scaling (#343)
* Feat: Add rope scaling * fix: move rope config
This commit is contained in:
@@ -474,6 +474,10 @@ landmark_attention:
|
|||||||
# xpos RoPE see https://github.com/kaiokendev/cutoff-len-is-context-len/blob/main/util/xpos_rope_llama_monkey_patch.py
|
# xpos RoPE see https://github.com/kaiokendev/cutoff-len-is-context-len/blob/main/util/xpos_rope_llama_monkey_patch.py
|
||||||
# llama only
|
# llama only
|
||||||
xpos_rope:
|
xpos_rope:
|
||||||
|
# RoPE Scaling https://github.com/huggingface/transformers/pull/24653
|
||||||
|
rope_scaling:
|
||||||
|
type: # linear | dynamic
|
||||||
|
factor: # float
|
||||||
|
|
||||||
# resume from a specific checkpoint dir
|
# resume from a specific checkpoint dir
|
||||||
resume_from_checkpoint:
|
resume_from_checkpoint:
|
||||||
|
|||||||
@@ -219,7 +219,9 @@ def load_model(
|
|||||||
elif cfg.is_llama_derived_model and not cfg.trust_remote_code:
|
elif cfg.is_llama_derived_model and not cfg.trust_remote_code:
|
||||||
from transformers import LlamaForCausalLM
|
from transformers import LlamaForCausalLM
|
||||||
|
|
||||||
config = LlamaConfig.from_pretrained(base_model_config)
|
config = LlamaConfig.from_pretrained(
|
||||||
|
base_model_config, rope_scaling=cfg.rope_scaling
|
||||||
|
)
|
||||||
model = LlamaForCausalLM.from_pretrained(
|
model = LlamaForCausalLM.from_pretrained(
|
||||||
base_model,
|
base_model,
|
||||||
config=config,
|
config=config,
|
||||||
|
|||||||
Reference in New Issue
Block a user