allow overriding of model_config parameters from the YML (#853)

* allow overriding of model_config parameters from the YML * remove old logging, update readme * move the updating of model config to the load_model_config function * add warning for deprecated rope_scaling in the root of the YML config
2023-11-15 23:47:08 -05:00
parent b3a61e8ce2
commit 1bc11868eb
3 changed files with 32 additions and 40 deletions
--- a/README.md
+++ b/README.md
@@ -489,6 +489,14 @@ is_llama_derived_model:
 # Please note that if you set this to true, `padding_side` will be set to "left" by default
 is_mistral_derived_model:

+# optional overrides to the base model configuration
+model_config:
+  # RoPE Scaling https://github.com/huggingface/transformers/pull/24653
+  rope_scaling:
+    type: # linear | dynamic
+    factor: # float
+
+
 # Whether you are training a 4-bit GPTQ quantized model
 gptq: true
 gptq_groupsize: 128 # group size
@@ -756,10 +764,6 @@ landmark_attention:
 # xpos RoPE see https://github.com/kaiokendev/cutoff-len-is-context-len/blob/main/util/xpos_rope_llama_monkey_patch.py
 # LLaMA only
 xpos_rope:
-# RoPE Scaling https://github.com/huggingface/transformers/pull/24653
-rope_scaling:
-  type: # linear | dynamic
-  factor: # float

 # Resume from a specific checkpoint dir
 resume_from_checkpoint: