diff --git a/configs/galactica_1_3B.yml b/configs/galactica_1_3B.yml
index ed722f34e..1682849cf 100644
--- a/configs/galactica_1_3B.yml
+++ b/configs/galactica_1_3B.yml
@@ -34,7 +34,7 @@ tf32: false
early_stopping_patience:
resume_from_checkpoint:
local_rank:
-special_tokens:
+tokens:
pad_token: "[PAD]"
bos_token: ""
eos_token: ""
diff --git a/configs/llama_7B_jeopardy.yml b/configs/llama_7B_jeopardy.yml
index 1f0fbf9cf..f73bec348 100644
--- a/configs/llama_7B_jeopardy.yml
+++ b/configs/llama_7B_jeopardy.yml
@@ -51,7 +51,7 @@ deepspeed:
weight_decay: 0.0001
fsdp:
fsdp_config:
-special_tokens:
+tokens:
pad_token: "[PAD]"
bos_token: ""
eos_token: ""
diff --git a/configs/stability_3b.yml b/configs/stability_3b.yml
index 080f4c753..c5f2198d8 100644
--- a/configs/stability_3b.yml
+++ b/configs/stability_3b.yml
@@ -49,7 +49,7 @@ deepspeed:
weight_decay: 0.01
fsdp:
fsdp_config:
-#special_tokens:
+#tokens:
# pad_token: "[PAD]"
# bos_token: ""
# eos_token: ""
diff --git a/examples/4bit-lora-7b/config.yml b/examples/4bit-lora-7b/config.yml
index 32cb7d680..345e0812e 100644
--- a/examples/4bit-lora-7b/config.yml
+++ b/examples/4bit-lora-7b/config.yml
@@ -55,7 +55,7 @@ deepspeed:
weight_decay: 0.0001
fsdp:
fsdp_config:
-special_tokens:
+tokens:
pad_token: "[PAD]"
bos_token: ""
eos_token: ""
diff --git a/examples/mpt-7b/config.yml b/examples/mpt-7b/config.yml
index 1323cc29b..f33452266 100644
--- a/examples/mpt-7b/config.yml
+++ b/examples/mpt-7b/config.yml
@@ -1,7 +1,6 @@
base_model: mosaicml/mpt-7b
base_model_config: mosaicml/mpt-7b
-model_type: AutoModelForCausalLM
-tokenizer_type: GPTNeoXTokenizer
+tokenizer_type: AutoTokenizer
trust_remote_code: true # required for mpt as their model class is not merged into transformers yet
load_in_8bit: false
datasets:
@@ -25,7 +24,7 @@ wandb_watch:
wandb_run_id:
wandb_log_model: checkpoint
output_dir: ./mpt-alpaca-7b
-batch_size: 4
+batch_size: 1
micro_batch_size: 1
num_epochs: 3
optimizer: adamw_bnb_8bit
@@ -52,7 +51,7 @@ deepspeed:
weight_decay: 0.0001
fsdp:
fsdp_config:
-special_tokens:
+tokens:
pad_token: "<|padding|>"
bos_token: "<|endoftext|>"
eos_token: "<|endoftext|>"
diff --git a/examples/redpajama/config-3b.yml b/examples/redpajama/config-3b.yml
index 4268dd2cf..229d6615c 100644
--- a/examples/redpajama/config-3b.yml
+++ b/examples/redpajama/config-3b.yml
@@ -52,7 +52,7 @@ deepspeed:
weight_decay: 0.0001
fsdp:
fsdp_config:
-special_tokens:
+tokens:
pad_token: "<|padding|>"
bos_token: "<|endoftext|>"
eos_token: "<|endoftext|>"