diff --git a/configs/galactica_1_3B.yml b/configs/galactica_1_3B.yml index ed722f34e..1682849cf 100644 --- a/configs/galactica_1_3B.yml +++ b/configs/galactica_1_3B.yml @@ -34,7 +34,7 @@ tf32: false early_stopping_patience: resume_from_checkpoint: local_rank: -special_tokens: +tokens: pad_token: "[PAD]" bos_token: "" eos_token: "" diff --git a/configs/llama_7B_jeopardy.yml b/configs/llama_7B_jeopardy.yml index 1f0fbf9cf..f73bec348 100644 --- a/configs/llama_7B_jeopardy.yml +++ b/configs/llama_7B_jeopardy.yml @@ -51,7 +51,7 @@ deepspeed: weight_decay: 0.0001 fsdp: fsdp_config: -special_tokens: +tokens: pad_token: "[PAD]" bos_token: "" eos_token: "" diff --git a/configs/stability_3b.yml b/configs/stability_3b.yml index 080f4c753..c5f2198d8 100644 --- a/configs/stability_3b.yml +++ b/configs/stability_3b.yml @@ -49,7 +49,7 @@ deepspeed: weight_decay: 0.01 fsdp: fsdp_config: -#special_tokens: +#tokens: # pad_token: "[PAD]" # bos_token: "" # eos_token: "" diff --git a/examples/4bit-lora-7b/config.yml b/examples/4bit-lora-7b/config.yml index 32cb7d680..345e0812e 100644 --- a/examples/4bit-lora-7b/config.yml +++ b/examples/4bit-lora-7b/config.yml @@ -55,7 +55,7 @@ deepspeed: weight_decay: 0.0001 fsdp: fsdp_config: -special_tokens: +tokens: pad_token: "[PAD]" bos_token: "" eos_token: "" diff --git a/examples/mpt-7b/config.yml b/examples/mpt-7b/config.yml index 1323cc29b..f33452266 100644 --- a/examples/mpt-7b/config.yml +++ b/examples/mpt-7b/config.yml @@ -1,7 +1,6 @@ base_model: mosaicml/mpt-7b base_model_config: mosaicml/mpt-7b -model_type: AutoModelForCausalLM -tokenizer_type: GPTNeoXTokenizer +tokenizer_type: AutoTokenizer trust_remote_code: true # required for mpt as their model class is not merged into transformers yet load_in_8bit: false datasets: @@ -25,7 +24,7 @@ wandb_watch: wandb_run_id: wandb_log_model: checkpoint output_dir: ./mpt-alpaca-7b -batch_size: 4 +batch_size: 1 micro_batch_size: 1 num_epochs: 3 optimizer: adamw_bnb_8bit @@ -52,7 +51,7 @@ deepspeed: weight_decay: 0.0001 fsdp: fsdp_config: -special_tokens: +tokens: pad_token: "<|padding|>" bos_token: "<|endoftext|>" eos_token: "<|endoftext|>" diff --git a/examples/redpajama/config-3b.yml b/examples/redpajama/config-3b.yml index 4268dd2cf..229d6615c 100644 --- a/examples/redpajama/config-3b.yml +++ b/examples/redpajama/config-3b.yml @@ -52,7 +52,7 @@ deepspeed: weight_decay: 0.0001 fsdp: fsdp_config: -special_tokens: +tokens: pad_token: "<|padding|>" bos_token: "<|endoftext|>" eos_token: "<|endoftext|>"