From 1c14c4a15cb2cb70d90bedd0a1580e30314c90cb Mon Sep 17 00:00:00 2001 From: Sunny Liu <22844540+bursteratom@users.noreply.github.com> Date: Tue, 17 Dec 2024 11:24:30 -0500 Subject: [PATCH] Add hub model id config options to all example yml files (#2196) [skip ci] * added hub model_id in example yml * add hub model id to example yml --- examples/cerebras/btlm-ft.yml | 4 ++++ examples/cerebras/qlora.yml | 3 +++ examples/code-llama/13b/lora.yml | 3 +++ examples/code-llama/13b/qlora.yml | 3 +++ examples/code-llama/34b/lora.yml | 3 +++ examples/code-llama/34b/qlora.yml | 3 +++ examples/code-llama/7b/lora.yml | 3 +++ examples/code-llama/7b/qlora.yml | 3 +++ examples/dbrx/16bit-lora.yaml | 3 +++ examples/dbrx/8bit-lora.yaml | 3 +++ examples/dbrx/fft-ds-zero3.yaml | 3 +++ examples/deepseek-v2/fft-fsdp-16b.yaml | 2 ++ examples/deepseek-v2/qlora-fsdp-2_5.yaml | 3 +++ examples/falcon/config-7b-lora.yml | 7 ++++++- examples/falcon/config-7b-qlora.yml | 9 +++++++-- examples/falcon/config-7b.yml | 7 ++++++- examples/gemma/qlora.yml | 3 +++ examples/gemma2/qlora.yml | 3 +++ examples/gemma2/reward-model.yaml | 3 +++ examples/gptj/qlora.yml | 3 +++ examples/jamba/qlora.yaml | 3 +++ examples/jamba/qlora_deepspeed.yaml | 2 ++ examples/jamba/qlora_fsdp_large.yaml | 3 +++ examples/jeopardy-bot/config.yml | 4 ++++ examples/llama-2/fft_optimized.yml | 3 +++ examples/llama-2/gptq-lora.yml | 9 +++++++-- examples/llama-2/lisa.yml | 3 +++ examples/llama-2/loftq.yml | 3 +++ examples/llama-2/lora.yml | 3 +++ examples/llama-2/qlora-fsdp.yml | 3 +++ examples/llama-2/qlora.yml | 3 +++ examples/llama-3-vision/lora-11b.yaml | 4 ++++ examples/llama-3/fft-8b-liger-fsdp.yaml | 2 ++ examples/llama-3/fft-8b.yaml | 2 ++ examples/llama-3/instruct-dpo-lora-8b.yml | 3 +++ examples/llama-3/instruct-lora-8b.yml | 3 +++ examples/llama-3/lora-1b-deduplicate-dpo.yml | 3 +++ examples/llama-3/lora-1b-deduplicate-sft.yml | 3 +++ examples/llama-3/lora-1b.yml | 2 ++ examples/llama-3/lora-8b.yml | 3 +++ examples/llama-3/qlora-1b-kto.yaml | 2 ++ examples/llama-3/qlora-1b.yml | 2 ++ examples/llama-3/qlora-fsdp-405b.yaml | 3 +++ examples/llama-3/qlora-fsdp-70b.yaml | 3 +++ examples/llama-3/qlora.yml | 3 +++ examples/mamba/config.yml | 3 +++ examples/mistral/bigstral-ds-zero3.yaml | 4 ++++ examples/mistral/config.yml | 3 +++ examples/mistral/lora-mps.yml | 3 +++ examples/mistral/lora.yml | 3 +++ examples/mistral/mistral-dpo-qlora.yml | 3 +++ examples/mistral/mistral-qlora-fsdp.yml | 4 ++++ examples/mistral/mistral-qlora-orpo.yml | 3 +++ examples/mistral/mixtral-8x22b-qlora-fsdp.yml | 3 +++ examples/mistral/mixtral-qlora-fsdp.yml | 4 ++++ examples/mistral/mixtral.yml | 4 ++++ examples/mistral/mixtral_22.yml | 4 ++++ examples/mistral/qlora.yml | 3 +++ examples/mpt-7b/config.yml | 4 ++++ examples/openllama-3b/config.yml | 4 ++++ examples/openllama-3b/lora.yml | 4 ++++ examples/openllama-3b/qlora.yml | 4 ++++ examples/phi/lora-3.5.yaml | 3 +++ examples/phi/phi-ft.yml | 3 +++ examples/phi/phi-qlora.yml | 3 +++ examples/phi/phi2-ft.yml | 3 +++ examples/phi/phi3-ft-fsdp.yml | 3 +++ examples/phi/phi3-ft.yml | 4 ++++ examples/pythia-12b/config.yml | 4 ++++ examples/pythia/lora.yml | 3 +++ examples/qwen/lora.yml | 3 +++ examples/qwen/qlora.yml | 3 +++ examples/qwen/qwen2-moe-lora.yaml | 3 +++ examples/qwen/qwen2-moe-qlora.yaml | 3 +++ examples/qwen2/dpo.yaml | 2 ++ examples/qwen2/qlora-fsdp.yaml | 3 +++ examples/redpajama/config-3b.yml | 4 ++++ examples/replit-3b/config-lora.yml | 3 +++ examples/stablelm-2/1.6b/fft.yml | 4 ++++ examples/stablelm-2/1.6b/lora.yml | 4 ++++ examples/starcoder2/qlora.yml | 2 ++ examples/tiny-llama/lora-mps.yml | 3 +++ examples/tiny-llama/lora.yml | 3 +++ examples/tiny-llama/pretrain.yml | 4 +++- examples/tiny-llama/qlora.yml | 3 +++ examples/xgen-7b/xgen-7b-8k-qlora.yml | 7 ++++++- examples/yi-34B-chat/qlora.yml | 3 +++ 87 files changed, 286 insertions(+), 8 deletions(-) diff --git a/examples/cerebras/btlm-ft.yml b/examples/cerebras/btlm-ft.yml index ba4e65daa..780616e04 100644 --- a/examples/cerebras/btlm-ft.yml +++ b/examples/cerebras/btlm-ft.yml @@ -1,6 +1,10 @@ base_model: cerebras/btlm-3b-8k-base +# optionally might have model_type or tokenizer_type model_type: AutoModelForCausalLM tokenizer_type: GPT2Tokenizer +# Automatically upload checkpoint and final model to HF +# hub_model_id: username/custom_model_name + trust_remote_code: true tokenizer_use_fast: true tokenizer_legacy: true diff --git a/examples/cerebras/qlora.yml b/examples/cerebras/qlora.yml index 285607a4c..866b4ab58 100644 --- a/examples/cerebras/qlora.yml +++ b/examples/cerebras/qlora.yml @@ -1,4 +1,7 @@ base_model: cerebras/Cerebras-GPT-1.3B +# Automatically upload checkpoint and final model to HF +# hub_model_id: username/custom_model_name + load_in_8bit: false load_in_4bit: true strict: false diff --git a/examples/code-llama/13b/lora.yml b/examples/code-llama/13b/lora.yml index 0ba96cfaa..2b8a720b2 100644 --- a/examples/code-llama/13b/lora.yml +++ b/examples/code-llama/13b/lora.yml @@ -1,6 +1,9 @@ base_model: codellama/CodeLlama-13b-hf +# optionally might have model_type or tokenizer_type model_type: LlamaForCausalLM tokenizer_type: CodeLlamaTokenizer +# Automatically upload checkpoint and final model to HF +# hub_model_id: username/custom_model_name load_in_8bit: true load_in_4bit: false diff --git a/examples/code-llama/13b/qlora.yml b/examples/code-llama/13b/qlora.yml index 787862d01..92aa6ac97 100644 --- a/examples/code-llama/13b/qlora.yml +++ b/examples/code-llama/13b/qlora.yml @@ -1,6 +1,9 @@ base_model: codellama/CodeLlama-13b-hf +# optionally might have model_type or tokenizer_type model_type: LlamaForCausalLM tokenizer_type: CodeLlamaTokenizer +# Automatically upload checkpoint and final model to HF +# hub_model_id: username/custom_model_name load_in_8bit: false load_in_4bit: true diff --git a/examples/code-llama/34b/lora.yml b/examples/code-llama/34b/lora.yml index 92d4c544a..af343e389 100644 --- a/examples/code-llama/34b/lora.yml +++ b/examples/code-llama/34b/lora.yml @@ -1,6 +1,9 @@ base_model: codellama/CodeLlama-34b-hf +# optionally might have model_type or tokenizer_type model_type: LlamaForCausalLM tokenizer_type: CodeLlamaTokenizer +# Automatically upload checkpoint and final model to HF +# hub_model_id: username/custom_model_name load_in_8bit: true load_in_4bit: false diff --git a/examples/code-llama/34b/qlora.yml b/examples/code-llama/34b/qlora.yml index 93a6de877..f45e9205f 100644 --- a/examples/code-llama/34b/qlora.yml +++ b/examples/code-llama/34b/qlora.yml @@ -1,6 +1,9 @@ base_model: codellama/CodeLlama-34b-hf +# optionally might have model_type or tokenizer_type model_type: LlamaForCausalLM tokenizer_type: CodeLlamaTokenizer +# Automatically upload checkpoint and final model to HF +# hub_model_id: username/custom_model_name load_in_8bit: false load_in_4bit: true diff --git a/examples/code-llama/7b/lora.yml b/examples/code-llama/7b/lora.yml index d13f50532..6c385dbcb 100644 --- a/examples/code-llama/7b/lora.yml +++ b/examples/code-llama/7b/lora.yml @@ -1,6 +1,9 @@ base_model: codellama/CodeLlama-7b-hf +# optionally might have model_type or tokenizer_type model_type: LlamaForCausalLM tokenizer_type: CodeLlamaTokenizer +# Automatically upload checkpoint and final model to HF +# hub_model_id: username/custom_model_name load_in_8bit: true load_in_4bit: false diff --git a/examples/code-llama/7b/qlora.yml b/examples/code-llama/7b/qlora.yml index a1026a982..ccd256406 100644 --- a/examples/code-llama/7b/qlora.yml +++ b/examples/code-llama/7b/qlora.yml @@ -1,6 +1,9 @@ base_model: codellama/CodeLlama-7b-hf +# optionally might have model_type or tokenizer_type model_type: LlamaForCausalLM tokenizer_type: CodeLlamaTokenizer +# Automatically upload checkpoint and final model to HF +# hub_model_id: username/custom_model_name load_in_8bit: false load_in_4bit: true diff --git a/examples/dbrx/16bit-lora.yaml b/examples/dbrx/16bit-lora.yaml index 32b625ac6..645ba1d59 100644 --- a/examples/dbrx/16bit-lora.yaml +++ b/examples/dbrx/16bit-lora.yaml @@ -1,4 +1,7 @@ base_model: LnL-AI/dbrx-base-converted-v2 +# Automatically upload checkpoint and final model to HF +# hub_model_id: username/custom_model_name + trust_remote_code: true load_in_8bit: false diff --git a/examples/dbrx/8bit-lora.yaml b/examples/dbrx/8bit-lora.yaml index 50ee0a016..4b9f60756 100644 --- a/examples/dbrx/8bit-lora.yaml +++ b/examples/dbrx/8bit-lora.yaml @@ -1,4 +1,7 @@ base_model: LnL-AI/dbrx-base-converted-v2 +# Automatically upload checkpoint and final model to HF +# hub_model_id: username/custom_model_name + trust_remote_code: true load_in_8bit: true diff --git a/examples/dbrx/fft-ds-zero3.yaml b/examples/dbrx/fft-ds-zero3.yaml index 60dc201ee..e42b63670 100644 --- a/examples/dbrx/fft-ds-zero3.yaml +++ b/examples/dbrx/fft-ds-zero3.yaml @@ -1,4 +1,7 @@ base_model: LnL-AI/dbrx-base-converted-v2 +# Automatically upload checkpoint and final model to HF +# hub_model_id: username/custom_model_name + trust_remote_code: true load_in_8bit: false diff --git a/examples/deepseek-v2/fft-fsdp-16b.yaml b/examples/deepseek-v2/fft-fsdp-16b.yaml index b55646df7..3d4608a01 100644 --- a/examples/deepseek-v2/fft-fsdp-16b.yaml +++ b/examples/deepseek-v2/fft-fsdp-16b.yaml @@ -1,4 +1,6 @@ base_model: deepseek-ai/DeepSeek-V2-Lite +# Automatically upload checkpoint and final model to HF +# hub_model_id: username/custom_model_name trust_remote_code: true load_in_8bit: false diff --git a/examples/deepseek-v2/qlora-fsdp-2_5.yaml b/examples/deepseek-v2/qlora-fsdp-2_5.yaml index 6b8771d81..a89dc343a 100644 --- a/examples/deepseek-v2/qlora-fsdp-2_5.yaml +++ b/examples/deepseek-v2/qlora-fsdp-2_5.yaml @@ -1,4 +1,7 @@ base_model: axolotl-quants/DeepSeek-V2.5-bnb-nf4-bf16 +# Automatically upload checkpoint and final model to HF +# hub_model_id: username/custom_model_name + trust_remote_code: true load_in_8bit: false diff --git a/examples/falcon/config-7b-lora.yml b/examples/falcon/config-7b-lora.yml index 029ca40e0..efbe38d4a 100644 --- a/examples/falcon/config-7b-lora.yml +++ b/examples/falcon/config-7b-lora.yml @@ -1,7 +1,12 @@ base_model: tiiuae/falcon-7b -trust_remote_code: true +# optionally might have model_type or tokenizer_type model_type: AutoModelForCausalLM tokenizer_type: AutoTokenizer +# Automatically upload checkpoint and final model to HF +# hub_model_id: username/custom_model_name + +# required by falcon custom model code: https://huggingface.co/tiiuae/falcon-7b/tree/main +trust_remote_code: true load_in_8bit: true load_in_4bit: false diff --git a/examples/falcon/config-7b-qlora.yml b/examples/falcon/config-7b-qlora.yml index 4e34144ed..b9829db5f 100644 --- a/examples/falcon/config-7b-qlora.yml +++ b/examples/falcon/config-7b-qlora.yml @@ -1,10 +1,15 @@ # 1b: tiiuae/falcon-rw-1b # 40b: tiiuae/falcon-40b base_model: tiiuae/falcon-7b -# required by falcon custom model code: https://huggingface.co/tiiuae/falcon-7b/tree/main -trust_remote_code: true +# optionally might have model_type or tokenizer_type model_type: AutoModelForCausalLM tokenizer_type: AutoTokenizer +# Automatically upload checkpoint and final model to HF +# hub_model_id: username/custom_model_name + +# required by falcon custom model code: https://huggingface.co/tiiuae/falcon-7b/tree/main +trust_remote_code: true + load_in_8bit: false # enable 4bit for QLoRA diff --git a/examples/falcon/config-7b.yml b/examples/falcon/config-7b.yml index 36264f063..5e41a1e33 100644 --- a/examples/falcon/config-7b.yml +++ b/examples/falcon/config-7b.yml @@ -1,7 +1,12 @@ base_model: tiiuae/falcon-7b -trust_remote_code: true +# optionally might have model_type or tokenizer_type model_type: AutoModelForCausalLM tokenizer_type: AutoTokenizer +# Automatically upload checkpoint and final model to HF +# hub_model_id: username/custom_model_name + +# required by falcon custom model code: https://huggingface.co/tiiuae/falcon-7b/tree/main +trust_remote_code: true load_in_8bit: false load_in_4bit: false diff --git a/examples/gemma/qlora.yml b/examples/gemma/qlora.yml index e08facfc5..80a9fe62f 100644 --- a/examples/gemma/qlora.yml +++ b/examples/gemma/qlora.yml @@ -1,7 +1,10 @@ # use google/gemma-7b if you have access base_model: mhenrichsen/gemma-7b +# optionally might have model_type or tokenizer_type model_type: AutoModelForCausalLM tokenizer_type: AutoTokenizer +# Automatically upload checkpoint and final model to HF +# hub_model_id: username/custom_model_name load_in_8bit: false load_in_4bit: true diff --git a/examples/gemma2/qlora.yml b/examples/gemma2/qlora.yml index 00e6d84e0..61a2ad876 100644 --- a/examples/gemma2/qlora.yml +++ b/examples/gemma2/qlora.yml @@ -1,6 +1,9 @@ base_model: google/gemma-2-9b +# optionally might have model_type or tokenizer_type model_type: AutoModelForCausalLM tokenizer_type: AutoTokenizer +# Automatically upload checkpoint and final model to HF +# hub_model_id: username/custom_model_name load_in_8bit: false load_in_4bit: true diff --git a/examples/gemma2/reward-model.yaml b/examples/gemma2/reward-model.yaml index c1f993c3a..b492c6f93 100644 --- a/examples/gemma2/reward-model.yaml +++ b/examples/gemma2/reward-model.yaml @@ -1,6 +1,9 @@ base_model: google/gemma-2-2b +# optionally might have model_type or tokenizer_type model_type: AutoModelForSequenceClassification tokenizer_type: AutoTokenizer +# Automatically upload checkpoint and final model to HF +# hub_model_id: username/custom_model_name load_in_8bit: false load_in_4bit: false diff --git a/examples/gptj/qlora.yml b/examples/gptj/qlora.yml index f801729fa..ddd6d24c0 100644 --- a/examples/gptj/qlora.yml +++ b/examples/gptj/qlora.yml @@ -1,4 +1,7 @@ base_model: EleutherAI/gpt-j-6b +# Automatically upload checkpoint and final model to HF +# hub_model_id: username/custom_model_name + load_in_8bit: false load_in_4bit: true strict: false diff --git a/examples/jamba/qlora.yaml b/examples/jamba/qlora.yaml index 3d6f69e79..cab62513c 100644 --- a/examples/jamba/qlora.yaml +++ b/examples/jamba/qlora.yaml @@ -1,4 +1,7 @@ base_model: ai21labs/Jamba-v0.1 +# Automatically upload checkpoint and final model to HF +# hub_model_id: username/custom_model_name + trust_remote_code: true load_in_8bit: false diff --git a/examples/jamba/qlora_deepspeed.yaml b/examples/jamba/qlora_deepspeed.yaml index 43a76c00b..7ac7bfac5 100644 --- a/examples/jamba/qlora_deepspeed.yaml +++ b/examples/jamba/qlora_deepspeed.yaml @@ -1,4 +1,6 @@ base_model: ai21labs/Jamba-v0.1 +# Automatically upload checkpoint and final model to HF +# hub_model_id: username/custom_model_name trust_remote_code: true load_in_8bit: false diff --git a/examples/jamba/qlora_fsdp_large.yaml b/examples/jamba/qlora_fsdp_large.yaml index 84cf90642..8736680da 100644 --- a/examples/jamba/qlora_fsdp_large.yaml +++ b/examples/jamba/qlora_fsdp_large.yaml @@ -1,5 +1,8 @@ base_model: ai21labs/AI21-Jamba-1.5-Large +# optionally might have model_type or tokenizer_type tokenizer_type: AutoTokenizer +# Automatically upload checkpoint and final model to HF +# hub_model_id: username/custom_model_name load_in_4bit: true strict: false diff --git a/examples/jeopardy-bot/config.yml b/examples/jeopardy-bot/config.yml index 088629c08..04f92a8dc 100644 --- a/examples/jeopardy-bot/config.yml +++ b/examples/jeopardy-bot/config.yml @@ -1,6 +1,10 @@ base_model: huggyllama/llama-7b +# optionally might have model_type or tokenizer_type model_type: LlamaForCausalLM tokenizer_type: LlamaTokenizer +# Automatically upload checkpoint and final model to HF +# hub_model_id: username/custom_model_name + load_in_8bit: false datasets: - path: openaccess-ai-collective/jeopardy diff --git a/examples/llama-2/fft_optimized.yml b/examples/llama-2/fft_optimized.yml index 3d94b04b8..3475fcd9a 100644 --- a/examples/llama-2/fft_optimized.yml +++ b/examples/llama-2/fft_optimized.yml @@ -1,6 +1,9 @@ base_model: NousResearch/Llama-2-7b-hf +# optionally might have model_type or tokenizer_type model_type: LlamaForCausalLM tokenizer_type: LlamaTokenizer +# Automatically upload checkpoint and final model to HF +# hub_model_id: username/custom_model_name load_in_8bit: false load_in_4bit: false diff --git a/examples/llama-2/gptq-lora.yml b/examples/llama-2/gptq-lora.yml index 2a706265b..7e45f7d63 100644 --- a/examples/llama-2/gptq-lora.yml +++ b/examples/llama-2/gptq-lora.yml @@ -1,8 +1,13 @@ base_model: TheBloke/Llama-2-7B-GPTQ -gptq: true -gptq_disable_exllama: true +# optionally might have model_type or tokenizer_type model_type: AutoModelForCausalLM tokenizer_type: LlamaTokenizer +# Automatically upload checkpoint and final model to HF +# hub_model_id: username/custom_model_name + +gptq: true +gptq_disable_exllama: true + tokenizer_use_fast: true tokenizer_legacy: true load_in_8bit: false diff --git a/examples/llama-2/lisa.yml b/examples/llama-2/lisa.yml index 7012d1f61..40391204c 100644 --- a/examples/llama-2/lisa.yml +++ b/examples/llama-2/lisa.yml @@ -1,6 +1,9 @@ base_model: NousResearch/Llama-2-7b-hf +# optionally might have model_type or tokenizer_type model_type: LlamaForCausalLM tokenizer_type: LlamaTokenizer +# Automatically upload checkpoint and final model to HF +# hub_model_id: username/custom_model_name load_in_8bit: false load_in_4bit: false diff --git a/examples/llama-2/loftq.yml b/examples/llama-2/loftq.yml index 68d9ac014..a5108e70f 100644 --- a/examples/llama-2/loftq.yml +++ b/examples/llama-2/loftq.yml @@ -1,6 +1,9 @@ base_model: NousResearch/Llama-2-7b-hf +# optionally might have model_type or tokenizer_type model_type: LlamaForCausalLM tokenizer_type: LlamaTokenizer +# Automatically upload checkpoint and final model to HF +# hub_model_id: username/custom_model_name load_in_8bit: false load_in_4bit: false diff --git a/examples/llama-2/lora.yml b/examples/llama-2/lora.yml index 95bfae692..ec0c80012 100644 --- a/examples/llama-2/lora.yml +++ b/examples/llama-2/lora.yml @@ -1,6 +1,9 @@ base_model: NousResearch/Llama-2-7b-hf +# optionally might have model_type or tokenizer_type model_type: LlamaForCausalLM tokenizer_type: LlamaTokenizer +# Automatically upload checkpoint and final model to HF +# hub_model_id: username/custom_model_name load_in_8bit: true load_in_4bit: false diff --git a/examples/llama-2/qlora-fsdp.yml b/examples/llama-2/qlora-fsdp.yml index 88029f92d..204c91693 100644 --- a/examples/llama-2/qlora-fsdp.yml +++ b/examples/llama-2/qlora-fsdp.yml @@ -1,6 +1,9 @@ base_model: NousResearch/Llama-2-7b-hf +# optionally might have model_type or tokenizer_type model_type: LlamaForCausalLM tokenizer_type: LlamaTokenizer +# Automatically upload checkpoint and final model to HF +# hub_model_id: username/custom_model_name load_in_8bit: false load_in_4bit: true diff --git a/examples/llama-2/qlora.yml b/examples/llama-2/qlora.yml index dda32170b..81d1acbec 100644 --- a/examples/llama-2/qlora.yml +++ b/examples/llama-2/qlora.yml @@ -1,6 +1,9 @@ base_model: NousResearch/Llama-2-7b-hf +# optionally might have model_type or tokenizer_type model_type: LlamaForCausalLM tokenizer_type: LlamaTokenizer +# Automatically upload checkpoint and final model to HF +# hub_model_id: username/custom_model_name load_in_8bit: false load_in_4bit: true diff --git a/examples/llama-3-vision/lora-11b.yaml b/examples/llama-3-vision/lora-11b.yaml index b2e494641..22dc3a9af 100644 --- a/examples/llama-3-vision/lora-11b.yaml +++ b/examples/llama-3-vision/lora-11b.yaml @@ -1,5 +1,9 @@ base_model: alpindale/Llama-3.2-11B-Vision-Instruct +# optionally might have model_type or tokenizer_type or processor_type processor_type: AutoProcessor +# Automatically upload checkpoint and final model to HF +# hub_model_id: username/custom_model_name + strict: false # these 3 lines are needed for now to handle vision chat templates w images diff --git a/examples/llama-3/fft-8b-liger-fsdp.yaml b/examples/llama-3/fft-8b-liger-fsdp.yaml index 043b5c980..2c8589b17 100644 --- a/examples/llama-3/fft-8b-liger-fsdp.yaml +++ b/examples/llama-3/fft-8b-liger-fsdp.yaml @@ -1,4 +1,6 @@ base_model: NousResearch/Meta-Llama-3.1-8B +# Automatically upload checkpoint and final model to HF +# hub_model_id: username/custom_model_name plugins: - axolotl.integrations.liger.LigerPlugin diff --git a/examples/llama-3/fft-8b.yaml b/examples/llama-3/fft-8b.yaml index 335902aac..a129c6e5b 100644 --- a/examples/llama-3/fft-8b.yaml +++ b/examples/llama-3/fft-8b.yaml @@ -1,4 +1,6 @@ base_model: NousResearch/Meta-Llama-3.1-8B +# Automatically upload checkpoint and final model to HF +# hub_model_id: username/custom_model_name load_in_8bit: false load_in_4bit: false diff --git a/examples/llama-3/instruct-dpo-lora-8b.yml b/examples/llama-3/instruct-dpo-lora-8b.yml index dc8835035..bb58b677a 100644 --- a/examples/llama-3/instruct-dpo-lora-8b.yml +++ b/examples/llama-3/instruct-dpo-lora-8b.yml @@ -1,6 +1,9 @@ base_model: meta-llama/Meta-Llama-3-8B-Instruct +# optionally might have model_type or tokenizer_type model_type: LlamaForCausalLM tokenizer_type: AutoTokenizer +# Automatically upload checkpoint and final model to HF +# hub_model_id: username/custom_model_name load_in_8bit: true load_in_4bit: false diff --git a/examples/llama-3/instruct-lora-8b.yml b/examples/llama-3/instruct-lora-8b.yml index ae9a8088c..853f85d74 100644 --- a/examples/llama-3/instruct-lora-8b.yml +++ b/examples/llama-3/instruct-lora-8b.yml @@ -1,6 +1,9 @@ base_model: NousResearch/Meta-Llama-3-8B-Instruct +# optionally might have model_type or tokenizer_type model_type: LlamaForCausalLM tokenizer_type: AutoTokenizer +# Automatically upload checkpoint and final model to HF +# hub_model_id: username/custom_model_name load_in_8bit: true load_in_4bit: false diff --git a/examples/llama-3/lora-1b-deduplicate-dpo.yml b/examples/llama-3/lora-1b-deduplicate-dpo.yml index 35a0260ca..00314da75 100644 --- a/examples/llama-3/lora-1b-deduplicate-dpo.yml +++ b/examples/llama-3/lora-1b-deduplicate-dpo.yml @@ -1,6 +1,9 @@ base_model: meta-llama/Llama-3.2-1B +# optionally might have model_type or tokenizer_type model_type: LlamaForCausalLM tokenizer_type: AutoTokenizer +# Automatically upload checkpoint and final model to HF +# hub_model_id: username/custom_model_name load_in_8bit: true load_in_4bit: false diff --git a/examples/llama-3/lora-1b-deduplicate-sft.yml b/examples/llama-3/lora-1b-deduplicate-sft.yml index c07d5f8ff..451696465 100644 --- a/examples/llama-3/lora-1b-deduplicate-sft.yml +++ b/examples/llama-3/lora-1b-deduplicate-sft.yml @@ -1,6 +1,9 @@ base_model: meta-llama/Llama-3.2-1B +# optionally might have model_type or tokenizer_type model_type: LlamaForCausalLM tokenizer_type: AutoTokenizer +# Automatically upload checkpoint and final model to HF +# hub_model_id: username/custom_model_name load_in_8bit: true load_in_4bit: false diff --git a/examples/llama-3/lora-1b.yml b/examples/llama-3/lora-1b.yml index bdda2ed75..a1c3afa87 100644 --- a/examples/llama-3/lora-1b.yml +++ b/examples/llama-3/lora-1b.yml @@ -1,4 +1,6 @@ base_model: NousResearch/Llama-3.2-1B +# Automatically upload checkpoint and final model to HF +# hub_model_id: username/custom_model_name load_in_8bit: false load_in_4bit: false diff --git a/examples/llama-3/lora-8b.yml b/examples/llama-3/lora-8b.yml index a20a529f5..7921857ce 100644 --- a/examples/llama-3/lora-8b.yml +++ b/examples/llama-3/lora-8b.yml @@ -1,6 +1,9 @@ base_model: NousResearch/Meta-Llama-3-8B +# optionally might have model_type or tokenizer_type model_type: LlamaForCausalLM tokenizer_type: AutoTokenizer +# Automatically upload checkpoint and final model to HF +# hub_model_id: username/custom_model_name load_in_8bit: true load_in_4bit: false diff --git a/examples/llama-3/qlora-1b-kto.yaml b/examples/llama-3/qlora-1b-kto.yaml index a876d8fd7..df4a08489 100644 --- a/examples/llama-3/qlora-1b-kto.yaml +++ b/examples/llama-3/qlora-1b-kto.yaml @@ -1,4 +1,6 @@ base_model: meta-llama/Llama-3.2-1B +# Automatically upload checkpoint and final model to HF +# hub_model_id: username/custom_model_name load_in_8bit: false load_in_4bit: true diff --git a/examples/llama-3/qlora-1b.yml b/examples/llama-3/qlora-1b.yml index be40db846..226bbb237 100644 --- a/examples/llama-3/qlora-1b.yml +++ b/examples/llama-3/qlora-1b.yml @@ -1,4 +1,6 @@ base_model: NousResearch/Llama-3.2-1B +# Automatically upload checkpoint and final model to HF +# hub_model_id: username/custom_model_name load_in_8bit: false load_in_4bit: true diff --git a/examples/llama-3/qlora-fsdp-405b.yaml b/examples/llama-3/qlora-fsdp-405b.yaml index 6eeec01c9..a60a97ef3 100644 --- a/examples/llama-3/qlora-fsdp-405b.yaml +++ b/examples/llama-3/qlora-fsdp-405b.yaml @@ -1,5 +1,8 @@ base_model: hugging-quants/Meta-Llama-3.1-405B-BNB-NF4-BF16 +# optionally might have model_type or tokenizer_type tokenizer_type: AutoTokenizer +# Automatically upload checkpoint and final model to HF +# hub_model_id: username/custom_model_name load_in_4bit: true strict: false diff --git a/examples/llama-3/qlora-fsdp-70b.yaml b/examples/llama-3/qlora-fsdp-70b.yaml index 9b74f6b4d..932e1a0d6 100644 --- a/examples/llama-3/qlora-fsdp-70b.yaml +++ b/examples/llama-3/qlora-fsdp-70b.yaml @@ -1,6 +1,9 @@ base_model: casperhansen/llama-3-70b-fp16 +# optionally might have model_type or tokenizer_type model_type: LlamaForCausalLM tokenizer_type: AutoTokenizer # PreTrainedTokenizerFast +# Automatically upload checkpoint and final model to HF +# hub_model_id: username/custom_model_name load_in_8bit: false load_in_4bit: true diff --git a/examples/llama-3/qlora.yml b/examples/llama-3/qlora.yml index 079c9cad0..64268a205 100644 --- a/examples/llama-3/qlora.yml +++ b/examples/llama-3/qlora.yml @@ -1,6 +1,9 @@ base_model: NousResearch/Meta-Llama-3-8B +# optionally might have model_type or tokenizer_type model_type: AutoModelForCausalLM tokenizer_type: AutoTokenizer +# Automatically upload checkpoint and final model to HF +# hub_model_id: username/custom_model_name load_in_8bit: false load_in_4bit: true diff --git a/examples/mamba/config.yml b/examples/mamba/config.yml index f88f5138d..ca96fbfc3 100644 --- a/examples/mamba/config.yml +++ b/examples/mamba/config.yml @@ -1,7 +1,10 @@ base_model: state-spaces/mamba-2.8b +# optionally might have model_type or tokenizer_type or tokenizer_config model_type: MambaLMHeadModel tokenizer_type: AutoTokenizer tokenizer_config: EleutherAI/gpt-neox-20b +# Automatically upload checkpoint and final model to HF +# hub_model_id: username/custom_model_name load_in_8bit: false load_in_4bit: false diff --git a/examples/mistral/bigstral-ds-zero3.yaml b/examples/mistral/bigstral-ds-zero3.yaml index e993e44a7..5ee214c1b 100644 --- a/examples/mistral/bigstral-ds-zero3.yaml +++ b/examples/mistral/bigstral-ds-zero3.yaml @@ -1,6 +1,10 @@ base_model: mistral-community/Mixtral-8x22B-v0.1 +# optionally might have model_type or tokenizer_type model_type: AutoModelForCausalLM tokenizer_type: LlamaTokenizer +# Automatically upload checkpoint and final model to HF +# hub_model_id: username/custom_model_name + trust_remote_code: true load_in_8bit: false diff --git a/examples/mistral/config.yml b/examples/mistral/config.yml index a70937c4f..890203339 100644 --- a/examples/mistral/config.yml +++ b/examples/mistral/config.yml @@ -1,6 +1,9 @@ base_model: mistralai/Mistral-7B-v0.1 +# optionally might have model_type or tokenizer_type model_type: MistralForCausalLM tokenizer_type: LlamaTokenizer +# Automatically upload checkpoint and final model to HF +# hub_model_id: username/custom_model_name load_in_8bit: false load_in_4bit: false diff --git a/examples/mistral/lora-mps.yml b/examples/mistral/lora-mps.yml index 03c74bb59..c1df9896c 100644 --- a/examples/mistral/lora-mps.yml +++ b/examples/mistral/lora-mps.yml @@ -1,6 +1,9 @@ base_model: mistralai/Mistral-7B-v0.1 +# optionally might have model_type or tokenizer_type model_type: MistralForCausalLM tokenizer_type: LlamaTokenizer +# Automatically upload checkpoint and final model to HF +# hub_model_id: username/custom_model_name load_in_8bit: false load_in_4bit: false diff --git a/examples/mistral/lora.yml b/examples/mistral/lora.yml index 0d5dc9edd..11c1e0ee7 100644 --- a/examples/mistral/lora.yml +++ b/examples/mistral/lora.yml @@ -1,6 +1,9 @@ base_model: mistralai/Mistral-7B-v0.1 +# optionally might have model_type or tokenizer_type model_type: MistralForCausalLM tokenizer_type: LlamaTokenizer +# Automatically upload checkpoint and final model to HF +# hub_model_id: username/custom_model_name load_in_8bit: true load_in_4bit: false diff --git a/examples/mistral/mistral-dpo-qlora.yml b/examples/mistral/mistral-dpo-qlora.yml index a558e0453..e2eb6a264 100644 --- a/examples/mistral/mistral-dpo-qlora.yml +++ b/examples/mistral/mistral-dpo-qlora.yml @@ -4,8 +4,11 @@ #face problems with the special tokens. base_model: mistralai/Mistral-7B-Instruct-v0.2 +# optionally might have model_type or tokenizer_type model_type: MistralForCausalLM tokenizer_type: LlamaTokenizer +# Automatically upload checkpoint and final model to HF +# hub_model_id: username/custom_model_name load_in_8bit: false load_in_4bit: true diff --git a/examples/mistral/mistral-qlora-fsdp.yml b/examples/mistral/mistral-qlora-fsdp.yml index e6b07c594..521f4de5f 100644 --- a/examples/mistral/mistral-qlora-fsdp.yml +++ b/examples/mistral/mistral-qlora-fsdp.yml @@ -1,6 +1,10 @@ base_model: mistralai/Mixtral-8x7B-v0.1 +# optionally might have model_type or tokenizer_type model_type: AutoModelForCausalLM tokenizer_type: LlamaTokenizer +# Automatically upload checkpoint and final model to HF +# hub_model_id: username/custom_model_name + trust_remote_code: true load_in_8bit: false diff --git a/examples/mistral/mistral-qlora-orpo.yml b/examples/mistral/mistral-qlora-orpo.yml index 2549ef018..82f30dc17 100644 --- a/examples/mistral/mistral-qlora-orpo.yml +++ b/examples/mistral/mistral-qlora-orpo.yml @@ -1,6 +1,9 @@ base_model: mistralai/Mistral-7B-v0.1 +# optionally might have model_type or tokenizer_type model_type: MistralForCausalLM tokenizer_type: LlamaTokenizer +# Automatically upload checkpoint and final model to HF +# hub_model_id: username/custom_model_name load_in_8bit: false load_in_4bit: true diff --git a/examples/mistral/mixtral-8x22b-qlora-fsdp.yml b/examples/mistral/mixtral-8x22b-qlora-fsdp.yml index fe68b2817..4a65b1a7d 100644 --- a/examples/mistral/mixtral-8x22b-qlora-fsdp.yml +++ b/examples/mistral/mixtral-8x22b-qlora-fsdp.yml @@ -1,6 +1,9 @@ base_model: mistral-community/Mixtral-8x22B-v0.1 +# optionally might have model_type or tokenizer_type model_type: AutoModelForCausalLM tokenizer_type: LlamaTokenizer +# Automatically upload checkpoint and final model to HF +# hub_model_id: username/custom_model_name load_in_8bit: false load_in_4bit: true diff --git a/examples/mistral/mixtral-qlora-fsdp.yml b/examples/mistral/mixtral-qlora-fsdp.yml index c09597040..fbd9bd937 100644 --- a/examples/mistral/mixtral-qlora-fsdp.yml +++ b/examples/mistral/mixtral-qlora-fsdp.yml @@ -1,6 +1,10 @@ base_model: mistralai/Mixtral-8x7B-v0.1 +# optionally might have model_type or tokenizer_type model_type: AutoModelForCausalLM tokenizer_type: LlamaTokenizer +# Automatically upload checkpoint and final model to HF +# hub_model_id: username/custom_model_name + trust_remote_code: true load_in_8bit: false diff --git a/examples/mistral/mixtral.yml b/examples/mistral/mixtral.yml index 13fbe92ab..ac80ec933 100644 --- a/examples/mistral/mixtral.yml +++ b/examples/mistral/mixtral.yml @@ -1,6 +1,10 @@ base_model: mistralai/Mixtral-8x7B-v0.1 +# optionally might have model_type or tokenizer_type model_type: AutoModelForCausalLM tokenizer_type: LlamaTokenizer +# Automatically upload checkpoint and final model to HF +# hub_model_id: username/custom_model_name + trust_remote_code: true load_in_8bit: false diff --git a/examples/mistral/mixtral_22.yml b/examples/mistral/mixtral_22.yml index 9a1e86386..7f2a72212 100644 --- a/examples/mistral/mixtral_22.yml +++ b/examples/mistral/mixtral_22.yml @@ -1,6 +1,10 @@ base_model: mistral-community/Mixtral-8x22B-v0.1 +# optionally might have model_type or tokenizer_type model_type: AutoModelForCausalLM tokenizer_type: LlamaTokenizer +# Automatically upload checkpoint and final model to HF +# hub_model_id: username/custom_model_name + trust_remote_code: true load_in_8bit: false diff --git a/examples/mistral/qlora.yml b/examples/mistral/qlora.yml index c7bdb155c..5f3fa10b8 100644 --- a/examples/mistral/qlora.yml +++ b/examples/mistral/qlora.yml @@ -1,6 +1,9 @@ base_model: mistralai/Mistral-7B-v0.1 +# optionally might have model_type or tokenizer_type model_type: MistralForCausalLM tokenizer_type: LlamaTokenizer +# Automatically upload checkpoint and final model to HF +# hub_model_id: username/custom_model_name load_in_8bit: false load_in_4bit: true diff --git a/examples/mpt-7b/config.yml b/examples/mpt-7b/config.yml index 530415de1..cf4b433fe 100644 --- a/examples/mpt-7b/config.yml +++ b/examples/mpt-7b/config.yml @@ -1,5 +1,9 @@ base_model: mosaicml/mpt-7b +# optionally might have model_type or tokenizer_type tokenizer_type: AutoTokenizer +# Automatically upload checkpoint and final model to HF +# hub_model_id: username/custom_model_name + trust_remote_code: true # required for mpt as their model class is not merged into transformers yet load_in_8bit: false datasets: diff --git a/examples/openllama-3b/config.yml b/examples/openllama-3b/config.yml index a0473213c..ec66014b4 100644 --- a/examples/openllama-3b/config.yml +++ b/examples/openllama-3b/config.yml @@ -1,6 +1,10 @@ base_model: openlm-research/open_llama_3b_v2 +# optionally might have model_type or tokenizer_type model_type: LlamaForCausalLM tokenizer_type: LlamaTokenizer +# Automatically upload checkpoint and final model to HF +# hub_model_id: username/custom_model_name + load_in_8bit: false load_in_4bit: false strict: false diff --git a/examples/openllama-3b/lora.yml b/examples/openllama-3b/lora.yml index 2b6784915..b449df9ae 100644 --- a/examples/openllama-3b/lora.yml +++ b/examples/openllama-3b/lora.yml @@ -1,6 +1,10 @@ base_model: openlm-research/open_llama_3b_v2 +# optionally might have model_type or tokenizer_type model_type: LlamaForCausalLM tokenizer_type: LlamaTokenizer +# Automatically upload checkpoint and final model to HF +# hub_model_id: username/custom_model_name + load_in_8bit: true load_in_4bit: false strict: false diff --git a/examples/openllama-3b/qlora.yml b/examples/openllama-3b/qlora.yml index 8d4dc05ca..3efcdabc6 100644 --- a/examples/openllama-3b/qlora.yml +++ b/examples/openllama-3b/qlora.yml @@ -1,6 +1,10 @@ base_model: openlm-research/open_llama_3b_v2 +# optionally might have model_type or tokenizer_type model_type: LlamaForCausalLM tokenizer_type: LlamaTokenizer +# Automatically upload checkpoint and final model to HF +# hub_model_id: username/custom_model_name + load_in_8bit: false load_in_4bit: true strict: false diff --git a/examples/phi/lora-3.5.yaml b/examples/phi/lora-3.5.yaml index 246701148..8c0205f4c 100644 --- a/examples/phi/lora-3.5.yaml +++ b/examples/phi/lora-3.5.yaml @@ -1,6 +1,9 @@ base_model: microsoft/Phi-3.5-mini-instruct +# optionally might have model_type or tokenizer_type model_type: AutoModelForCausalLM tokenizer_type: AutoTokenizer +# Automatically upload checkpoint and final model to HF +# hub_model_id: username/custom_model_name load_in_8bit: true load_in_4bit: false diff --git a/examples/phi/phi-ft.yml b/examples/phi/phi-ft.yml index 0dabadc7a..fc5848dc5 100644 --- a/examples/phi/phi-ft.yml +++ b/examples/phi/phi-ft.yml @@ -1,6 +1,9 @@ base_model: microsoft/phi-1_5 +# optionally might have model_type or tokenizer_type model_type: AutoModelForCausalLM tokenizer_type: AutoTokenizer +# Automatically upload checkpoint and final model to HF +# hub_model_id: username/custom_model_name load_in_8bit: false load_in_4bit: false diff --git a/examples/phi/phi-qlora.yml b/examples/phi/phi-qlora.yml index 7c181a3c1..a98cd1040 100644 --- a/examples/phi/phi-qlora.yml +++ b/examples/phi/phi-qlora.yml @@ -1,6 +1,9 @@ base_model: microsoft/phi-1_5 +# optionally might have model_type or tokenizer_type model_type: AutoModelForCausalLM tokenizer_type: AutoTokenizer +# Automatically upload checkpoint and final model to HF +# hub_model_id: username/custom_model_name load_in_8bit: false load_in_4bit: true diff --git a/examples/phi/phi2-ft.yml b/examples/phi/phi2-ft.yml index 27815550b..0f656f821 100644 --- a/examples/phi/phi2-ft.yml +++ b/examples/phi/phi2-ft.yml @@ -1,6 +1,9 @@ base_model: microsoft/phi-2 +# optionally might have model_type or tokenizer_type model_type: AutoModelForCausalLM tokenizer_type: AutoTokenizer +# Automatically upload checkpoint and final model to HF +# hub_model_id: username/custom_model_name load_in_8bit: false load_in_4bit: false diff --git a/examples/phi/phi3-ft-fsdp.yml b/examples/phi/phi3-ft-fsdp.yml index d220e577d..c081e47b9 100644 --- a/examples/phi/phi3-ft-fsdp.yml +++ b/examples/phi/phi3-ft-fsdp.yml @@ -1,6 +1,9 @@ base_model: microsoft/Phi-3-mini-4k-instruct +# optionally might have model_type or tokenizer_type model_type: AutoModelForCausalLM tokenizer_type: AutoTokenizer +# Automatically upload checkpoint and final model to HF +# hub_model_id: username/custom_model_name load_in_8bit: false load_in_4bit: false diff --git a/examples/phi/phi3-ft.yml b/examples/phi/phi3-ft.yml index 18db9b8b7..ac42153af 100644 --- a/examples/phi/phi3-ft.yml +++ b/examples/phi/phi3-ft.yml @@ -1,7 +1,11 @@ base_model: microsoft/Phi-3-mini-4k-instruct +# optionally might have model_type or tokenizer_type trust_remote_code: true model_type: AutoModelForCausalLM tokenizer_type: AutoTokenizer +# Automatically upload checkpoint and final model to HF +# hub_model_id: username/custom_model_name + chat_template: phi_3 load_in_8bit: false diff --git a/examples/pythia-12b/config.yml b/examples/pythia-12b/config.yml index 18e6beaaf..52ab77055 100644 --- a/examples/pythia-12b/config.yml +++ b/examples/pythia-12b/config.yml @@ -1,7 +1,11 @@ base_model: EleutherAI/pythia-12b-deduped base_model_ignore_patterns: pytorch* # prefer safetensors +# optionally might have model_type or tokenizer_type model_type: GPTNeoXForCausalLM tokenizer_type: AutoTokenizer +# Automatically upload checkpoint and final model to HF +# hub_model_id: username/custom_model_name + load_in_8bit: false load_in_4bit: false gptq: false diff --git a/examples/pythia/lora.yml b/examples/pythia/lora.yml index 0aa650f67..203652f6b 100644 --- a/examples/pythia/lora.yml +++ b/examples/pythia/lora.yml @@ -1,4 +1,7 @@ base_model: EleutherAI/pythia-1.4b-deduped +# Automatically upload checkpoint and final model to HF +# hub_model_id: username/custom_model_name + load_in_8bit: true datasets: - path: teknium/GPT4-LLM-Cleaned diff --git a/examples/qwen/lora.yml b/examples/qwen/lora.yml index dd8dc1e4f..961125a51 100644 --- a/examples/qwen/lora.yml +++ b/examples/qwen/lora.yml @@ -1,6 +1,9 @@ base_model: Qwen/Qwen-7B +# optionally might have model_type or tokenizer_type model_type: AutoModelForCausalLM tokenizer_type: AutoTokenizer +# Automatically upload checkpoint and final model to HF +# hub_model_id: username/custom_model_name trust_remote_code: true diff --git a/examples/qwen/qlora.yml b/examples/qwen/qlora.yml index 01c0c0ab8..e7159eaa5 100644 --- a/examples/qwen/qlora.yml +++ b/examples/qwen/qlora.yml @@ -1,6 +1,9 @@ base_model: Qwen/Qwen-7B +# optionally might have model_type or tokenizer_type model_type: AutoModelForCausalLM tokenizer_type: AutoTokenizer +# Automatically upload checkpoint and final model to HF +# hub_model_id: username/custom_model_name trust_remote_code: true diff --git a/examples/qwen/qwen2-moe-lora.yaml b/examples/qwen/qwen2-moe-lora.yaml index 452335e38..b357b9344 100644 --- a/examples/qwen/qwen2-moe-lora.yaml +++ b/examples/qwen/qwen2-moe-lora.yaml @@ -1,4 +1,7 @@ base_model: Qwen/Qwen1.5-MoE-A2.7B +# Automatically upload checkpoint and final model to HF +# hub_model_id: username/custom_model_name + trust_remote_code: true load_in_8bit: false diff --git a/examples/qwen/qwen2-moe-qlora.yaml b/examples/qwen/qwen2-moe-qlora.yaml index bc11007c7..d45e4c89f 100644 --- a/examples/qwen/qwen2-moe-qlora.yaml +++ b/examples/qwen/qwen2-moe-qlora.yaml @@ -1,4 +1,7 @@ base_model: Qwen/Qwen1.5-MoE-A2.7B +# Automatically upload checkpoint and final model to HF +# hub_model_id: username/custom_model_name + trust_remote_code: true load_in_8bit: false diff --git a/examples/qwen2/dpo.yaml b/examples/qwen2/dpo.yaml index 64c3e7629..e924be195 100644 --- a/examples/qwen2/dpo.yaml +++ b/examples/qwen2/dpo.yaml @@ -1,4 +1,6 @@ base_model: Qwen/Qwen2.5-0.5B +# Automatically upload checkpoint and final model to HF +# hub_model_id: username/custom_model_name strict: false diff --git a/examples/qwen2/qlora-fsdp.yaml b/examples/qwen2/qlora-fsdp.yaml index d61c72a37..cc4974908 100644 --- a/examples/qwen2/qlora-fsdp.yaml +++ b/examples/qwen2/qlora-fsdp.yaml @@ -1,4 +1,7 @@ base_model: Qwen/Qwen2-7B +# Automatically upload checkpoint and final model to HF +# hub_model_id: username/custom_model_name + trust_remote_code: true load_in_8bit: false diff --git a/examples/redpajama/config-3b.yml b/examples/redpajama/config-3b.yml index ff395a863..d716727a3 100644 --- a/examples/redpajama/config-3b.yml +++ b/examples/redpajama/config-3b.yml @@ -1,6 +1,10 @@ base_model: togethercomputer/RedPajama-INCITE-Chat-3B-v1 +# optionally might have model_type or tokenizer_type model_type: GPTNeoXForCausalLM tokenizer_type: AutoTokenizer +# Automatically upload checkpoint and final model to HF +# hub_model_id: username/custom_model_name + trust_remote_code: load_in_8bit: false datasets: diff --git a/examples/replit-3b/config-lora.yml b/examples/replit-3b/config-lora.yml index 9fee099d4..bb2a6aace 100644 --- a/examples/replit-3b/config-lora.yml +++ b/examples/replit-3b/config-lora.yml @@ -1,4 +1,7 @@ base_model: replit/replit-code-v1-3b +# Automatically upload checkpoint and final model to HF +# hub_model_id: username/custom_model_name + trust_remote_code: true load_in_8bit: false datasets: diff --git a/examples/stablelm-2/1.6b/fft.yml b/examples/stablelm-2/1.6b/fft.yml index 777262a7e..3ecb1581b 100644 --- a/examples/stablelm-2/1.6b/fft.yml +++ b/examples/stablelm-2/1.6b/fft.yml @@ -1,6 +1,10 @@ base_model: stabilityai/stablelm-2-1_6b +# optionally might have model_type or tokenizer_type model_type: AutoModelForCausalLM tokenizer_type: AutoTokenizer +# Automatically upload checkpoint and final model to HF +# hub_model_id: username/custom_model_name + trust_remote_code: true load_in_8bit: false diff --git a/examples/stablelm-2/1.6b/lora.yml b/examples/stablelm-2/1.6b/lora.yml index c65b9e4cd..8597de6a2 100644 --- a/examples/stablelm-2/1.6b/lora.yml +++ b/examples/stablelm-2/1.6b/lora.yml @@ -1,6 +1,10 @@ base_model: stabilityai/stablelm-2-1_6b +# optionally might have model_type or tokenizer_type model_type: AutoModelForCausalLM tokenizer_type: AutoTokenizer +# Automatically upload checkpoint and final model to HF +# hub_model_id: username/custom_model_name + trust_remote_code: true load_in_8bit: true diff --git a/examples/starcoder2/qlora.yml b/examples/starcoder2/qlora.yml index 83fc0d89f..d1db71d6d 100644 --- a/examples/starcoder2/qlora.yml +++ b/examples/starcoder2/qlora.yml @@ -1,4 +1,6 @@ base_model: bigcode/starcoder2-3b +# Automatically upload checkpoint and final model to HF +# hub_model_id: username/custom_model_name load_in_8bit: false load_in_4bit: true diff --git a/examples/tiny-llama/lora-mps.yml b/examples/tiny-llama/lora-mps.yml index 99e404e43..f949acd0f 100644 --- a/examples/tiny-llama/lora-mps.yml +++ b/examples/tiny-llama/lora-mps.yml @@ -1,6 +1,9 @@ base_model: TinyLlama/TinyLlama_v1.1 +# optionally might have model_type or tokenizer_type model_type: LlamaForCausalLM tokenizer_type: LlamaTokenizer +# Automatically upload checkpoint and final model to HF +# hub_model_id: username/custom_model_name load_in_8bit: true load_in_4bit: false diff --git a/examples/tiny-llama/lora.yml b/examples/tiny-llama/lora.yml index 7c7fb6706..54aa5ec27 100644 --- a/examples/tiny-llama/lora.yml +++ b/examples/tiny-llama/lora.yml @@ -1,5 +1,8 @@ base_model: TinyLlama/TinyLlama_v1.1 +# optionally might have model_type or tokenizer_type tokenizer_type: AutoTokenizer +# Automatically upload checkpoint and final model to HF +# hub_model_id: username/custom_model_name load_in_8bit: true load_in_4bit: false diff --git a/examples/tiny-llama/pretrain.yml b/examples/tiny-llama/pretrain.yml index 010a1608a..fd6d2c9c1 100644 --- a/examples/tiny-llama/pretrain.yml +++ b/examples/tiny-llama/pretrain.yml @@ -1,7 +1,9 @@ base_model: TinyLlama/TinyLlama-1.1B-Chat-v1.0 - +# optionally might have model_type or tokenizer_type model_type: LlamaForCausalLM tokenizer_type: LlamaTokenizer +# Automatically upload checkpoint and final model to HF +# hub_model_id: username/custom_model_name load_in_8bit: false load_in_4bit: false diff --git a/examples/tiny-llama/qlora.yml b/examples/tiny-llama/qlora.yml index 931fe03e0..694ab3a15 100644 --- a/examples/tiny-llama/qlora.yml +++ b/examples/tiny-llama/qlora.yml @@ -1,6 +1,9 @@ base_model: TinyLlama/TinyLlama_v1.1 +# optionally might have model_type or tokenizer_type model_type: LlamaForCausalLM tokenizer_type: LlamaTokenizer +# Automatically upload checkpoint and final model to HF +# hub_model_id: username/custom_model_name load_in_8bit: false load_in_4bit: true diff --git a/examples/xgen-7b/xgen-7b-8k-qlora.yml b/examples/xgen-7b/xgen-7b-8k-qlora.yml index 7e3f83cbd..d798e326d 100644 --- a/examples/xgen-7b/xgen-7b-8k-qlora.yml +++ b/examples/xgen-7b/xgen-7b-8k-qlora.yml @@ -1,9 +1,14 @@ # An example finetuning Saleforce's XGen-7b model with 8k context using qlora # on Tim Dettmer's Guanaco dataset. base_model: Salesforce/xgen-7b-8k-base -trust_remote_code: true +# optionally might have model_type or tokenizer_type model_type: AutoModelForCausalLM tokenizer_type: AutoTokenizer +# Automatically upload checkpoint and final model to HF +# hub_model_id: username/custom_model_name + +trust_remote_code: true + load_in_8bit: false # enable 4bit for QLoRA load_in_4bit: true diff --git a/examples/yi-34B-chat/qlora.yml b/examples/yi-34B-chat/qlora.yml index 7fe322d63..b68d00883 100644 --- a/examples/yi-34B-chat/qlora.yml +++ b/examples/yi-34B-chat/qlora.yml @@ -1,6 +1,9 @@ base_model: 01-ai/Yi-34B-Chat +# optionally might have model_type or tokenizer_type model_type: LlamaForCausalLM tokenizer_type: LlamaTokenizer +# Automatically upload checkpoint and final model to HF +# hub_model_id: username/custom_model_name load_in_8bit: false load_in_4bit: true