From 1c14c4a15cb2cb70d90bedd0a1580e30314c90cb Mon Sep 17 00:00:00 2001
From: Sunny Liu <22844540+bursteratom@users.noreply.github.com>
Date: Tue, 17 Dec 2024 11:24:30 -0500
Subject: [PATCH] Add hub model id config options to all example yml files
 (#2196) [skip ci]

* added hub model_id in example yml

* add hub model id to example yml
---
 examples/cerebras/btlm-ft.yml                 | 4 ++++
 examples/cerebras/qlora.yml                   | 3 +++
 examples/code-llama/13b/lora.yml              | 3 +++
 examples/code-llama/13b/qlora.yml             | 3 +++
 examples/code-llama/34b/lora.yml              | 3 +++
 examples/code-llama/34b/qlora.yml             | 3 +++
 examples/code-llama/7b/lora.yml               | 3 +++
 examples/code-llama/7b/qlora.yml              | 3 +++
 examples/dbrx/16bit-lora.yaml                 | 3 +++
 examples/dbrx/8bit-lora.yaml                  | 3 +++
 examples/dbrx/fft-ds-zero3.yaml               | 3 +++
 examples/deepseek-v2/fft-fsdp-16b.yaml        | 2 ++
 examples/deepseek-v2/qlora-fsdp-2_5.yaml      | 3 +++
 examples/falcon/config-7b-lora.yml            | 7 ++++++-
 examples/falcon/config-7b-qlora.yml           | 9 +++++++--
 examples/falcon/config-7b.yml                 | 7 ++++++-
 examples/gemma/qlora.yml                      | 3 +++
 examples/gemma2/qlora.yml                     | 3 +++
 examples/gemma2/reward-model.yaml             | 3 +++
 examples/gptj/qlora.yml                       | 3 +++
 examples/jamba/qlora.yaml                     | 3 +++
 examples/jamba/qlora_deepspeed.yaml           | 2 ++
 examples/jamba/qlora_fsdp_large.yaml          | 3 +++
 examples/jeopardy-bot/config.yml              | 4 ++++
 examples/llama-2/fft_optimized.yml            | 3 +++
 examples/llama-2/gptq-lora.yml                | 9 +++++++--
 examples/llama-2/lisa.yml                     | 3 +++
 examples/llama-2/loftq.yml                    | 3 +++
 examples/llama-2/lora.yml                     | 3 +++
 examples/llama-2/qlora-fsdp.yml               | 3 +++
 examples/llama-2/qlora.yml                    | 3 +++
 examples/llama-3-vision/lora-11b.yaml         | 4 ++++
 examples/llama-3/fft-8b-liger-fsdp.yaml       | 2 ++
 examples/llama-3/fft-8b.yaml                  | 2 ++
 examples/llama-3/instruct-dpo-lora-8b.yml     | 3 +++
 examples/llama-3/instruct-lora-8b.yml         | 3 +++
 examples/llama-3/lora-1b-deduplicate-dpo.yml  | 3 +++
 examples/llama-3/lora-1b-deduplicate-sft.yml  | 3 +++
 examples/llama-3/lora-1b.yml                  | 2 ++
 examples/llama-3/lora-8b.yml                  | 3 +++
 examples/llama-3/qlora-1b-kto.yaml            | 2 ++
 examples/llama-3/qlora-1b.yml                 | 2 ++
 examples/llama-3/qlora-fsdp-405b.yaml         | 3 +++
 examples/llama-3/qlora-fsdp-70b.yaml          | 3 +++
 examples/llama-3/qlora.yml                    | 3 +++
 examples/mamba/config.yml                     | 3 +++
 examples/mistral/bigstral-ds-zero3.yaml       | 4 ++++
 examples/mistral/config.yml                   | 3 +++
 examples/mistral/lora-mps.yml                 | 3 +++
 examples/mistral/lora.yml                     | 3 +++
 examples/mistral/mistral-dpo-qlora.yml        | 3 +++
 examples/mistral/mistral-qlora-fsdp.yml       | 4 ++++
 examples/mistral/mistral-qlora-orpo.yml       | 3 +++
 examples/mistral/mixtral-8x22b-qlora-fsdp.yml | 3 +++
 examples/mistral/mixtral-qlora-fsdp.yml       | 4 ++++
 examples/mistral/mixtral.yml                  | 4 ++++
 examples/mistral/mixtral_22.yml               | 4 ++++
 examples/mistral/qlora.yml                    | 3 +++
 examples/mpt-7b/config.yml                    | 4 ++++
 examples/openllama-3b/config.yml              | 4 ++++
 examples/openllama-3b/lora.yml                | 4 ++++
 examples/openllama-3b/qlora.yml               | 4 ++++
 examples/phi/lora-3.5.yaml                    | 3 +++
 examples/phi/phi-ft.yml                       | 3 +++
 examples/phi/phi-qlora.yml                    | 3 +++
 examples/phi/phi2-ft.yml                      | 3 +++
 examples/phi/phi3-ft-fsdp.yml                 | 3 +++
 examples/phi/phi3-ft.yml                      | 4 ++++
 examples/pythia-12b/config.yml                | 4 ++++
 examples/pythia/lora.yml                      | 3 +++
 examples/qwen/lora.yml                        | 3 +++
 examples/qwen/qlora.yml                       | 3 +++
 examples/qwen/qwen2-moe-lora.yaml             | 3 +++
 examples/qwen/qwen2-moe-qlora.yaml            | 3 +++
 examples/qwen2/dpo.yaml                       | 2 ++
 examples/qwen2/qlora-fsdp.yaml                | 3 +++
 examples/redpajama/config-3b.yml              | 4 ++++
 examples/replit-3b/config-lora.yml            | 3 +++
 examples/stablelm-2/1.6b/fft.yml              | 4 ++++
 examples/stablelm-2/1.6b/lora.yml             | 4 ++++
 examples/starcoder2/qlora.yml                 | 2 ++
 examples/tiny-llama/lora-mps.yml              | 3 +++
 examples/tiny-llama/lora.yml                  | 3 +++
 examples/tiny-llama/pretrain.yml              | 4 +++-
 examples/tiny-llama/qlora.yml                 | 3 +++
 examples/xgen-7b/xgen-7b-8k-qlora.yml         | 7 ++++++-
 examples/yi-34B-chat/qlora.yml                | 3 +++
 87 files changed, 286 insertions(+), 8 deletions(-)

diff --git a/examples/cerebras/btlm-ft.yml b/examples/cerebras/btlm-ft.yml
index ba4e65daa..780616e04 100644
--- a/examples/cerebras/btlm-ft.yml
+++ b/examples/cerebras/btlm-ft.yml
@@ -1,6 +1,10 @@
 base_model: cerebras/btlm-3b-8k-base
+# optionally might have model_type or tokenizer_type
 model_type: AutoModelForCausalLM
 tokenizer_type: GPT2Tokenizer
+# Automatically upload checkpoint and final model to HF
+# hub_model_id: username/custom_model_name
+
 trust_remote_code: true
 tokenizer_use_fast: true
 tokenizer_legacy: true
diff --git a/examples/cerebras/qlora.yml b/examples/cerebras/qlora.yml
index 285607a4c..866b4ab58 100644
--- a/examples/cerebras/qlora.yml
+++ b/examples/cerebras/qlora.yml
@@ -1,4 +1,7 @@
 base_model: cerebras/Cerebras-GPT-1.3B
+# Automatically upload checkpoint and final model to HF
+# hub_model_id: username/custom_model_name
+
 load_in_8bit: false
 load_in_4bit: true
 strict: false
diff --git a/examples/code-llama/13b/lora.yml b/examples/code-llama/13b/lora.yml
index 0ba96cfaa..2b8a720b2 100644
--- a/examples/code-llama/13b/lora.yml
+++ b/examples/code-llama/13b/lora.yml
@@ -1,6 +1,9 @@
 base_model: codellama/CodeLlama-13b-hf
+# optionally might have model_type or tokenizer_type
 model_type: LlamaForCausalLM
 tokenizer_type: CodeLlamaTokenizer
+# Automatically upload checkpoint and final model to HF
+# hub_model_id: username/custom_model_name
 
 load_in_8bit: true
 load_in_4bit: false
diff --git a/examples/code-llama/13b/qlora.yml b/examples/code-llama/13b/qlora.yml
index 787862d01..92aa6ac97 100644
--- a/examples/code-llama/13b/qlora.yml
+++ b/examples/code-llama/13b/qlora.yml
@@ -1,6 +1,9 @@
 base_model: codellama/CodeLlama-13b-hf
+# optionally might have model_type or tokenizer_type
 model_type: LlamaForCausalLM
 tokenizer_type: CodeLlamaTokenizer
+# Automatically upload checkpoint and final model to HF
+# hub_model_id: username/custom_model_name
 
 load_in_8bit: false
 load_in_4bit: true
diff --git a/examples/code-llama/34b/lora.yml b/examples/code-llama/34b/lora.yml
index 92d4c544a..af343e389 100644
--- a/examples/code-llama/34b/lora.yml
+++ b/examples/code-llama/34b/lora.yml
@@ -1,6 +1,9 @@
 base_model: codellama/CodeLlama-34b-hf
+# optionally might have model_type or tokenizer_type
 model_type: LlamaForCausalLM
 tokenizer_type: CodeLlamaTokenizer
+# Automatically upload checkpoint and final model to HF
+# hub_model_id: username/custom_model_name
 
 load_in_8bit: true
 load_in_4bit: false
diff --git a/examples/code-llama/34b/qlora.yml b/examples/code-llama/34b/qlora.yml
index 93a6de877..f45e9205f 100644
--- a/examples/code-llama/34b/qlora.yml
+++ b/examples/code-llama/34b/qlora.yml
@@ -1,6 +1,9 @@
 base_model: codellama/CodeLlama-34b-hf
+# optionally might have model_type or tokenizer_type
 model_type: LlamaForCausalLM
 tokenizer_type: CodeLlamaTokenizer
+# Automatically upload checkpoint and final model to HF
+# hub_model_id: username/custom_model_name
 
 load_in_8bit: false
 load_in_4bit: true
diff --git a/examples/code-llama/7b/lora.yml b/examples/code-llama/7b/lora.yml
index d13f50532..6c385dbcb 100644
--- a/examples/code-llama/7b/lora.yml
+++ b/examples/code-llama/7b/lora.yml
@@ -1,6 +1,9 @@
 base_model: codellama/CodeLlama-7b-hf
+# optionally might have model_type or tokenizer_type
 model_type: LlamaForCausalLM
 tokenizer_type: CodeLlamaTokenizer
+# Automatically upload checkpoint and final model to HF
+# hub_model_id: username/custom_model_name
 
 load_in_8bit: true
 load_in_4bit: false
diff --git a/examples/code-llama/7b/qlora.yml b/examples/code-llama/7b/qlora.yml
index a1026a982..ccd256406 100644
--- a/examples/code-llama/7b/qlora.yml
+++ b/examples/code-llama/7b/qlora.yml
@@ -1,6 +1,9 @@
 base_model: codellama/CodeLlama-7b-hf
+# optionally might have model_type or tokenizer_type
 model_type: LlamaForCausalLM
 tokenizer_type: CodeLlamaTokenizer
+# Automatically upload checkpoint and final model to HF
+# hub_model_id: username/custom_model_name
 
 load_in_8bit: false
 load_in_4bit: true
diff --git a/examples/dbrx/16bit-lora.yaml b/examples/dbrx/16bit-lora.yaml
index 32b625ac6..645ba1d59 100644
--- a/examples/dbrx/16bit-lora.yaml
+++ b/examples/dbrx/16bit-lora.yaml
@@ -1,4 +1,7 @@
 base_model: LnL-AI/dbrx-base-converted-v2
+# Automatically upload checkpoint and final model to HF
+# hub_model_id: username/custom_model_name
+
 trust_remote_code: true
 
 load_in_8bit: false
diff --git a/examples/dbrx/8bit-lora.yaml b/examples/dbrx/8bit-lora.yaml
index 50ee0a016..4b9f60756 100644
--- a/examples/dbrx/8bit-lora.yaml
+++ b/examples/dbrx/8bit-lora.yaml
@@ -1,4 +1,7 @@
 base_model: LnL-AI/dbrx-base-converted-v2
+# Automatically upload checkpoint and final model to HF
+# hub_model_id: username/custom_model_name
+
 trust_remote_code: true
 
 load_in_8bit: true
diff --git a/examples/dbrx/fft-ds-zero3.yaml b/examples/dbrx/fft-ds-zero3.yaml
index 60dc201ee..e42b63670 100644
--- a/examples/dbrx/fft-ds-zero3.yaml
+++ b/examples/dbrx/fft-ds-zero3.yaml
@@ -1,4 +1,7 @@
 base_model: LnL-AI/dbrx-base-converted-v2
+# Automatically upload checkpoint and final model to HF
+# hub_model_id: username/custom_model_name
+
 trust_remote_code: true
 
 load_in_8bit: false
diff --git a/examples/deepseek-v2/fft-fsdp-16b.yaml b/examples/deepseek-v2/fft-fsdp-16b.yaml
index b55646df7..3d4608a01 100644
--- a/examples/deepseek-v2/fft-fsdp-16b.yaml
+++ b/examples/deepseek-v2/fft-fsdp-16b.yaml
@@ -1,4 +1,6 @@
 base_model: deepseek-ai/DeepSeek-V2-Lite
+# Automatically upload checkpoint and final model to HF
+# hub_model_id: username/custom_model_name
 trust_remote_code: true
 
 load_in_8bit: false
diff --git a/examples/deepseek-v2/qlora-fsdp-2_5.yaml b/examples/deepseek-v2/qlora-fsdp-2_5.yaml
index 6b8771d81..a89dc343a 100644
--- a/examples/deepseek-v2/qlora-fsdp-2_5.yaml
+++ b/examples/deepseek-v2/qlora-fsdp-2_5.yaml
@@ -1,4 +1,7 @@
 base_model: axolotl-quants/DeepSeek-V2.5-bnb-nf4-bf16
+# Automatically upload checkpoint and final model to HF
+# hub_model_id: username/custom_model_name
+
 trust_remote_code: true
 
 load_in_8bit: false
diff --git a/examples/falcon/config-7b-lora.yml b/examples/falcon/config-7b-lora.yml
index 029ca40e0..efbe38d4a 100644
--- a/examples/falcon/config-7b-lora.yml
+++ b/examples/falcon/config-7b-lora.yml
@@ -1,7 +1,12 @@
 base_model: tiiuae/falcon-7b
-trust_remote_code: true
+# optionally might have model_type or tokenizer_type
 model_type: AutoModelForCausalLM
 tokenizer_type: AutoTokenizer
+# Automatically upload checkpoint and final model to HF
+# hub_model_id: username/custom_model_name
+
+# required by falcon custom model code: https://huggingface.co/tiiuae/falcon-7b/tree/main
+trust_remote_code: true
 
 load_in_8bit: true
 load_in_4bit: false
diff --git a/examples/falcon/config-7b-qlora.yml b/examples/falcon/config-7b-qlora.yml
index 4e34144ed..b9829db5f 100644
--- a/examples/falcon/config-7b-qlora.yml
+++ b/examples/falcon/config-7b-qlora.yml
@@ -1,10 +1,15 @@
 # 1b: tiiuae/falcon-rw-1b
 # 40b: tiiuae/falcon-40b
 base_model: tiiuae/falcon-7b
-# required by falcon custom model code: https://huggingface.co/tiiuae/falcon-7b/tree/main
-trust_remote_code: true
+# optionally might have model_type or tokenizer_type
 model_type: AutoModelForCausalLM
 tokenizer_type: AutoTokenizer
+# Automatically upload checkpoint and final model to HF
+# hub_model_id: username/custom_model_name
+
+# required by falcon custom model code: https://huggingface.co/tiiuae/falcon-7b/tree/main
+trust_remote_code: true
+
 
 load_in_8bit: false
 # enable 4bit for QLoRA
diff --git a/examples/falcon/config-7b.yml b/examples/falcon/config-7b.yml
index 36264f063..5e41a1e33 100644
--- a/examples/falcon/config-7b.yml
+++ b/examples/falcon/config-7b.yml
@@ -1,7 +1,12 @@
 base_model: tiiuae/falcon-7b
-trust_remote_code: true
+# optionally might have model_type or tokenizer_type
 model_type: AutoModelForCausalLM
 tokenizer_type: AutoTokenizer
+# Automatically upload checkpoint and final model to HF
+# hub_model_id: username/custom_model_name
+
+# required by falcon custom model code: https://huggingface.co/tiiuae/falcon-7b/tree/main
+trust_remote_code: true
 
 load_in_8bit: false
 load_in_4bit: false
diff --git a/examples/gemma/qlora.yml b/examples/gemma/qlora.yml
index e08facfc5..80a9fe62f 100644
--- a/examples/gemma/qlora.yml
+++ b/examples/gemma/qlora.yml
@@ -1,7 +1,10 @@
 # use google/gemma-7b if you have access
 base_model: mhenrichsen/gemma-7b
+# optionally might have model_type or tokenizer_type
 model_type: AutoModelForCausalLM
 tokenizer_type: AutoTokenizer
+# Automatically upload checkpoint and final model to HF
+# hub_model_id: username/custom_model_name
 
 load_in_8bit: false
 load_in_4bit: true
diff --git a/examples/gemma2/qlora.yml b/examples/gemma2/qlora.yml
index 00e6d84e0..61a2ad876 100644
--- a/examples/gemma2/qlora.yml
+++ b/examples/gemma2/qlora.yml
@@ -1,6 +1,9 @@
 base_model: google/gemma-2-9b
+# optionally might have model_type or tokenizer_type
 model_type: AutoModelForCausalLM
 tokenizer_type: AutoTokenizer
+# Automatically upload checkpoint and final model to HF
+# hub_model_id: username/custom_model_name
 
 load_in_8bit: false
 load_in_4bit: true
diff --git a/examples/gemma2/reward-model.yaml b/examples/gemma2/reward-model.yaml
index c1f993c3a..b492c6f93 100644
--- a/examples/gemma2/reward-model.yaml
+++ b/examples/gemma2/reward-model.yaml
@@ -1,6 +1,9 @@
 base_model: google/gemma-2-2b
+# optionally might have model_type or tokenizer_type
 model_type: AutoModelForSequenceClassification
 tokenizer_type: AutoTokenizer
+# Automatically upload checkpoint and final model to HF
+# hub_model_id: username/custom_model_name
 
 load_in_8bit: false
 load_in_4bit: false
diff --git a/examples/gptj/qlora.yml b/examples/gptj/qlora.yml
index f801729fa..ddd6d24c0 100644
--- a/examples/gptj/qlora.yml
+++ b/examples/gptj/qlora.yml
@@ -1,4 +1,7 @@
 base_model: EleutherAI/gpt-j-6b
+# Automatically upload checkpoint and final model to HF
+# hub_model_id: username/custom_model_name
+
 load_in_8bit: false
 load_in_4bit: true
 strict: false
diff --git a/examples/jamba/qlora.yaml b/examples/jamba/qlora.yaml
index 3d6f69e79..cab62513c 100644
--- a/examples/jamba/qlora.yaml
+++ b/examples/jamba/qlora.yaml
@@ -1,4 +1,7 @@
 base_model: ai21labs/Jamba-v0.1
+# Automatically upload checkpoint and final model to HF
+# hub_model_id: username/custom_model_name
+
 trust_remote_code: true
 
 load_in_8bit: false
diff --git a/examples/jamba/qlora_deepspeed.yaml b/examples/jamba/qlora_deepspeed.yaml
index 43a76c00b..7ac7bfac5 100644
--- a/examples/jamba/qlora_deepspeed.yaml
+++ b/examples/jamba/qlora_deepspeed.yaml
@@ -1,4 +1,6 @@
 base_model: ai21labs/Jamba-v0.1
+# Automatically upload checkpoint and final model to HF
+# hub_model_id: username/custom_model_name
 trust_remote_code: true
 
 load_in_8bit: false
diff --git a/examples/jamba/qlora_fsdp_large.yaml b/examples/jamba/qlora_fsdp_large.yaml
index 84cf90642..8736680da 100644
--- a/examples/jamba/qlora_fsdp_large.yaml
+++ b/examples/jamba/qlora_fsdp_large.yaml
@@ -1,5 +1,8 @@
 base_model: ai21labs/AI21-Jamba-1.5-Large
+# optionally might have model_type or tokenizer_type
 tokenizer_type: AutoTokenizer
+# Automatically upload checkpoint and final model to HF
+# hub_model_id: username/custom_model_name
 
 load_in_4bit: true
 strict: false
diff --git a/examples/jeopardy-bot/config.yml b/examples/jeopardy-bot/config.yml
index 088629c08..04f92a8dc 100644
--- a/examples/jeopardy-bot/config.yml
+++ b/examples/jeopardy-bot/config.yml
@@ -1,6 +1,10 @@
 base_model: huggyllama/llama-7b
+# optionally might have model_type or tokenizer_type
 model_type: LlamaForCausalLM
 tokenizer_type: LlamaTokenizer
+# Automatically upload checkpoint and final model to HF
+# hub_model_id: username/custom_model_name
+
 load_in_8bit: false
 datasets:
   - path: openaccess-ai-collective/jeopardy
diff --git a/examples/llama-2/fft_optimized.yml b/examples/llama-2/fft_optimized.yml
index 3d94b04b8..3475fcd9a 100644
--- a/examples/llama-2/fft_optimized.yml
+++ b/examples/llama-2/fft_optimized.yml
@@ -1,6 +1,9 @@
 base_model: NousResearch/Llama-2-7b-hf
+# optionally might have model_type or tokenizer_type
 model_type: LlamaForCausalLM
 tokenizer_type: LlamaTokenizer
+# Automatically upload checkpoint and final model to HF
+# hub_model_id: username/custom_model_name
 
 load_in_8bit: false
 load_in_4bit: false
diff --git a/examples/llama-2/gptq-lora.yml b/examples/llama-2/gptq-lora.yml
index 2a706265b..7e45f7d63 100644
--- a/examples/llama-2/gptq-lora.yml
+++ b/examples/llama-2/gptq-lora.yml
@@ -1,8 +1,13 @@
 base_model: TheBloke/Llama-2-7B-GPTQ
-gptq: true
-gptq_disable_exllama: true
+# optionally might have model_type or tokenizer_type
 model_type: AutoModelForCausalLM
 tokenizer_type: LlamaTokenizer
+# Automatically upload checkpoint and final model to HF
+# hub_model_id: username/custom_model_name
+
+gptq: true
+gptq_disable_exllama: true
+
 tokenizer_use_fast: true
 tokenizer_legacy: true
 load_in_8bit: false
diff --git a/examples/llama-2/lisa.yml b/examples/llama-2/lisa.yml
index 7012d1f61..40391204c 100644
--- a/examples/llama-2/lisa.yml
+++ b/examples/llama-2/lisa.yml
@@ -1,6 +1,9 @@
 base_model: NousResearch/Llama-2-7b-hf
+# optionally might have model_type or tokenizer_type
 model_type: LlamaForCausalLM
 tokenizer_type: LlamaTokenizer
+# Automatically upload checkpoint and final model to HF
+# hub_model_id: username/custom_model_name
 
 load_in_8bit: false
 load_in_4bit: false
diff --git a/examples/llama-2/loftq.yml b/examples/llama-2/loftq.yml
index 68d9ac014..a5108e70f 100644
--- a/examples/llama-2/loftq.yml
+++ b/examples/llama-2/loftq.yml
@@ -1,6 +1,9 @@
 base_model: NousResearch/Llama-2-7b-hf
+# optionally might have model_type or tokenizer_type
 model_type: LlamaForCausalLM
 tokenizer_type: LlamaTokenizer
+# Automatically upload checkpoint and final model to HF
+# hub_model_id: username/custom_model_name
 
 load_in_8bit: false
 load_in_4bit: false
diff --git a/examples/llama-2/lora.yml b/examples/llama-2/lora.yml
index 95bfae692..ec0c80012 100644
--- a/examples/llama-2/lora.yml
+++ b/examples/llama-2/lora.yml
@@ -1,6 +1,9 @@
 base_model: NousResearch/Llama-2-7b-hf
+# optionally might have model_type or tokenizer_type
 model_type: LlamaForCausalLM
 tokenizer_type: LlamaTokenizer
+# Automatically upload checkpoint and final model to HF
+# hub_model_id: username/custom_model_name
 
 load_in_8bit: true
 load_in_4bit: false
diff --git a/examples/llama-2/qlora-fsdp.yml b/examples/llama-2/qlora-fsdp.yml
index 88029f92d..204c91693 100644
--- a/examples/llama-2/qlora-fsdp.yml
+++ b/examples/llama-2/qlora-fsdp.yml
@@ -1,6 +1,9 @@
 base_model: NousResearch/Llama-2-7b-hf
+# optionally might have model_type or tokenizer_type
 model_type: LlamaForCausalLM
 tokenizer_type: LlamaTokenizer
+# Automatically upload checkpoint and final model to HF
+# hub_model_id: username/custom_model_name
 
 load_in_8bit: false
 load_in_4bit: true
diff --git a/examples/llama-2/qlora.yml b/examples/llama-2/qlora.yml
index dda32170b..81d1acbec 100644
--- a/examples/llama-2/qlora.yml
+++ b/examples/llama-2/qlora.yml
@@ -1,6 +1,9 @@
 base_model: NousResearch/Llama-2-7b-hf
+# optionally might have model_type or tokenizer_type
 model_type: LlamaForCausalLM
 tokenizer_type: LlamaTokenizer
+# Automatically upload checkpoint and final model to HF
+# hub_model_id: username/custom_model_name
 
 load_in_8bit: false
 load_in_4bit: true
diff --git a/examples/llama-3-vision/lora-11b.yaml b/examples/llama-3-vision/lora-11b.yaml
index b2e494641..22dc3a9af 100644
--- a/examples/llama-3-vision/lora-11b.yaml
+++ b/examples/llama-3-vision/lora-11b.yaml
@@ -1,5 +1,9 @@
 base_model: alpindale/Llama-3.2-11B-Vision-Instruct
+# optionally might have model_type or tokenizer_type or processor_type
 processor_type: AutoProcessor
+# Automatically upload checkpoint and final model to HF
+# hub_model_id: username/custom_model_name
+
 strict: false
 
 # these 3 lines are needed for now to handle vision chat templates w images
diff --git a/examples/llama-3/fft-8b-liger-fsdp.yaml b/examples/llama-3/fft-8b-liger-fsdp.yaml
index 043b5c980..2c8589b17 100644
--- a/examples/llama-3/fft-8b-liger-fsdp.yaml
+++ b/examples/llama-3/fft-8b-liger-fsdp.yaml
@@ -1,4 +1,6 @@
 base_model: NousResearch/Meta-Llama-3.1-8B
+# Automatically upload checkpoint and final model to HF
+# hub_model_id: username/custom_model_name
 
 plugins:
   - axolotl.integrations.liger.LigerPlugin
diff --git a/examples/llama-3/fft-8b.yaml b/examples/llama-3/fft-8b.yaml
index 335902aac..a129c6e5b 100644
--- a/examples/llama-3/fft-8b.yaml
+++ b/examples/llama-3/fft-8b.yaml
@@ -1,4 +1,6 @@
 base_model: NousResearch/Meta-Llama-3.1-8B
+# Automatically upload checkpoint and final model to HF
+# hub_model_id: username/custom_model_name
 
 load_in_8bit: false
 load_in_4bit: false
diff --git a/examples/llama-3/instruct-dpo-lora-8b.yml b/examples/llama-3/instruct-dpo-lora-8b.yml
index dc8835035..bb58b677a 100644
--- a/examples/llama-3/instruct-dpo-lora-8b.yml
+++ b/examples/llama-3/instruct-dpo-lora-8b.yml
@@ -1,6 +1,9 @@
 base_model: meta-llama/Meta-Llama-3-8B-Instruct
+# optionally might have model_type or tokenizer_type
 model_type: LlamaForCausalLM
 tokenizer_type: AutoTokenizer
+# Automatically upload checkpoint and final model to HF
+# hub_model_id: username/custom_model_name
 
 load_in_8bit: true
 load_in_4bit: false
diff --git a/examples/llama-3/instruct-lora-8b.yml b/examples/llama-3/instruct-lora-8b.yml
index ae9a8088c..853f85d74 100644
--- a/examples/llama-3/instruct-lora-8b.yml
+++ b/examples/llama-3/instruct-lora-8b.yml
@@ -1,6 +1,9 @@
 base_model: NousResearch/Meta-Llama-3-8B-Instruct
+# optionally might have model_type or tokenizer_type
 model_type: LlamaForCausalLM
 tokenizer_type: AutoTokenizer
+# Automatically upload checkpoint and final model to HF
+# hub_model_id: username/custom_model_name
 
 load_in_8bit: true
 load_in_4bit: false
diff --git a/examples/llama-3/lora-1b-deduplicate-dpo.yml b/examples/llama-3/lora-1b-deduplicate-dpo.yml
index 35a0260ca..00314da75 100644
--- a/examples/llama-3/lora-1b-deduplicate-dpo.yml
+++ b/examples/llama-3/lora-1b-deduplicate-dpo.yml
@@ -1,6 +1,9 @@
 base_model: meta-llama/Llama-3.2-1B
+# optionally might have model_type or tokenizer_type
 model_type: LlamaForCausalLM
 tokenizer_type: AutoTokenizer
+# Automatically upload checkpoint and final model to HF
+# hub_model_id: username/custom_model_name
 
 load_in_8bit: true
 load_in_4bit: false
diff --git a/examples/llama-3/lora-1b-deduplicate-sft.yml b/examples/llama-3/lora-1b-deduplicate-sft.yml
index c07d5f8ff..451696465 100644
--- a/examples/llama-3/lora-1b-deduplicate-sft.yml
+++ b/examples/llama-3/lora-1b-deduplicate-sft.yml
@@ -1,6 +1,9 @@
 base_model: meta-llama/Llama-3.2-1B
+# optionally might have model_type or tokenizer_type
 model_type: LlamaForCausalLM
 tokenizer_type: AutoTokenizer
+# Automatically upload checkpoint and final model to HF
+# hub_model_id: username/custom_model_name
 
 load_in_8bit: true
 load_in_4bit: false
diff --git a/examples/llama-3/lora-1b.yml b/examples/llama-3/lora-1b.yml
index bdda2ed75..a1c3afa87 100644
--- a/examples/llama-3/lora-1b.yml
+++ b/examples/llama-3/lora-1b.yml
@@ -1,4 +1,6 @@
 base_model: NousResearch/Llama-3.2-1B
+# Automatically upload checkpoint and final model to HF
+# hub_model_id: username/custom_model_name
 
 load_in_8bit: false
 load_in_4bit: false
diff --git a/examples/llama-3/lora-8b.yml b/examples/llama-3/lora-8b.yml
index a20a529f5..7921857ce 100644
--- a/examples/llama-3/lora-8b.yml
+++ b/examples/llama-3/lora-8b.yml
@@ -1,6 +1,9 @@
 base_model: NousResearch/Meta-Llama-3-8B
+# optionally might have model_type or tokenizer_type
 model_type: LlamaForCausalLM
 tokenizer_type: AutoTokenizer
+# Automatically upload checkpoint and final model to HF
+# hub_model_id: username/custom_model_name
 
 load_in_8bit: true
 load_in_4bit: false
diff --git a/examples/llama-3/qlora-1b-kto.yaml b/examples/llama-3/qlora-1b-kto.yaml
index a876d8fd7..df4a08489 100644
--- a/examples/llama-3/qlora-1b-kto.yaml
+++ b/examples/llama-3/qlora-1b-kto.yaml
@@ -1,4 +1,6 @@
 base_model: meta-llama/Llama-3.2-1B
+# Automatically upload checkpoint and final model to HF
+# hub_model_id: username/custom_model_name
 
 load_in_8bit: false
 load_in_4bit: true
diff --git a/examples/llama-3/qlora-1b.yml b/examples/llama-3/qlora-1b.yml
index be40db846..226bbb237 100644
--- a/examples/llama-3/qlora-1b.yml
+++ b/examples/llama-3/qlora-1b.yml
@@ -1,4 +1,6 @@
 base_model: NousResearch/Llama-3.2-1B
+# Automatically upload checkpoint and final model to HF
+# hub_model_id: username/custom_model_name
 
 load_in_8bit: false
 load_in_4bit: true
diff --git a/examples/llama-3/qlora-fsdp-405b.yaml b/examples/llama-3/qlora-fsdp-405b.yaml
index 6eeec01c9..a60a97ef3 100644
--- a/examples/llama-3/qlora-fsdp-405b.yaml
+++ b/examples/llama-3/qlora-fsdp-405b.yaml
@@ -1,5 +1,8 @@
 base_model: hugging-quants/Meta-Llama-3.1-405B-BNB-NF4-BF16
+# optionally might have model_type or tokenizer_type
 tokenizer_type: AutoTokenizer
+# Automatically upload checkpoint and final model to HF
+# hub_model_id: username/custom_model_name
 
 load_in_4bit: true
 strict: false
diff --git a/examples/llama-3/qlora-fsdp-70b.yaml b/examples/llama-3/qlora-fsdp-70b.yaml
index 9b74f6b4d..932e1a0d6 100644
--- a/examples/llama-3/qlora-fsdp-70b.yaml
+++ b/examples/llama-3/qlora-fsdp-70b.yaml
@@ -1,6 +1,9 @@
 base_model: casperhansen/llama-3-70b-fp16
+# optionally might have model_type or tokenizer_type
 model_type: LlamaForCausalLM
 tokenizer_type: AutoTokenizer  # PreTrainedTokenizerFast
+# Automatically upload checkpoint and final model to HF
+# hub_model_id: username/custom_model_name
 
 load_in_8bit: false
 load_in_4bit: true
diff --git a/examples/llama-3/qlora.yml b/examples/llama-3/qlora.yml
index 079c9cad0..64268a205 100644
--- a/examples/llama-3/qlora.yml
+++ b/examples/llama-3/qlora.yml
@@ -1,6 +1,9 @@
 base_model: NousResearch/Meta-Llama-3-8B
+# optionally might have model_type or tokenizer_type
 model_type: AutoModelForCausalLM
 tokenizer_type: AutoTokenizer
+# Automatically upload checkpoint and final model to HF
+# hub_model_id: username/custom_model_name
 
 load_in_8bit: false
 load_in_4bit: true
diff --git a/examples/mamba/config.yml b/examples/mamba/config.yml
index f88f5138d..ca96fbfc3 100644
--- a/examples/mamba/config.yml
+++ b/examples/mamba/config.yml
@@ -1,7 +1,10 @@
 base_model: state-spaces/mamba-2.8b
+# optionally might have model_type or tokenizer_type or tokenizer_config
 model_type: MambaLMHeadModel
 tokenizer_type: AutoTokenizer
 tokenizer_config: EleutherAI/gpt-neox-20b
+# Automatically upload checkpoint and final model to HF
+# hub_model_id: username/custom_model_name
 
 load_in_8bit: false
 load_in_4bit: false
diff --git a/examples/mistral/bigstral-ds-zero3.yaml b/examples/mistral/bigstral-ds-zero3.yaml
index e993e44a7..5ee214c1b 100644
--- a/examples/mistral/bigstral-ds-zero3.yaml
+++ b/examples/mistral/bigstral-ds-zero3.yaml
@@ -1,6 +1,10 @@
 base_model: mistral-community/Mixtral-8x22B-v0.1
+# optionally might have model_type or tokenizer_type
 model_type: AutoModelForCausalLM
 tokenizer_type: LlamaTokenizer
+# Automatically upload checkpoint and final model to HF
+# hub_model_id: username/custom_model_name
+
 trust_remote_code: true
 
 load_in_8bit: false
diff --git a/examples/mistral/config.yml b/examples/mistral/config.yml
index a70937c4f..890203339 100644
--- a/examples/mistral/config.yml
+++ b/examples/mistral/config.yml
@@ -1,6 +1,9 @@
 base_model: mistralai/Mistral-7B-v0.1
+# optionally might have model_type or tokenizer_type
 model_type: MistralForCausalLM
 tokenizer_type: LlamaTokenizer
+# Automatically upload checkpoint and final model to HF
+# hub_model_id: username/custom_model_name
 
 load_in_8bit: false
 load_in_4bit: false
diff --git a/examples/mistral/lora-mps.yml b/examples/mistral/lora-mps.yml
index 03c74bb59..c1df9896c 100644
--- a/examples/mistral/lora-mps.yml
+++ b/examples/mistral/lora-mps.yml
@@ -1,6 +1,9 @@
 base_model: mistralai/Mistral-7B-v0.1
+# optionally might have model_type or tokenizer_type
 model_type: MistralForCausalLM
 tokenizer_type: LlamaTokenizer
+# Automatically upload checkpoint and final model to HF
+# hub_model_id: username/custom_model_name
 
 load_in_8bit: false
 load_in_4bit: false
diff --git a/examples/mistral/lora.yml b/examples/mistral/lora.yml
index 0d5dc9edd..11c1e0ee7 100644
--- a/examples/mistral/lora.yml
+++ b/examples/mistral/lora.yml
@@ -1,6 +1,9 @@
 base_model: mistralai/Mistral-7B-v0.1
+# optionally might have model_type or tokenizer_type
 model_type: MistralForCausalLM
 tokenizer_type: LlamaTokenizer
+# Automatically upload checkpoint and final model to HF
+# hub_model_id: username/custom_model_name
 
 load_in_8bit: true
 load_in_4bit: false
diff --git a/examples/mistral/mistral-dpo-qlora.yml b/examples/mistral/mistral-dpo-qlora.yml
index a558e0453..e2eb6a264 100644
--- a/examples/mistral/mistral-dpo-qlora.yml
+++ b/examples/mistral/mistral-dpo-qlora.yml
@@ -4,8 +4,11 @@
 #face problems with the special tokens.
 
 base_model: mistralai/Mistral-7B-Instruct-v0.2
+# optionally might have model_type or tokenizer_type
 model_type: MistralForCausalLM
 tokenizer_type: LlamaTokenizer
+# Automatically upload checkpoint and final model to HF
+# hub_model_id: username/custom_model_name
 
 load_in_8bit: false
 load_in_4bit: true
diff --git a/examples/mistral/mistral-qlora-fsdp.yml b/examples/mistral/mistral-qlora-fsdp.yml
index e6b07c594..521f4de5f 100644
--- a/examples/mistral/mistral-qlora-fsdp.yml
+++ b/examples/mistral/mistral-qlora-fsdp.yml
@@ -1,6 +1,10 @@
 base_model: mistralai/Mixtral-8x7B-v0.1
+# optionally might have model_type or tokenizer_type
 model_type: AutoModelForCausalLM
 tokenizer_type: LlamaTokenizer
+# Automatically upload checkpoint and final model to HF
+# hub_model_id: username/custom_model_name
+
 trust_remote_code: true
 
 load_in_8bit: false
diff --git a/examples/mistral/mistral-qlora-orpo.yml b/examples/mistral/mistral-qlora-orpo.yml
index 2549ef018..82f30dc17 100644
--- a/examples/mistral/mistral-qlora-orpo.yml
+++ b/examples/mistral/mistral-qlora-orpo.yml
@@ -1,6 +1,9 @@
 base_model: mistralai/Mistral-7B-v0.1
+# optionally might have model_type or tokenizer_type
 model_type: MistralForCausalLM
 tokenizer_type: LlamaTokenizer
+# Automatically upload checkpoint and final model to HF
+# hub_model_id: username/custom_model_name
 
 load_in_8bit: false
 load_in_4bit: true
diff --git a/examples/mistral/mixtral-8x22b-qlora-fsdp.yml b/examples/mistral/mixtral-8x22b-qlora-fsdp.yml
index fe68b2817..4a65b1a7d 100644
--- a/examples/mistral/mixtral-8x22b-qlora-fsdp.yml
+++ b/examples/mistral/mixtral-8x22b-qlora-fsdp.yml
@@ -1,6 +1,9 @@
 base_model: mistral-community/Mixtral-8x22B-v0.1
+# optionally might have model_type or tokenizer_type
 model_type: AutoModelForCausalLM
 tokenizer_type: LlamaTokenizer
+# Automatically upload checkpoint and final model to HF
+# hub_model_id: username/custom_model_name
 
 load_in_8bit: false
 load_in_4bit: true
diff --git a/examples/mistral/mixtral-qlora-fsdp.yml b/examples/mistral/mixtral-qlora-fsdp.yml
index c09597040..fbd9bd937 100644
--- a/examples/mistral/mixtral-qlora-fsdp.yml
+++ b/examples/mistral/mixtral-qlora-fsdp.yml
@@ -1,6 +1,10 @@
 base_model: mistralai/Mixtral-8x7B-v0.1
+# optionally might have model_type or tokenizer_type
 model_type: AutoModelForCausalLM
 tokenizer_type: LlamaTokenizer
+# Automatically upload checkpoint and final model to HF
+# hub_model_id: username/custom_model_name
+
 trust_remote_code: true
 
 load_in_8bit: false
diff --git a/examples/mistral/mixtral.yml b/examples/mistral/mixtral.yml
index 13fbe92ab..ac80ec933 100644
--- a/examples/mistral/mixtral.yml
+++ b/examples/mistral/mixtral.yml
@@ -1,6 +1,10 @@
 base_model: mistralai/Mixtral-8x7B-v0.1
+# optionally might have model_type or tokenizer_type
 model_type: AutoModelForCausalLM
 tokenizer_type: LlamaTokenizer
+# Automatically upload checkpoint and final model to HF
+# hub_model_id: username/custom_model_name
+
 trust_remote_code: true
 
 load_in_8bit: false
diff --git a/examples/mistral/mixtral_22.yml b/examples/mistral/mixtral_22.yml
index 9a1e86386..7f2a72212 100644
--- a/examples/mistral/mixtral_22.yml
+++ b/examples/mistral/mixtral_22.yml
@@ -1,6 +1,10 @@
 base_model: mistral-community/Mixtral-8x22B-v0.1
+# optionally might have model_type or tokenizer_type
 model_type: AutoModelForCausalLM
 tokenizer_type: LlamaTokenizer
+# Automatically upload checkpoint and final model to HF
+# hub_model_id: username/custom_model_name
+
 trust_remote_code: true
 
 load_in_8bit: false
diff --git a/examples/mistral/qlora.yml b/examples/mistral/qlora.yml
index c7bdb155c..5f3fa10b8 100644
--- a/examples/mistral/qlora.yml
+++ b/examples/mistral/qlora.yml
@@ -1,6 +1,9 @@
 base_model: mistralai/Mistral-7B-v0.1
+# optionally might have model_type or tokenizer_type
 model_type: MistralForCausalLM
 tokenizer_type: LlamaTokenizer
+# Automatically upload checkpoint and final model to HF
+# hub_model_id: username/custom_model_name
 
 load_in_8bit: false
 load_in_4bit: true
diff --git a/examples/mpt-7b/config.yml b/examples/mpt-7b/config.yml
index 530415de1..cf4b433fe 100644
--- a/examples/mpt-7b/config.yml
+++ b/examples/mpt-7b/config.yml
@@ -1,5 +1,9 @@
 base_model: mosaicml/mpt-7b
+# optionally might have model_type or tokenizer_type
 tokenizer_type: AutoTokenizer
+# Automatically upload checkpoint and final model to HF
+# hub_model_id: username/custom_model_name
+
 trust_remote_code: true  # required for mpt as their model class is not merged into transformers yet
 load_in_8bit: false
 datasets:
diff --git a/examples/openllama-3b/config.yml b/examples/openllama-3b/config.yml
index a0473213c..ec66014b4 100644
--- a/examples/openllama-3b/config.yml
+++ b/examples/openllama-3b/config.yml
@@ -1,6 +1,10 @@
 base_model: openlm-research/open_llama_3b_v2
+# optionally might have model_type or tokenizer_type
 model_type: LlamaForCausalLM
 tokenizer_type: LlamaTokenizer
+# Automatically upload checkpoint and final model to HF
+# hub_model_id: username/custom_model_name
+
 load_in_8bit: false
 load_in_4bit: false
 strict: false
diff --git a/examples/openllama-3b/lora.yml b/examples/openllama-3b/lora.yml
index 2b6784915..b449df9ae 100644
--- a/examples/openllama-3b/lora.yml
+++ b/examples/openllama-3b/lora.yml
@@ -1,6 +1,10 @@
 base_model: openlm-research/open_llama_3b_v2
+# optionally might have model_type or tokenizer_type
 model_type: LlamaForCausalLM
 tokenizer_type: LlamaTokenizer
+# Automatically upload checkpoint and final model to HF
+# hub_model_id: username/custom_model_name
+
 load_in_8bit: true
 load_in_4bit: false
 strict: false
diff --git a/examples/openllama-3b/qlora.yml b/examples/openllama-3b/qlora.yml
index 8d4dc05ca..3efcdabc6 100644
--- a/examples/openllama-3b/qlora.yml
+++ b/examples/openllama-3b/qlora.yml
@@ -1,6 +1,10 @@
 base_model: openlm-research/open_llama_3b_v2
+# optionally might have model_type or tokenizer_type
 model_type: LlamaForCausalLM
 tokenizer_type: LlamaTokenizer
+# Automatically upload checkpoint and final model to HF
+# hub_model_id: username/custom_model_name
+
 load_in_8bit: false
 load_in_4bit: true
 strict: false
diff --git a/examples/phi/lora-3.5.yaml b/examples/phi/lora-3.5.yaml
index 246701148..8c0205f4c 100644
--- a/examples/phi/lora-3.5.yaml
+++ b/examples/phi/lora-3.5.yaml
@@ -1,6 +1,9 @@
 base_model: microsoft/Phi-3.5-mini-instruct
+# optionally might have model_type or tokenizer_type
 model_type: AutoModelForCausalLM
 tokenizer_type: AutoTokenizer
+# Automatically upload checkpoint and final model to HF
+# hub_model_id: username/custom_model_name
 
 load_in_8bit: true
 load_in_4bit: false
diff --git a/examples/phi/phi-ft.yml b/examples/phi/phi-ft.yml
index 0dabadc7a..fc5848dc5 100644
--- a/examples/phi/phi-ft.yml
+++ b/examples/phi/phi-ft.yml
@@ -1,6 +1,9 @@
 base_model: microsoft/phi-1_5
+# optionally might have model_type or tokenizer_type
 model_type: AutoModelForCausalLM
 tokenizer_type: AutoTokenizer
+# Automatically upload checkpoint and final model to HF
+# hub_model_id: username/custom_model_name
 
 load_in_8bit: false
 load_in_4bit: false
diff --git a/examples/phi/phi-qlora.yml b/examples/phi/phi-qlora.yml
index 7c181a3c1..a98cd1040 100644
--- a/examples/phi/phi-qlora.yml
+++ b/examples/phi/phi-qlora.yml
@@ -1,6 +1,9 @@
 base_model: microsoft/phi-1_5
+# optionally might have model_type or tokenizer_type
 model_type: AutoModelForCausalLM
 tokenizer_type: AutoTokenizer
+# Automatically upload checkpoint and final model to HF
+# hub_model_id: username/custom_model_name
 
 load_in_8bit: false
 load_in_4bit: true
diff --git a/examples/phi/phi2-ft.yml b/examples/phi/phi2-ft.yml
index 27815550b..0f656f821 100644
--- a/examples/phi/phi2-ft.yml
+++ b/examples/phi/phi2-ft.yml
@@ -1,6 +1,9 @@
 base_model: microsoft/phi-2
+# optionally might have model_type or tokenizer_type
 model_type: AutoModelForCausalLM
 tokenizer_type: AutoTokenizer
+# Automatically upload checkpoint and final model to HF
+# hub_model_id: username/custom_model_name
 
 load_in_8bit: false
 load_in_4bit: false
diff --git a/examples/phi/phi3-ft-fsdp.yml b/examples/phi/phi3-ft-fsdp.yml
index d220e577d..c081e47b9 100644
--- a/examples/phi/phi3-ft-fsdp.yml
+++ b/examples/phi/phi3-ft-fsdp.yml
@@ -1,6 +1,9 @@
 base_model: microsoft/Phi-3-mini-4k-instruct
+# optionally might have model_type or tokenizer_type
 model_type: AutoModelForCausalLM
 tokenizer_type: AutoTokenizer
+# Automatically upload checkpoint and final model to HF
+# hub_model_id: username/custom_model_name
 
 load_in_8bit: false
 load_in_4bit: false
diff --git a/examples/phi/phi3-ft.yml b/examples/phi/phi3-ft.yml
index 18db9b8b7..ac42153af 100644
--- a/examples/phi/phi3-ft.yml
+++ b/examples/phi/phi3-ft.yml
@@ -1,7 +1,11 @@
 base_model: microsoft/Phi-3-mini-4k-instruct
 trust_remote_code: true
+# optionally might have model_type or tokenizer_type
 model_type: AutoModelForCausalLM
 tokenizer_type: AutoTokenizer
+# Automatically upload checkpoint and final model to HF
+# hub_model_id: username/custom_model_name
+
 chat_template: phi_3
 
 load_in_8bit: false
diff --git a/examples/pythia-12b/config.yml b/examples/pythia-12b/config.yml
index 18e6beaaf..52ab77055 100644
--- a/examples/pythia-12b/config.yml
+++ b/examples/pythia-12b/config.yml
@@ -1,7 +1,11 @@
 base_model: EleutherAI/pythia-12b-deduped
 base_model_ignore_patterns: pytorch*  # prefer safetensors
+# optionally might have model_type or tokenizer_type
 model_type: GPTNeoXForCausalLM
 tokenizer_type: AutoTokenizer
+# Automatically upload checkpoint and final model to HF
+# hub_model_id: username/custom_model_name
+
 load_in_8bit: false
 load_in_4bit: false
 gptq: false
diff --git a/examples/pythia/lora.yml b/examples/pythia/lora.yml
index 0aa650f67..203652f6b 100644
--- a/examples/pythia/lora.yml
+++ b/examples/pythia/lora.yml
@@ -1,4 +1,7 @@
 base_model: EleutherAI/pythia-1.4b-deduped
+# Automatically upload checkpoint and final model to HF
+# hub_model_id: username/custom_model_name
+
 load_in_8bit: true
 datasets:
   - path: teknium/GPT4-LLM-Cleaned
diff --git a/examples/qwen/lora.yml b/examples/qwen/lora.yml
index dd8dc1e4f..961125a51 100644
--- a/examples/qwen/lora.yml
+++ b/examples/qwen/lora.yml
@@ -1,6 +1,9 @@
 base_model: Qwen/Qwen-7B
+# optionally might have model_type or tokenizer_type
 model_type: AutoModelForCausalLM
 tokenizer_type: AutoTokenizer
+# Automatically upload checkpoint and final model to HF
+# hub_model_id: username/custom_model_name
 
 trust_remote_code: true
 
diff --git a/examples/qwen/qlora.yml b/examples/qwen/qlora.yml
index 01c0c0ab8..e7159eaa5 100644
--- a/examples/qwen/qlora.yml
+++ b/examples/qwen/qlora.yml
@@ -1,6 +1,9 @@
 base_model: Qwen/Qwen-7B
+# optionally might have model_type or tokenizer_type
 model_type: AutoModelForCausalLM
 tokenizer_type: AutoTokenizer
+# Automatically upload checkpoint and final model to HF
+# hub_model_id: username/custom_model_name
 
 trust_remote_code: true
 
diff --git a/examples/qwen/qwen2-moe-lora.yaml b/examples/qwen/qwen2-moe-lora.yaml
index 452335e38..b357b9344 100644
--- a/examples/qwen/qwen2-moe-lora.yaml
+++ b/examples/qwen/qwen2-moe-lora.yaml
@@ -1,4 +1,7 @@
 base_model: Qwen/Qwen1.5-MoE-A2.7B
+# Automatically upload checkpoint and final model to HF
+# hub_model_id: username/custom_model_name
+
 trust_remote_code: true
 
 load_in_8bit: false
diff --git a/examples/qwen/qwen2-moe-qlora.yaml b/examples/qwen/qwen2-moe-qlora.yaml
index bc11007c7..d45e4c89f 100644
--- a/examples/qwen/qwen2-moe-qlora.yaml
+++ b/examples/qwen/qwen2-moe-qlora.yaml
@@ -1,4 +1,7 @@
 base_model: Qwen/Qwen1.5-MoE-A2.7B
+# Automatically upload checkpoint and final model to HF
+# hub_model_id: username/custom_model_name
+
 trust_remote_code: true
 
 load_in_8bit: false
diff --git a/examples/qwen2/dpo.yaml b/examples/qwen2/dpo.yaml
index 64c3e7629..e924be195 100644
--- a/examples/qwen2/dpo.yaml
+++ b/examples/qwen2/dpo.yaml
@@ -1,4 +1,6 @@
 base_model: Qwen/Qwen2.5-0.5B
+# Automatically upload checkpoint and final model to HF
+# hub_model_id: username/custom_model_name
 
 strict: false
 
diff --git a/examples/qwen2/qlora-fsdp.yaml b/examples/qwen2/qlora-fsdp.yaml
index d61c72a37..cc4974908 100644
--- a/examples/qwen2/qlora-fsdp.yaml
+++ b/examples/qwen2/qlora-fsdp.yaml
@@ -1,4 +1,7 @@
 base_model: Qwen/Qwen2-7B
+# Automatically upload checkpoint and final model to HF
+# hub_model_id: username/custom_model_name
+
 trust_remote_code: true
 
 load_in_8bit: false
diff --git a/examples/redpajama/config-3b.yml b/examples/redpajama/config-3b.yml
index ff395a863..d716727a3 100644
--- a/examples/redpajama/config-3b.yml
+++ b/examples/redpajama/config-3b.yml
@@ -1,6 +1,10 @@
 base_model: togethercomputer/RedPajama-INCITE-Chat-3B-v1
+# optionally might have model_type or tokenizer_type
 model_type: GPTNeoXForCausalLM
 tokenizer_type: AutoTokenizer
+# Automatically upload checkpoint and final model to HF
+# hub_model_id: username/custom_model_name
+
 trust_remote_code:
 load_in_8bit: false
 datasets:
diff --git a/examples/replit-3b/config-lora.yml b/examples/replit-3b/config-lora.yml
index 9fee099d4..bb2a6aace 100644
--- a/examples/replit-3b/config-lora.yml
+++ b/examples/replit-3b/config-lora.yml
@@ -1,4 +1,7 @@
 base_model: replit/replit-code-v1-3b
+# Automatically upload checkpoint and final model to HF
+# hub_model_id: username/custom_model_name
+
 trust_remote_code: true
 load_in_8bit: false
 datasets:
diff --git a/examples/stablelm-2/1.6b/fft.yml b/examples/stablelm-2/1.6b/fft.yml
index 777262a7e..3ecb1581b 100644
--- a/examples/stablelm-2/1.6b/fft.yml
+++ b/examples/stablelm-2/1.6b/fft.yml
@@ -1,6 +1,10 @@
 base_model: stabilityai/stablelm-2-1_6b
+# optionally might have model_type or tokenizer_type
 model_type: AutoModelForCausalLM
 tokenizer_type: AutoTokenizer
+# Automatically upload checkpoint and final model to HF
+# hub_model_id: username/custom_model_name
+
 trust_remote_code: true
 
 load_in_8bit: false
diff --git a/examples/stablelm-2/1.6b/lora.yml b/examples/stablelm-2/1.6b/lora.yml
index c65b9e4cd..8597de6a2 100644
--- a/examples/stablelm-2/1.6b/lora.yml
+++ b/examples/stablelm-2/1.6b/lora.yml
@@ -1,6 +1,10 @@
 base_model: stabilityai/stablelm-2-1_6b
+# optionally might have model_type or tokenizer_type
 model_type: AutoModelForCausalLM
 tokenizer_type: AutoTokenizer
+# Automatically upload checkpoint and final model to HF
+# hub_model_id: username/custom_model_name
+
 trust_remote_code: true
 
 load_in_8bit: true
diff --git a/examples/starcoder2/qlora.yml b/examples/starcoder2/qlora.yml
index 83fc0d89f..d1db71d6d 100644
--- a/examples/starcoder2/qlora.yml
+++ b/examples/starcoder2/qlora.yml
@@ -1,4 +1,6 @@
 base_model: bigcode/starcoder2-3b
+# Automatically upload checkpoint and final model to HF
+# hub_model_id: username/custom_model_name
 
 load_in_8bit: false
 load_in_4bit: true
diff --git a/examples/tiny-llama/lora-mps.yml b/examples/tiny-llama/lora-mps.yml
index 99e404e43..f949acd0f 100644
--- a/examples/tiny-llama/lora-mps.yml
+++ b/examples/tiny-llama/lora-mps.yml
@@ -1,6 +1,9 @@
 base_model: TinyLlama/TinyLlama_v1.1
+# optionally might have model_type or tokenizer_type
 model_type: LlamaForCausalLM
 tokenizer_type: LlamaTokenizer
+# Automatically upload checkpoint and final model to HF
+# hub_model_id: username/custom_model_name
 
 load_in_8bit: true
 load_in_4bit: false
diff --git a/examples/tiny-llama/lora.yml b/examples/tiny-llama/lora.yml
index 7c7fb6706..54aa5ec27 100644
--- a/examples/tiny-llama/lora.yml
+++ b/examples/tiny-llama/lora.yml
@@ -1,5 +1,8 @@
 base_model: TinyLlama/TinyLlama_v1.1
+# optionally might have model_type or tokenizer_type
 tokenizer_type: AutoTokenizer
+# Automatically upload checkpoint and final model to HF
+# hub_model_id: username/custom_model_name
 
 load_in_8bit: true
 load_in_4bit: false
diff --git a/examples/tiny-llama/pretrain.yml b/examples/tiny-llama/pretrain.yml
index 010a1608a..fd6d2c9c1 100644
--- a/examples/tiny-llama/pretrain.yml
+++ b/examples/tiny-llama/pretrain.yml
@@ -1,7 +1,9 @@
 base_model: TinyLlama/TinyLlama-1.1B-Chat-v1.0
-
+# optionally might have model_type or tokenizer_type
 model_type: LlamaForCausalLM
 tokenizer_type: LlamaTokenizer
+# Automatically upload checkpoint and final model to HF
+# hub_model_id: username/custom_model_name
 
 load_in_8bit: false
 load_in_4bit: false
diff --git a/examples/tiny-llama/qlora.yml b/examples/tiny-llama/qlora.yml
index 931fe03e0..694ab3a15 100644
--- a/examples/tiny-llama/qlora.yml
+++ b/examples/tiny-llama/qlora.yml
@@ -1,6 +1,9 @@
 base_model: TinyLlama/TinyLlama_v1.1
+# optionally might have model_type or tokenizer_type
 model_type: LlamaForCausalLM
 tokenizer_type: LlamaTokenizer
+# Automatically upload checkpoint and final model to HF
+# hub_model_id: username/custom_model_name
 
 load_in_8bit: false
 load_in_4bit: true
diff --git a/examples/xgen-7b/xgen-7b-8k-qlora.yml b/examples/xgen-7b/xgen-7b-8k-qlora.yml
index 7e3f83cbd..d798e326d 100644
--- a/examples/xgen-7b/xgen-7b-8k-qlora.yml
+++ b/examples/xgen-7b/xgen-7b-8k-qlora.yml
@@ -1,9 +1,14 @@
 # An example finetuning Saleforce's XGen-7b model with 8k context using qlora
 # on Tim Dettmer's Guanaco dataset.
 base_model: Salesforce/xgen-7b-8k-base
-trust_remote_code: true
+# optionally might have model_type or tokenizer_type
 model_type: AutoModelForCausalLM
 tokenizer_type: AutoTokenizer
+# Automatically upload checkpoint and final model to HF
+# hub_model_id: username/custom_model_name
+
+trust_remote_code: true
+
 load_in_8bit: false
 # enable 4bit for QLoRA
 load_in_4bit: true
diff --git a/examples/yi-34B-chat/qlora.yml b/examples/yi-34B-chat/qlora.yml
index 7fe322d63..b68d00883 100644
--- a/examples/yi-34B-chat/qlora.yml
+++ b/examples/yi-34B-chat/qlora.yml
@@ -1,6 +1,9 @@
 base_model: 01-ai/Yi-34B-Chat
+# optionally might have model_type or tokenizer_type
 model_type: LlamaForCausalLM
 tokenizer_type: LlamaTokenizer
+# Automatically upload checkpoint and final model to HF
+# hub_model_id: username/custom_model_name
 
 load_in_8bit: false
 load_in_4bit: true