From dd8bad06d0b4849a5a32dcb7f0f91b4a5be869f7 Mon Sep 17 00:00:00 2001
From: Wing Lian <wing.lian@gmail.com>
Date: Sat, 12 Apr 2025 07:25:11 -0700
Subject: [PATCH] remove strict=false from example yamls [skip ci] (#2523)
 [skip ci]

---
 examples/cerebras/btlm-ft.yml                            | 1 -
 examples/cerebras/qlora.yml                              | 1 -
 examples/code-llama/13b/lora.yml                         | 1 -
 examples/code-llama/13b/qlora.yml                        | 1 -
 examples/code-llama/34b/lora.yml                         | 1 -
 examples/code-llama/34b/qlora.yml                        | 1 -
 examples/code-llama/7b/lora.yml                          | 1 -
 examples/code-llama/7b/qlora.yml                         | 1 -
 examples/cohere/command-r-7b-qlora.yml                   | 1 -
 examples/dbrx/16bit-lora.yaml                            | 1 -
 examples/dbrx/8bit-lora.yaml                             | 1 -
 examples/dbrx/fft-ds-zero3.yaml                          | 1 -
 examples/deepseek-v2/fft-fsdp-16b.yaml                   | 1 -
 examples/deepseek-v2/qlora-fsdp-2_5.yaml                 | 1 -
 examples/falcon/config-7b-lora.yml                       | 1 -
 examples/falcon/config-7b-qlora.yml                      | 1 -
 examples/falcon/config-7b.yml                            | 1 -
 examples/gemma/qlora.yml                                 | 1 -
 examples/gemma2/qlora.yml                                | 1 -
 examples/gemma2/reward-model.yaml                        | 1 -
 examples/gemma3/gemma-3-1b-qlora.yml                     | 1 -
 examples/gemma3/gemma-3-4b-qlora.yml                     | 1 -
 examples/gemma3/gemma-3-4b-vision-qlora.yml              | 1 -
 examples/gptj/qlora.yml                                  | 1 -
 examples/jamba/qlora.yaml                                | 1 -
 examples/jamba/qlora_deepspeed.yaml                      | 1 -
 examples/jamba/qlora_fsdp_large.yaml                     | 1 -
 examples/llama-2/fft_optimized.yml                       | 1 -
 examples/llama-2/gptq-lora.yml                           | 1 -
 examples/llama-2/lisa.yml                                | 1 -
 examples/llama-2/loftq.yml                               | 1 -
 examples/llama-2/lora.yml                                | 1 -
 examples/llama-2/qlora-fsdp.yml                          | 1 -
 examples/llama-2/qlora.yml                               | 1 -
 examples/llama-2/relora.yml                              | 1 -
 examples/llama-3-vision/lora-11b.yaml                    | 1 -
 examples/llama-3/fft-8b-liger-fsdp.yaml                  | 1 -
 examples/llama-3/fft-8b.yaml                             | 1 -
 examples/llama-3/instruct-dpo-lora-8b.yml                | 1 -
 examples/llama-3/instruct-lora-8b.yml                    | 1 -
 examples/llama-3/lora-1b-deduplicate-dpo.yml             | 1 -
 examples/llama-3/lora-1b-deduplicate-sft.yml             | 1 -
 examples/llama-3/lora-1b-kernels.yml                     | 1 -
 examples/llama-3/lora-1b-ray.yml                         | 1 -
 examples/llama-3/lora-1b-sample-packing-sequentially.yml | 1 -
 examples/llama-3/lora-1b.yml                             | 1 -
 examples/llama-3/lora-8b.yml                             | 1 -
 examples/llama-3/qlora-1b-kto.yaml                       | 1 -
 examples/llama-3/qlora-1b.yml                            | 1 -
 examples/llama-3/qlora-fsdp-405b.yaml                    | 1 -
 examples/llama-3/qlora-fsdp-70b.yaml                     | 1 -
 examples/llama-3/qlora.yml                               | 1 -
 examples/llama-4/maverick-qlora-fsdp1.yaml               | 1 -
 examples/llama-4/scout-qlora-fsdp1.yaml                  | 1 -
 examples/llama-4/scout-qlora-single-h100.yaml            | 1 -
 examples/llama-4/scout-vision-qlora-fsdp.yaml            | 1 -
 examples/llava/lora-7b.yaml                              | 1 -
 examples/mamba/config.yml                                | 1 -
 examples/mistral/bigstral-ds-zero3.yaml                  | 1 -
 examples/mistral/config.yml                              | 1 -
 examples/mistral/lora-mps.yml                            | 1 -
 examples/mistral/lora.yml                                | 1 -
 examples/mistral/mistral-dpo-qlora.yml                   | 1 -
 examples/mistral/mistral-qlora-fsdp.yml                  | 1 -
 examples/mistral/mistral-qlora-orpo.yml                  | 1 -
 examples/mistral/mistral-small-3.1-24B-lora.yml          | 1 -
 examples/mistral/mixtral-8x22b-qlora-fsdp.yml            | 1 -
 examples/mistral/mixtral-qlora-fsdp.yml                  | 1 -
 examples/mistral/mixtral.yml                             | 1 -
 examples/mistral/mixtral_22.yml                          | 1 -
 examples/mistral/qlora.yml                               | 1 -
 examples/openllama-3b/config.yml                         | 1 -
 examples/openllama-3b/lora.yml                           | 1 -
 examples/openllama-3b/qlora.yml                          | 1 -
 examples/phi/lora-3.5.yaml                               | 1 -
 examples/phi/phi-ft.yml                                  | 1 -
 examples/phi/phi-qlora.yml                               | 1 -
 examples/phi/phi2-ft.yml                                 | 1 -
 examples/phi/phi3-ft-fsdp.yml                            | 1 -
 examples/phi/phi3-ft.yml                                 | 1 -
 examples/pixtral/lora-12b.yml                            | 1 -
 examples/qwen/lora.yml                                   | 1 -
 examples/qwen/qlora.yml                                  | 1 -
 examples/qwen/qwen2-moe-lora.yaml                        | 1 -
 examples/qwen/qwen2-moe-qlora.yaml                       | 1 -
 examples/qwen2-vl/lora-7b.yaml                           | 1 -
 examples/qwen2/dpo.yaml                                  | 1 -
 examples/qwen2/prm.yaml                                  | 1 -
 examples/qwen2/qlora-fsdp.yaml                           | 1 -
 examples/qwen2/reward-model.yaml                         | 1 -
 examples/stablelm-2/1.6b/fft.yml                         | 1 -
 examples/stablelm-2/1.6b/lora.yml                        | 1 -
 examples/starcoder2/qlora.yml                            | 1 -
 examples/tiny-llama/lora-mps.yml                         | 1 -
 examples/tiny-llama/lora.yml                             | 1 -
 examples/tiny-llama/pretrain.yml                         | 1 -
 examples/tiny-llama/qlora.yml                            | 1 -
 examples/xgen-7b/xgen-7b-8k-qlora.yml                    | 1 -
 examples/yi-34B-chat/qlora.yml                           | 1 -
 99 files changed, 99 deletions(-)

diff --git a/examples/cerebras/btlm-ft.yml b/examples/cerebras/btlm-ft.yml
index 6190714b4..c9878779d 100644
--- a/examples/cerebras/btlm-ft.yml
+++ b/examples/cerebras/btlm-ft.yml
@@ -8,7 +8,6 @@ tokenizer_type: GPT2Tokenizer
 trust_remote_code: true
 tokenizer_use_fast: true
 tokenizer_legacy: true
-strict: false
 push_dataset_to_hub:
 hf_use_auth_token: true
 datasets:
diff --git a/examples/cerebras/qlora.yml b/examples/cerebras/qlora.yml
index e74b2d675..55cc597f1 100644
--- a/examples/cerebras/qlora.yml
+++ b/examples/cerebras/qlora.yml
@@ -4,7 +4,6 @@ base_model: cerebras/Cerebras-GPT-1.3B
 
 load_in_8bit: false
 load_in_4bit: true
-strict: false
 push_dataset_to_hub:
 datasets:
   - path: teknium/GPT4-LLM-Cleaned
diff --git a/examples/code-llama/13b/lora.yml b/examples/code-llama/13b/lora.yml
index 6c205ae87..0ed2382ba 100644
--- a/examples/code-llama/13b/lora.yml
+++ b/examples/code-llama/13b/lora.yml
@@ -7,7 +7,6 @@ tokenizer_type: CodeLlamaTokenizer
 
 load_in_8bit: true
 load_in_4bit: false
-strict: false
 
 datasets:
   - path: mhenrichsen/alpaca_2k_test
diff --git a/examples/code-llama/13b/qlora.yml b/examples/code-llama/13b/qlora.yml
index 28f0275d3..22bd1691b 100644
--- a/examples/code-llama/13b/qlora.yml
+++ b/examples/code-llama/13b/qlora.yml
@@ -7,7 +7,6 @@ tokenizer_type: CodeLlamaTokenizer
 
 load_in_8bit: false
 load_in_4bit: true
-strict: false
 
 datasets:
   - path: mhenrichsen/alpaca_2k_test
diff --git a/examples/code-llama/34b/lora.yml b/examples/code-llama/34b/lora.yml
index 6024ce3f7..25dc9f421 100644
--- a/examples/code-llama/34b/lora.yml
+++ b/examples/code-llama/34b/lora.yml
@@ -7,7 +7,6 @@ tokenizer_type: CodeLlamaTokenizer
 
 load_in_8bit: true
 load_in_4bit: false
-strict: false
 
 datasets:
   - path: mhenrichsen/alpaca_2k_test
diff --git a/examples/code-llama/34b/qlora.yml b/examples/code-llama/34b/qlora.yml
index 56c276cc9..0e33e2a45 100644
--- a/examples/code-llama/34b/qlora.yml
+++ b/examples/code-llama/34b/qlora.yml
@@ -7,7 +7,6 @@ tokenizer_type: CodeLlamaTokenizer
 
 load_in_8bit: false
 load_in_4bit: true
-strict: false
 
 datasets:
   - path: mhenrichsen/alpaca_2k_test
diff --git a/examples/code-llama/7b/lora.yml b/examples/code-llama/7b/lora.yml
index 0eb20c244..d288b9f65 100644
--- a/examples/code-llama/7b/lora.yml
+++ b/examples/code-llama/7b/lora.yml
@@ -7,7 +7,6 @@ tokenizer_type: CodeLlamaTokenizer
 
 load_in_8bit: true
 load_in_4bit: false
-strict: false
 
 datasets:
   - path: mhenrichsen/alpaca_2k_test
diff --git a/examples/code-llama/7b/qlora.yml b/examples/code-llama/7b/qlora.yml
index f078f1398..de41c0123 100644
--- a/examples/code-llama/7b/qlora.yml
+++ b/examples/code-llama/7b/qlora.yml
@@ -7,7 +7,6 @@ tokenizer_type: CodeLlamaTokenizer
 
 load_in_8bit: false
 load_in_4bit: true
-strict: false
 
 datasets:
   - path: mhenrichsen/alpaca_2k_test
diff --git a/examples/cohere/command-r-7b-qlora.yml b/examples/cohere/command-r-7b-qlora.yml
index 8a2b6eacd..4a30e9a77 100644
--- a/examples/cohere/command-r-7b-qlora.yml
+++ b/examples/cohere/command-r-7b-qlora.yml
@@ -4,7 +4,6 @@ tokenizer_type: AutoTokenizer
 
 load_in_8bit: false
 load_in_4bit: true
-strict: false
 
 # huggingface repo
 chat_template: cohere
diff --git a/examples/dbrx/16bit-lora.yaml b/examples/dbrx/16bit-lora.yaml
index 1724c1426..852654d49 100644
--- a/examples/dbrx/16bit-lora.yaml
+++ b/examples/dbrx/16bit-lora.yaml
@@ -3,7 +3,6 @@ base_model: LnL-AI/dbrx-base-converted-v2
 # hub_model_id: username/custom_model_name
 
 trust_remote_code: true
-strict: false
 
 datasets:
   - path: tatsu-lab/alpaca
diff --git a/examples/dbrx/8bit-lora.yaml b/examples/dbrx/8bit-lora.yaml
index 308483adf..0b9402194 100644
--- a/examples/dbrx/8bit-lora.yaml
+++ b/examples/dbrx/8bit-lora.yaml
@@ -6,7 +6,6 @@ trust_remote_code: true
 
 load_in_8bit: true
 load_in_4bit: false
-strict: false
 
 datasets:
   - path: tatsu-lab/alpaca
diff --git a/examples/dbrx/fft-ds-zero3.yaml b/examples/dbrx/fft-ds-zero3.yaml
index 0fbb5b068..e42c16673 100644
--- a/examples/dbrx/fft-ds-zero3.yaml
+++ b/examples/dbrx/fft-ds-zero3.yaml
@@ -3,7 +3,6 @@ base_model: LnL-AI/dbrx-base-converted-v2
 # hub_model_id: username/custom_model_name
 
 trust_remote_code: true
-strict: false
 
 datasets:
   - path: tatsu-lab/alpaca
diff --git a/examples/deepseek-v2/fft-fsdp-16b.yaml b/examples/deepseek-v2/fft-fsdp-16b.yaml
index 3fe8691a3..0ed97db36 100644
--- a/examples/deepseek-v2/fft-fsdp-16b.yaml
+++ b/examples/deepseek-v2/fft-fsdp-16b.yaml
@@ -2,7 +2,6 @@ base_model: deepseek-ai/DeepSeek-V2-Lite
 # Automatically upload checkpoint and final model to HF
 # hub_model_id: username/custom_model_name
 trust_remote_code: true
-strict: false
 
 datasets:
   - path: tatsu-lab/alpaca
diff --git a/examples/deepseek-v2/qlora-fsdp-2_5.yaml b/examples/deepseek-v2/qlora-fsdp-2_5.yaml
index a554970b6..34dbeaafe 100644
--- a/examples/deepseek-v2/qlora-fsdp-2_5.yaml
+++ b/examples/deepseek-v2/qlora-fsdp-2_5.yaml
@@ -6,7 +6,6 @@ trust_remote_code: true
 
 load_in_8bit: false
 load_in_4bit: true
-strict: false
 
 
 plugins:
diff --git a/examples/falcon/config-7b-lora.yml b/examples/falcon/config-7b-lora.yml
index 2d9240e8b..391d4dd94 100644
--- a/examples/falcon/config-7b-lora.yml
+++ b/examples/falcon/config-7b-lora.yml
@@ -11,7 +11,6 @@ trust_remote_code: true
 load_in_8bit: true
 load_in_4bit: false
 gptq: false
-strict: false
 push_dataset_to_hub:
 datasets:
   - path: teknium/GPT4-LLM-Cleaned
diff --git a/examples/falcon/config-7b-qlora.yml b/examples/falcon/config-7b-qlora.yml
index 78323db5f..a9af8574c 100644
--- a/examples/falcon/config-7b-qlora.yml
+++ b/examples/falcon/config-7b-qlora.yml
@@ -15,7 +15,6 @@ load_in_8bit: false
 # enable 4bit for QLoRA
 load_in_4bit: true
 gptq: false
-strict: false
 push_dataset_to_hub:
 datasets:
   - path: QingyiSi/Alpaca-CoT
diff --git a/examples/falcon/config-7b.yml b/examples/falcon/config-7b.yml
index a796b89dd..3cc553daa 100644
--- a/examples/falcon/config-7b.yml
+++ b/examples/falcon/config-7b.yml
@@ -8,7 +8,6 @@ tokenizer_type: AutoTokenizer
 # required by falcon custom model code: https://huggingface.co/tiiuae/falcon-7b/tree/main
 trust_remote_code: true
 gptq: false
-strict: false
 push_dataset_to_hub:
 datasets:
   - path: teknium/GPT4-LLM-Cleaned
diff --git a/examples/gemma/qlora.yml b/examples/gemma/qlora.yml
index 505564269..2738112b4 100644
--- a/examples/gemma/qlora.yml
+++ b/examples/gemma/qlora.yml
@@ -8,7 +8,6 @@ tokenizer_type: AutoTokenizer
 
 load_in_8bit: false
 load_in_4bit: true
-strict: false
 
 # huggingface repo
 datasets:
diff --git a/examples/gemma2/qlora.yml b/examples/gemma2/qlora.yml
index afba83552..cb96a32c1 100644
--- a/examples/gemma2/qlora.yml
+++ b/examples/gemma2/qlora.yml
@@ -7,7 +7,6 @@ tokenizer_type: AutoTokenizer
 
 load_in_8bit: false
 load_in_4bit: true
-strict: false
 
 # huggingface repo
 chat_template: gemma
diff --git a/examples/gemma2/reward-model.yaml b/examples/gemma2/reward-model.yaml
index d828af939..ce01a4572 100644
--- a/examples/gemma2/reward-model.yaml
+++ b/examples/gemma2/reward-model.yaml
@@ -5,7 +5,6 @@ num_labels: 1
 tokenizer_type: AutoTokenizer
 # Automatically upload checkpoint and final model to HF
 # hub_model_id: username/custom_model_name
-strict: false
 
 reward_model: true
 chat_template: gemma
diff --git a/examples/gemma3/gemma-3-1b-qlora.yml b/examples/gemma3/gemma-3-1b-qlora.yml
index 732b914a8..44310558c 100644
--- a/examples/gemma3/gemma-3-1b-qlora.yml
+++ b/examples/gemma3/gemma-3-1b-qlora.yml
@@ -10,7 +10,6 @@ ddp_find_unused_parameters: true
 
 load_in_8bit: false
 load_in_4bit: true
-strict: false
 
 # huggingface repo
 chat_template: gemma3
diff --git a/examples/gemma3/gemma-3-4b-qlora.yml b/examples/gemma3/gemma-3-4b-qlora.yml
index 85e5dce68..29f8cc1e1 100644
--- a/examples/gemma3/gemma-3-4b-qlora.yml
+++ b/examples/gemma3/gemma-3-4b-qlora.yml
@@ -1,5 +1,4 @@
 base_model: google/gemma-3-4b-it
-strict: false
 
 load_in_4bit: true
 
diff --git a/examples/gemma3/gemma-3-4b-vision-qlora.yml b/examples/gemma3/gemma-3-4b-vision-qlora.yml
index 92273380c..3fd9eb5f0 100644
--- a/examples/gemma3/gemma-3-4b-vision-qlora.yml
+++ b/examples/gemma3/gemma-3-4b-vision-qlora.yml
@@ -1,6 +1,5 @@
 base_model: google/gemma-3-4b-it
 processor_type: AutoProcessor
-strict: false
 
 load_in_4bit: true
 
diff --git a/examples/gptj/qlora.yml b/examples/gptj/qlora.yml
index 086d425b5..c3cf9f973 100644
--- a/examples/gptj/qlora.yml
+++ b/examples/gptj/qlora.yml
@@ -4,7 +4,6 @@ base_model: EleutherAI/gpt-j-6b
 
 load_in_8bit: false
 load_in_4bit: true
-strict: false
 push_dataset_to_hub:
 datasets:
   - path: teknium/GPT4-LLM-Cleaned
diff --git a/examples/jamba/qlora.yaml b/examples/jamba/qlora.yaml
index 7d642cb0a..2cb0eea41 100644
--- a/examples/jamba/qlora.yaml
+++ b/examples/jamba/qlora.yaml
@@ -6,7 +6,6 @@ trust_remote_code: true
 
 load_in_8bit: false
 load_in_4bit: true
-strict: false
 
 datasets:
   - path: mhenrichsen/alpaca_2k_test
diff --git a/examples/jamba/qlora_deepspeed.yaml b/examples/jamba/qlora_deepspeed.yaml
index d983dc391..d13ce6483 100644
--- a/examples/jamba/qlora_deepspeed.yaml
+++ b/examples/jamba/qlora_deepspeed.yaml
@@ -5,7 +5,6 @@ trust_remote_code: true
 
 load_in_8bit: false
 load_in_4bit: true
-strict: false
 
 datasets:
   - path: mhenrichsen/alpaca_2k_test
diff --git a/examples/jamba/qlora_fsdp_large.yaml b/examples/jamba/qlora_fsdp_large.yaml
index a968d99d7..6badaba19 100644
--- a/examples/jamba/qlora_fsdp_large.yaml
+++ b/examples/jamba/qlora_fsdp_large.yaml
@@ -5,7 +5,6 @@ tokenizer_type: AutoTokenizer
 # hub_model_id: username/custom_model_name
 
 load_in_4bit: true
-strict: false
 use_tensorboard: true
 chat_template: jamba
 datasets:
diff --git a/examples/llama-2/fft_optimized.yml b/examples/llama-2/fft_optimized.yml
index fd78fbfee..86b1b6a21 100644
--- a/examples/llama-2/fft_optimized.yml
+++ b/examples/llama-2/fft_optimized.yml
@@ -4,7 +4,6 @@ model_type: LlamaForCausalLM
 tokenizer_type: LlamaTokenizer
 # Automatically upload checkpoint and final model to HF
 # hub_model_id: username/custom_model_name
-strict: false
 
 datasets:
   - path: mhenrichsen/alpaca_2k_test
diff --git a/examples/llama-2/gptq-lora.yml b/examples/llama-2/gptq-lora.yml
index ad2dbd9cf..0f1b34016 100644
--- a/examples/llama-2/gptq-lora.yml
+++ b/examples/llama-2/gptq-lora.yml
@@ -10,7 +10,6 @@ gptq_disable_exllama: true
 
 tokenizer_use_fast: true
 tokenizer_legacy: true
-strict: false
 push_dataset_to_hub:
 hf_use_auth_token: true
 datasets:
diff --git a/examples/llama-2/lisa.yml b/examples/llama-2/lisa.yml
index 585fa9428..a76a792ae 100644
--- a/examples/llama-2/lisa.yml
+++ b/examples/llama-2/lisa.yml
@@ -4,7 +4,6 @@ model_type: LlamaForCausalLM
 tokenizer_type: LlamaTokenizer
 # Automatically upload checkpoint and final model to HF
 # hub_model_id: username/custom_model_name
-strict: false
 
 datasets:
   - path: teknium/GPT4-LLM-Cleaned
diff --git a/examples/llama-2/loftq.yml b/examples/llama-2/loftq.yml
index bf32c7b27..22dbf2d99 100644
--- a/examples/llama-2/loftq.yml
+++ b/examples/llama-2/loftq.yml
@@ -4,7 +4,6 @@ model_type: LlamaForCausalLM
 tokenizer_type: LlamaTokenizer
 # Automatically upload checkpoint and final model to HF
 # hub_model_id: username/custom_model_name
-strict: false
 
 datasets:
   - path: mhenrichsen/alpaca_2k_test
diff --git a/examples/llama-2/lora.yml b/examples/llama-2/lora.yml
index 3ef607ab4..679aed3a9 100644
--- a/examples/llama-2/lora.yml
+++ b/examples/llama-2/lora.yml
@@ -7,7 +7,6 @@ tokenizer_type: LlamaTokenizer
 
 load_in_8bit: true
 load_in_4bit: false
-strict: false
 
 datasets:
   - path: mhenrichsen/alpaca_2k_test
diff --git a/examples/llama-2/qlora-fsdp.yml b/examples/llama-2/qlora-fsdp.yml
index 759f08024..a42eabd4b 100644
--- a/examples/llama-2/qlora-fsdp.yml
+++ b/examples/llama-2/qlora-fsdp.yml
@@ -7,7 +7,6 @@ tokenizer_type: LlamaTokenizer
 
 load_in_8bit: false
 load_in_4bit: true
-strict: false
 
 datasets:
   - path: yahma/alpaca-cleaned
diff --git a/examples/llama-2/qlora.yml b/examples/llama-2/qlora.yml
index c678a0042..de65928bc 100644
--- a/examples/llama-2/qlora.yml
+++ b/examples/llama-2/qlora.yml
@@ -7,7 +7,6 @@ tokenizer_type: LlamaTokenizer
 
 load_in_8bit: false
 load_in_4bit: true
-strict: false
 
 datasets:
   - path: mhenrichsen/alpaca_2k_test
diff --git a/examples/llama-2/relora.yml b/examples/llama-2/relora.yml
index 6c943d009..e0a5f7068 100644
--- a/examples/llama-2/relora.yml
+++ b/examples/llama-2/relora.yml
@@ -5,7 +5,6 @@ tokenizer_type: LlamaTokenizer
 
 load_in_8bit: false
 load_in_4bit: true
-strict: false
 
 datasets:
   - path: teknium/GPT4-LLM-Cleaned
diff --git a/examples/llama-3-vision/lora-11b.yaml b/examples/llama-3-vision/lora-11b.yaml
index 4431878fa..f4883e903 100644
--- a/examples/llama-3-vision/lora-11b.yaml
+++ b/examples/llama-3-vision/lora-11b.yaml
@@ -4,7 +4,6 @@ processor_type: AutoProcessor
 # Automatically upload checkpoint and final model to HF
 # hub_model_id: username/custom_model_name
 
-strict: false
 
 # these 3 lines are needed for now to handle vision chat templates w images
 skip_prepare_dataset: true
diff --git a/examples/llama-3/fft-8b-liger-fsdp.yaml b/examples/llama-3/fft-8b-liger-fsdp.yaml
index 50169879c..eccfa6d8c 100644
--- a/examples/llama-3/fft-8b-liger-fsdp.yaml
+++ b/examples/llama-3/fft-8b-liger-fsdp.yaml
@@ -9,7 +9,6 @@ liger_rms_norm: true
 liger_glu_activation: true
 liger_fused_linear_cross_entropy: true
 
-strict: false
 
 chat_template: llama3
 datasets:
diff --git a/examples/llama-3/fft-8b.yaml b/examples/llama-3/fft-8b.yaml
index 4452a6e3d..fdae3e6c4 100644
--- a/examples/llama-3/fft-8b.yaml
+++ b/examples/llama-3/fft-8b.yaml
@@ -1,7 +1,6 @@
 base_model: NousResearch/Meta-Llama-3.1-8B
 # Automatically upload checkpoint and final model to HF
 # hub_model_id: username/custom_model_name
-strict: false
 
 datasets:
   - path: tatsu-lab/alpaca
diff --git a/examples/llama-3/instruct-dpo-lora-8b.yml b/examples/llama-3/instruct-dpo-lora-8b.yml
index a1b923fb6..13082294f 100644
--- a/examples/llama-3/instruct-dpo-lora-8b.yml
+++ b/examples/llama-3/instruct-dpo-lora-8b.yml
@@ -7,7 +7,6 @@ tokenizer_type: AutoTokenizer
 
 load_in_8bit: true
 load_in_4bit: false
-strict: false
 
 chat_template: llama3
 rl: dpo
diff --git a/examples/llama-3/instruct-lora-8b.yml b/examples/llama-3/instruct-lora-8b.yml
index 362bda9aa..acab862f6 100644
--- a/examples/llama-3/instruct-lora-8b.yml
+++ b/examples/llama-3/instruct-lora-8b.yml
@@ -7,7 +7,6 @@ tokenizer_type: AutoTokenizer
 
 load_in_8bit: true
 load_in_4bit: false
-strict: false
 
 chat_template: llama3
 datasets:
diff --git a/examples/llama-3/lora-1b-deduplicate-dpo.yml b/examples/llama-3/lora-1b-deduplicate-dpo.yml
index e4b2a5244..10e9747cb 100644
--- a/examples/llama-3/lora-1b-deduplicate-dpo.yml
+++ b/examples/llama-3/lora-1b-deduplicate-dpo.yml
@@ -7,7 +7,6 @@ tokenizer_type: AutoTokenizer
 
 load_in_8bit: true
 load_in_4bit: false
-strict: false
 
 chat_template: llama3
 rl: dpo
diff --git a/examples/llama-3/lora-1b-deduplicate-sft.yml b/examples/llama-3/lora-1b-deduplicate-sft.yml
index b8c21fafb..630ec92f6 100644
--- a/examples/llama-3/lora-1b-deduplicate-sft.yml
+++ b/examples/llama-3/lora-1b-deduplicate-sft.yml
@@ -7,7 +7,6 @@ tokenizer_type: AutoTokenizer
 
 load_in_8bit: true
 load_in_4bit: false
-strict: false
 
 datasets:
   - path: mhenrichsen/alpaca_2k_test
diff --git a/examples/llama-3/lora-1b-kernels.yml b/examples/llama-3/lora-1b-kernels.yml
index b76f03801..a2d07ca49 100644
--- a/examples/llama-3/lora-1b-kernels.yml
+++ b/examples/llama-3/lora-1b-kernels.yml
@@ -1,7 +1,6 @@
 base_model: NousResearch/Llama-3.2-1B
 # Automatically upload checkpoint and final model to HF
 # hub_model_id: username/custom_model_name
-strict: false
 
 datasets:
   - path: teknium/GPT4-LLM-Cleaned
diff --git a/examples/llama-3/lora-1b-ray.yml b/examples/llama-3/lora-1b-ray.yml
index 199fe3b5d..bb23164eb 100644
--- a/examples/llama-3/lora-1b-ray.yml
+++ b/examples/llama-3/lora-1b-ray.yml
@@ -1,7 +1,6 @@
 base_model: NousResearch/Llama-3.2-1B
 # Automatically upload checkpoint and final model to HF
 # hub_model_id: username/custom_model_name
-strict: false
 
 datasets:
   - path: teknium/GPT4-LLM-Cleaned
diff --git a/examples/llama-3/lora-1b-sample-packing-sequentially.yml b/examples/llama-3/lora-1b-sample-packing-sequentially.yml
index a027673ab..769dd32e6 100644
--- a/examples/llama-3/lora-1b-sample-packing-sequentially.yml
+++ b/examples/llama-3/lora-1b-sample-packing-sequentially.yml
@@ -7,7 +7,6 @@ tokenizer_type: AutoTokenizer
 
 load_in_8bit: true
 load_in_4bit: false
-strict: false
 
 datasets:
   - path: mhenrichsen/alpaca_2k_test
diff --git a/examples/llama-3/lora-1b.yml b/examples/llama-3/lora-1b.yml
index 8a536260a..c31a9f39a 100644
--- a/examples/llama-3/lora-1b.yml
+++ b/examples/llama-3/lora-1b.yml
@@ -1,7 +1,6 @@
 base_model: NousResearch/Llama-3.2-1B
 # Automatically upload checkpoint and final model to HF
 # hub_model_id: username/custom_model_name
-strict: false
 
 datasets:
   - path: teknium/GPT4-LLM-Cleaned
diff --git a/examples/llama-3/lora-8b.yml b/examples/llama-3/lora-8b.yml
index 700dd4614..ad50cd38a 100644
--- a/examples/llama-3/lora-8b.yml
+++ b/examples/llama-3/lora-8b.yml
@@ -7,7 +7,6 @@ tokenizer_type: AutoTokenizer
 
 load_in_8bit: true
 load_in_4bit: false
-strict: false
 
 datasets:
   - path: mhenrichsen/alpaca_2k_test
diff --git a/examples/llama-3/qlora-1b-kto.yaml b/examples/llama-3/qlora-1b-kto.yaml
index 0dc37b40a..89a51ea68 100644
--- a/examples/llama-3/qlora-1b-kto.yaml
+++ b/examples/llama-3/qlora-1b-kto.yaml
@@ -4,7 +4,6 @@ base_model: meta-llama/Llama-3.2-1B
 
 load_in_8bit: false
 load_in_4bit: true
-strict: false
 
 rl: kto
 rl_beta: 0.5
diff --git a/examples/llama-3/qlora-1b.yml b/examples/llama-3/qlora-1b.yml
index c42dd2238..5c8fe6628 100644
--- a/examples/llama-3/qlora-1b.yml
+++ b/examples/llama-3/qlora-1b.yml
@@ -4,7 +4,6 @@ base_model: NousResearch/Llama-3.2-1B
 
 load_in_8bit: false
 load_in_4bit: true
-strict: false
 
 datasets:
   - path: teknium/GPT4-LLM-Cleaned
diff --git a/examples/llama-3/qlora-fsdp-405b.yaml b/examples/llama-3/qlora-fsdp-405b.yaml
index 75c8f5973..2b7d51925 100644
--- a/examples/llama-3/qlora-fsdp-405b.yaml
+++ b/examples/llama-3/qlora-fsdp-405b.yaml
@@ -5,7 +5,6 @@ tokenizer_type: AutoTokenizer
 # hub_model_id: username/custom_model_name
 
 load_in_4bit: true
-strict: false
 
 datasets:
   - path: tatsu-lab/alpaca
diff --git a/examples/llama-3/qlora-fsdp-70b.yaml b/examples/llama-3/qlora-fsdp-70b.yaml
index c4889d643..412b6721c 100644
--- a/examples/llama-3/qlora-fsdp-70b.yaml
+++ b/examples/llama-3/qlora-fsdp-70b.yaml
@@ -7,7 +7,6 @@ tokenizer_type: AutoTokenizer  # PreTrainedTokenizerFast
 
 load_in_8bit: false
 load_in_4bit: true
-strict: false
 
 datasets:
   - path: tatsu-lab/alpaca
diff --git a/examples/llama-3/qlora.yml b/examples/llama-3/qlora.yml
index 607deb896..4cc9fc3db 100644
--- a/examples/llama-3/qlora.yml
+++ b/examples/llama-3/qlora.yml
@@ -7,7 +7,6 @@ tokenizer_type: AutoTokenizer
 
 load_in_8bit: false
 load_in_4bit: true
-strict: false
 
 datasets:
   - path: aaditya/alpaca_subset_1
diff --git a/examples/llama-4/maverick-qlora-fsdp1.yaml b/examples/llama-4/maverick-qlora-fsdp1.yaml
index 232afc73e..2be94f4ef 100644
--- a/examples/llama-4/maverick-qlora-fsdp1.yaml
+++ b/examples/llama-4/maverick-qlora-fsdp1.yaml
@@ -3,7 +3,6 @@ model_type: Llama4ForConditionalGeneration
 # Automatically upload checkpoint and final model to HF
 # hub_model_id: username/custom_model_name
 
-strict: false
 
 plugins:
   - axolotl.integrations.liger.LigerPlugin
diff --git a/examples/llama-4/scout-qlora-fsdp1.yaml b/examples/llama-4/scout-qlora-fsdp1.yaml
index ad2e46786..eeae872a6 100644
--- a/examples/llama-4/scout-qlora-fsdp1.yaml
+++ b/examples/llama-4/scout-qlora-fsdp1.yaml
@@ -3,7 +3,6 @@ model_type: Llama4ForConditionalGeneration
   # Automatically upload checkpoint and final model to HF
   # hub_model_id: username/custom_model_name
 
-strict: false
 
 # torch_compile: true
 plugins:
diff --git a/examples/llama-4/scout-qlora-single-h100.yaml b/examples/llama-4/scout-qlora-single-h100.yaml
index 23a3a2195..17ad70634 100644
--- a/examples/llama-4/scout-qlora-single-h100.yaml
+++ b/examples/llama-4/scout-qlora-single-h100.yaml
@@ -3,7 +3,6 @@ model_type: Llama4ForConditionalGeneration
 # Automatically upload checkpoint and final model to HF
 # hub_model_id: username/custom_model_name
 
-strict: false
 
 plugins:
   - axolotl.integrations.liger.LigerPlugin
diff --git a/examples/llama-4/scout-vision-qlora-fsdp.yaml b/examples/llama-4/scout-vision-qlora-fsdp.yaml
index 8b8c9abd1..eff708e4d 100644
--- a/examples/llama-4/scout-vision-qlora-fsdp.yaml
+++ b/examples/llama-4/scout-vision-qlora-fsdp.yaml
@@ -4,7 +4,6 @@ processor_type: Llama4Processor
   # Automatically upload checkpoint and final model to HF
   # hub_model_id: username/custom_model_name
 
-strict: false
 
 # these 3 lines are needed for now to handle vision chat templates w images
 skip_prepare_dataset: true
diff --git a/examples/llava/lora-7b.yaml b/examples/llava/lora-7b.yaml
index 68e463585..54edd04dc 100644
--- a/examples/llava/lora-7b.yaml
+++ b/examples/llava/lora-7b.yaml
@@ -1,6 +1,5 @@
 base_model: llava-hf/llava-1.5-7b-hf
 processor_type: AutoProcessor
-strict: false
 
 # these 3 lines are needed for now to handle vision chat templates w images
 skip_prepare_dataset: true
diff --git a/examples/mamba/config.yml b/examples/mamba/config.yml
index 3a114bac7..3d4583932 100644
--- a/examples/mamba/config.yml
+++ b/examples/mamba/config.yml
@@ -5,7 +5,6 @@ tokenizer_type: AutoTokenizer
 tokenizer_config: EleutherAI/gpt-neox-20b
 # Automatically upload checkpoint and final model to HF
 # hub_model_id: username/custom_model_name
-strict: false
 
 datasets:
   - path: mhenrichsen/alpaca_2k_test
diff --git a/examples/mistral/bigstral-ds-zero3.yaml b/examples/mistral/bigstral-ds-zero3.yaml
index bef989932..f626a92a1 100644
--- a/examples/mistral/bigstral-ds-zero3.yaml
+++ b/examples/mistral/bigstral-ds-zero3.yaml
@@ -6,7 +6,6 @@ tokenizer_type: LlamaTokenizer
 # hub_model_id: username/custom_model_name
 
 trust_remote_code: true
-strict: false
 
 unfrozen_parameters:
   - ^lm_head.weight$
diff --git a/examples/mistral/config.yml b/examples/mistral/config.yml
index c58e2c954..15edffb44 100644
--- a/examples/mistral/config.yml
+++ b/examples/mistral/config.yml
@@ -4,7 +4,6 @@ model_type: MistralForCausalLM
 tokenizer_type: LlamaTokenizer
 # Automatically upload checkpoint and final model to HF
 # hub_model_id: username/custom_model_name
-strict: false
 
 datasets:
   - path: mhenrichsen/alpaca_2k_test
diff --git a/examples/mistral/lora-mps.yml b/examples/mistral/lora-mps.yml
index ba61cac11..e6f46affb 100644
--- a/examples/mistral/lora-mps.yml
+++ b/examples/mistral/lora-mps.yml
@@ -4,7 +4,6 @@ model_type: MistralForCausalLM
 tokenizer_type: LlamaTokenizer
 # Automatically upload checkpoint and final model to HF
 # hub_model_id: username/custom_model_name
-strict: false
 
 datasets:
   - path: mhenrichsen/alpaca_2k_test
diff --git a/examples/mistral/lora.yml b/examples/mistral/lora.yml
index b55c66715..9af4274fd 100644
--- a/examples/mistral/lora.yml
+++ b/examples/mistral/lora.yml
@@ -7,7 +7,6 @@ tokenizer_type: LlamaTokenizer
 
 load_in_8bit: true
 load_in_4bit: false
-strict: false
 
 datasets:
   - path: mhenrichsen/alpaca_2k_test
diff --git a/examples/mistral/mistral-dpo-qlora.yml b/examples/mistral/mistral-dpo-qlora.yml
index 6446cacb8..af707973f 100644
--- a/examples/mistral/mistral-dpo-qlora.yml
+++ b/examples/mistral/mistral-dpo-qlora.yml
@@ -12,7 +12,6 @@ tokenizer_type: LlamaTokenizer
 
 load_in_8bit: false
 load_in_4bit: true
-strict: false
 
 chat_template: chatml
 rl: dpo
diff --git a/examples/mistral/mistral-qlora-fsdp.yml b/examples/mistral/mistral-qlora-fsdp.yml
index 5825ac749..e234b19a2 100644
--- a/examples/mistral/mistral-qlora-fsdp.yml
+++ b/examples/mistral/mistral-qlora-fsdp.yml
@@ -9,7 +9,6 @@ trust_remote_code: true
 
 load_in_8bit: false
 load_in_4bit: true
-strict: false
 
 datasets:
   - path: tatsu-lab/alpaca
diff --git a/examples/mistral/mistral-qlora-orpo.yml b/examples/mistral/mistral-qlora-orpo.yml
index 9c6ae74ef..6c0212b7c 100644
--- a/examples/mistral/mistral-qlora-orpo.yml
+++ b/examples/mistral/mistral-qlora-orpo.yml
@@ -7,7 +7,6 @@ tokenizer_type: LlamaTokenizer
 
 load_in_8bit: false
 load_in_4bit: true
-strict: false
 
 rl: orpo
 orpo_alpha: 0.1
diff --git a/examples/mistral/mistral-small-3.1-24B-lora.yml b/examples/mistral/mistral-small-3.1-24B-lora.yml
index 0e6b4402d..198b3f373 100644
--- a/examples/mistral/mistral-small-3.1-24B-lora.yml
+++ b/examples/mistral/mistral-small-3.1-24B-lora.yml
@@ -1,6 +1,5 @@
 base_model: mistralai/Mistral-Small-3.1-24B-Instruct-2503
 processor_type: AutoProcessor
-strict: false
 
 load_in_8bit: true
 
diff --git a/examples/mistral/mixtral-8x22b-qlora-fsdp.yml b/examples/mistral/mixtral-8x22b-qlora-fsdp.yml
index e29b6392a..af6ba5a76 100644
--- a/examples/mistral/mixtral-8x22b-qlora-fsdp.yml
+++ b/examples/mistral/mixtral-8x22b-qlora-fsdp.yml
@@ -7,7 +7,6 @@ tokenizer_type: LlamaTokenizer
 
 load_in_8bit: false
 load_in_4bit: true
-strict: false
 
 datasets:
   - path: tatsu-lab/alpaca
diff --git a/examples/mistral/mixtral-qlora-fsdp.yml b/examples/mistral/mixtral-qlora-fsdp.yml
index 40bb5d5d1..b1843a138 100644
--- a/examples/mistral/mixtral-qlora-fsdp.yml
+++ b/examples/mistral/mixtral-qlora-fsdp.yml
@@ -9,7 +9,6 @@ trust_remote_code: true
 
 load_in_8bit: false
 load_in_4bit: true
-strict: false
 
 datasets:
   - path: tatsu-lab/alpaca
diff --git a/examples/mistral/mixtral.yml b/examples/mistral/mixtral.yml
index eefd2456d..4c256420c 100644
--- a/examples/mistral/mixtral.yml
+++ b/examples/mistral/mixtral.yml
@@ -9,7 +9,6 @@ trust_remote_code: true
 
 load_in_8bit: false
 load_in_4bit: true
-strict: false
 
 datasets:
   - path: tatsu-lab/alpaca
diff --git a/examples/mistral/mixtral_22.yml b/examples/mistral/mixtral_22.yml
index 1debd793a..25e1d7155 100644
--- a/examples/mistral/mixtral_22.yml
+++ b/examples/mistral/mixtral_22.yml
@@ -6,7 +6,6 @@ tokenizer_type: LlamaTokenizer
 # hub_model_id: username/custom_model_name
 
 trust_remote_code: true
-strict: false
 
 unfrozen_parameters:
   - ^lm_head.weight$
diff --git a/examples/mistral/qlora.yml b/examples/mistral/qlora.yml
index 921f3fe7b..607e33701 100644
--- a/examples/mistral/qlora.yml
+++ b/examples/mistral/qlora.yml
@@ -7,7 +7,6 @@ tokenizer_type: LlamaTokenizer
 
 load_in_8bit: false
 load_in_4bit: true
-strict: false
 
 datasets:
   - path: mhenrichsen/alpaca_2k_test
diff --git a/examples/openllama-3b/config.yml b/examples/openllama-3b/config.yml
index 7a1e2d9a5..17eeb73ae 100644
--- a/examples/openllama-3b/config.yml
+++ b/examples/openllama-3b/config.yml
@@ -4,7 +4,6 @@ model_type: LlamaForCausalLM
 tokenizer_type: LlamaTokenizer
 # Automatically upload checkpoint and final model to HF
 # hub_model_id: username/custom_model_name
-strict: false
 push_dataset_to_hub:
 datasets:
   - path: teknium/GPT4-LLM-Cleaned
diff --git a/examples/openllama-3b/lora.yml b/examples/openllama-3b/lora.yml
index c1c597b73..073117f11 100644
--- a/examples/openllama-3b/lora.yml
+++ b/examples/openllama-3b/lora.yml
@@ -7,7 +7,6 @@ tokenizer_type: LlamaTokenizer
 
 load_in_8bit: true
 load_in_4bit: false
-strict: false
 push_dataset_to_hub:
 datasets:
   - path: teknium/GPT4-LLM-Cleaned
diff --git a/examples/openllama-3b/qlora.yml b/examples/openllama-3b/qlora.yml
index e9c71efd1..b4fca2c07 100644
--- a/examples/openllama-3b/qlora.yml
+++ b/examples/openllama-3b/qlora.yml
@@ -7,7 +7,6 @@ tokenizer_type: LlamaTokenizer
 
 load_in_8bit: false
 load_in_4bit: true
-strict: false
 push_dataset_to_hub:
 datasets:
   - path: teknium/GPT4-LLM-Cleaned
diff --git a/examples/phi/lora-3.5.yaml b/examples/phi/lora-3.5.yaml
index 2ecb9d28d..ad4ce9cd4 100644
--- a/examples/phi/lora-3.5.yaml
+++ b/examples/phi/lora-3.5.yaml
@@ -7,7 +7,6 @@ tokenizer_type: AutoTokenizer
 
 load_in_8bit: true
 load_in_4bit: false
-strict: false
 
 chat_template: phi_3
 datasets:
diff --git a/examples/phi/phi-ft.yml b/examples/phi/phi-ft.yml
index 886671a60..1562a7353 100644
--- a/examples/phi/phi-ft.yml
+++ b/examples/phi/phi-ft.yml
@@ -4,7 +4,6 @@ model_type: AutoModelForCausalLM
 tokenizer_type: AutoTokenizer
 # Automatically upload checkpoint and final model to HF
 # hub_model_id: username/custom_model_name
-strict: false
 
 datasets:
   - path: garage-bAInd/Open-Platypus
diff --git a/examples/phi/phi-qlora.yml b/examples/phi/phi-qlora.yml
index a1cbf8a52..4cd53db97 100644
--- a/examples/phi/phi-qlora.yml
+++ b/examples/phi/phi-qlora.yml
@@ -7,7 +7,6 @@ tokenizer_type: AutoTokenizer
 
 load_in_8bit: false
 load_in_4bit: true
-strict: false
 
 datasets:
   - path: garage-bAInd/Open-Platypus
diff --git a/examples/phi/phi2-ft.yml b/examples/phi/phi2-ft.yml
index 480017a39..ca733cc71 100644
--- a/examples/phi/phi2-ft.yml
+++ b/examples/phi/phi2-ft.yml
@@ -4,7 +4,6 @@ model_type: AutoModelForCausalLM
 tokenizer_type: AutoTokenizer
 # Automatically upload checkpoint and final model to HF
 # hub_model_id: username/custom_model_name
-strict: false
 
 datasets:
   - path: garage-bAInd/Open-Platypus
diff --git a/examples/phi/phi3-ft-fsdp.yml b/examples/phi/phi3-ft-fsdp.yml
index 766db76f6..d0d14fea6 100644
--- a/examples/phi/phi3-ft-fsdp.yml
+++ b/examples/phi/phi3-ft-fsdp.yml
@@ -4,7 +4,6 @@ model_type: AutoModelForCausalLM
 tokenizer_type: AutoTokenizer
 # Automatically upload checkpoint and final model to HF
 # hub_model_id: username/custom_model_name
-strict: false
 
 datasets:
   - path: mhenrichsen/alpaca_2k_test
diff --git a/examples/phi/phi3-ft.yml b/examples/phi/phi3-ft.yml
index 0a5da2e23..17c48da6f 100644
--- a/examples/phi/phi3-ft.yml
+++ b/examples/phi/phi3-ft.yml
@@ -7,7 +7,6 @@ tokenizer_type: AutoTokenizer
 # hub_model_id: username/custom_model_name
 
 chat_template: phi_3
-strict: false
 
 datasets:
   - path: garage-bAInd/Open-Platypus
diff --git a/examples/pixtral/lora-12b.yml b/examples/pixtral/lora-12b.yml
index d3b3efd70..dec8e4b5e 100644
--- a/examples/pixtral/lora-12b.yml
+++ b/examples/pixtral/lora-12b.yml
@@ -1,6 +1,5 @@
 base_model: mistral-community/pixtral-12b
 processor_type: AutoProcessor
-strict: false
 
 # these 3 lines are needed for now to handle vision chat templates w images
 skip_prepare_dataset: true
diff --git a/examples/qwen/lora.yml b/examples/qwen/lora.yml
index 23385d236..9a2843236 100644
--- a/examples/qwen/lora.yml
+++ b/examples/qwen/lora.yml
@@ -9,7 +9,6 @@ trust_remote_code: true
 
 load_in_8bit: true
 load_in_4bit: false
-strict: false
 
 datasets:
   - path: mhenrichsen/alpaca_2k_test
diff --git a/examples/qwen/qlora.yml b/examples/qwen/qlora.yml
index 854a682fe..5f85b44dd 100644
--- a/examples/qwen/qlora.yml
+++ b/examples/qwen/qlora.yml
@@ -9,7 +9,6 @@ trust_remote_code: true
 
 load_in_8bit: false
 load_in_4bit: true
-strict: false
 
 datasets:
   - path: mhenrichsen/alpaca_2k_test
diff --git a/examples/qwen/qwen2-moe-lora.yaml b/examples/qwen/qwen2-moe-lora.yaml
index a2a1e4d25..afce443a0 100644
--- a/examples/qwen/qwen2-moe-lora.yaml
+++ b/examples/qwen/qwen2-moe-lora.yaml
@@ -3,7 +3,6 @@ base_model: Qwen/Qwen1.5-MoE-A2.7B
 # hub_model_id: username/custom_model_name
 
 trust_remote_code: true
-strict: false
 
 datasets:
   - path: mhenrichsen/alpaca_2k_test
diff --git a/examples/qwen/qwen2-moe-qlora.yaml b/examples/qwen/qwen2-moe-qlora.yaml
index b1ab131a6..92a6842cf 100644
--- a/examples/qwen/qwen2-moe-qlora.yaml
+++ b/examples/qwen/qwen2-moe-qlora.yaml
@@ -6,7 +6,6 @@ trust_remote_code: true
 
 load_in_8bit: false
 load_in_4bit: true
-strict: false
 
 datasets:
   - path: mhenrichsen/alpaca_2k_test
diff --git a/examples/qwen2-vl/lora-7b.yaml b/examples/qwen2-vl/lora-7b.yaml
index 3d0b10adf..55773bc3d 100644
--- a/examples/qwen2-vl/lora-7b.yaml
+++ b/examples/qwen2-vl/lora-7b.yaml
@@ -1,6 +1,5 @@
 base_model: Qwen/Qwen2-VL-7B-Instruct
 processor_type: AutoProcessor
-strict: false
 
 # these 3 lines are needed for now to handle vision chat templates w images
 skip_prepare_dataset: true
diff --git a/examples/qwen2/dpo.yaml b/examples/qwen2/dpo.yaml
index 8df7f9dc4..3547c6c98 100644
--- a/examples/qwen2/dpo.yaml
+++ b/examples/qwen2/dpo.yaml
@@ -2,7 +2,6 @@ base_model: Qwen/Qwen2.5-0.5B
 # Automatically upload checkpoint and final model to HF
 # hub_model_id: username/custom_model_name
 
-strict: false
 
 chat_template: qwen_25
 rl: dpo
diff --git a/examples/qwen2/prm.yaml b/examples/qwen2/prm.yaml
index 669f8e2db..4afa24f3c 100644
--- a/examples/qwen2/prm.yaml
+++ b/examples/qwen2/prm.yaml
@@ -5,7 +5,6 @@ num_labels: 2
 tokenizer_type: AutoTokenizer
 # Automatically upload checkpoint and final model to HF
 # hub_model_id: username/custom_model_name
-strict: false
 
 process_reward_model: true
 chat_template:
diff --git a/examples/qwen2/qlora-fsdp.yaml b/examples/qwen2/qlora-fsdp.yaml
index 1f2ed83b1..ed2670ab6 100644
--- a/examples/qwen2/qlora-fsdp.yaml
+++ b/examples/qwen2/qlora-fsdp.yaml
@@ -6,7 +6,6 @@ trust_remote_code: true
 
 load_in_8bit: false
 load_in_4bit: true
-strict: false
 
 datasets:
   - path: tatsu-lab/alpaca
diff --git a/examples/qwen2/reward-model.yaml b/examples/qwen2/reward-model.yaml
index fcbb9867e..822407a1f 100644
--- a/examples/qwen2/reward-model.yaml
+++ b/examples/qwen2/reward-model.yaml
@@ -5,7 +5,6 @@ num_labels: 1
 tokenizer_type: AutoTokenizer
 # Automatically upload checkpoint and final model to HF
 # hub_model_id: username/custom_model_name
-strict: false
 
 reward_model: true
 chat_template: qwen_25
diff --git a/examples/stablelm-2/1.6b/fft.yml b/examples/stablelm-2/1.6b/fft.yml
index f26714856..9b45b399f 100644
--- a/examples/stablelm-2/1.6b/fft.yml
+++ b/examples/stablelm-2/1.6b/fft.yml
@@ -6,7 +6,6 @@ tokenizer_type: AutoTokenizer
 # hub_model_id: username/custom_model_name
 
 trust_remote_code: true
-strict: false
 
 datasets:
   - path: mhenrichsen/alpaca_2k_test
diff --git a/examples/stablelm-2/1.6b/lora.yml b/examples/stablelm-2/1.6b/lora.yml
index aaa9908d1..31e5ad933 100644
--- a/examples/stablelm-2/1.6b/lora.yml
+++ b/examples/stablelm-2/1.6b/lora.yml
@@ -9,7 +9,6 @@ trust_remote_code: true
 
 load_in_8bit: true
 load_in_4bit: false
-strict: false
 
 datasets:
   - path: mhenrichsen/alpaca_2k_test
diff --git a/examples/starcoder2/qlora.yml b/examples/starcoder2/qlora.yml
index 8b21d7145..18d85f9c3 100644
--- a/examples/starcoder2/qlora.yml
+++ b/examples/starcoder2/qlora.yml
@@ -4,7 +4,6 @@ base_model: bigcode/starcoder2-3b
 
 load_in_8bit: false
 load_in_4bit: true
-strict: false
 
 datasets:
   - path: mhenrichsen/alpaca_2k_test
diff --git a/examples/tiny-llama/lora-mps.yml b/examples/tiny-llama/lora-mps.yml
index 8654a39bb..66cf7cfb3 100644
--- a/examples/tiny-llama/lora-mps.yml
+++ b/examples/tiny-llama/lora-mps.yml
@@ -7,7 +7,6 @@ tokenizer_type: LlamaTokenizer
 
 load_in_8bit: true
 load_in_4bit: false
-strict: false
 
 datasets:
   - path: mhenrichsen/alpaca_2k_test
diff --git a/examples/tiny-llama/lora.yml b/examples/tiny-llama/lora.yml
index 64b2360d3..90998880f 100644
--- a/examples/tiny-llama/lora.yml
+++ b/examples/tiny-llama/lora.yml
@@ -6,7 +6,6 @@ tokenizer_type: AutoTokenizer
 
 load_in_8bit: true
 load_in_4bit: false
-strict: false
 
 datasets:
   - path: mhenrichsen/alpaca_2k_test
diff --git a/examples/tiny-llama/pretrain.yml b/examples/tiny-llama/pretrain.yml
index 2984c52ae..5b3706bcb 100644
--- a/examples/tiny-llama/pretrain.yml
+++ b/examples/tiny-llama/pretrain.yml
@@ -4,7 +4,6 @@ model_type: LlamaForCausalLM
 tokenizer_type: LlamaTokenizer
 # Automatically upload checkpoint and final model to HF
 # hub_model_id: username/custom_model_name
-strict: false
 
 max_steps: 200
 pretraining_dataset:
diff --git a/examples/tiny-llama/qlora.yml b/examples/tiny-llama/qlora.yml
index 79e3164a5..8b2a4565a 100644
--- a/examples/tiny-llama/qlora.yml
+++ b/examples/tiny-llama/qlora.yml
@@ -7,7 +7,6 @@ tokenizer_type: LlamaTokenizer
 
 load_in_8bit: false
 load_in_4bit: true
-strict: false
 
 datasets:
   - path: mhenrichsen/alpaca_2k_test
diff --git a/examples/xgen-7b/xgen-7b-8k-qlora.yml b/examples/xgen-7b/xgen-7b-8k-qlora.yml
index f4ff589e0..48066b130 100644
--- a/examples/xgen-7b/xgen-7b-8k-qlora.yml
+++ b/examples/xgen-7b/xgen-7b-8k-qlora.yml
@@ -13,7 +13,6 @@ load_in_8bit: false
 # enable 4bit for QLoRA
 load_in_4bit: true
 gptq: false
-strict: false
 push_dataset_to_hub:
 datasets:
   - path: timdettmers/openassistant-guanaco
diff --git a/examples/yi-34B-chat/qlora.yml b/examples/yi-34B-chat/qlora.yml
index de79ed6ce..a0a95d86f 100644
--- a/examples/yi-34B-chat/qlora.yml
+++ b/examples/yi-34B-chat/qlora.yml
@@ -7,7 +7,6 @@ tokenizer_type: LlamaTokenizer
 
 load_in_8bit: false
 load_in_4bit: true
-strict: false
 sequence_len: 1024
 bf16: auto
 tf32: false