diff --git a/examples/gemma3/gemma-3-1b-qlora.yml b/examples/gemma3/gemma-3-1b-qlora.yml
index 115717db7..2f998d144 100644
--- a/examples/gemma3/gemma-3-1b-qlora.yml
+++ b/examples/gemma3/gemma-3-1b-qlora.yml
@@ -1,7 +1,7 @@
 base_model: google/gemma-3-1b-it
-# optionally might have model_type or tokenizer_type
-model_type: AutoModelForCausalLM
-tokenizer_type: AutoTokenizer
+
+model_type: Gemma3ForCausalLM
+
 # Automatically upload checkpoint and final model to HF
 # hub_model_id: username/custom_model_name
 
diff --git a/examples/gemma3/gemma-3-270m-qlora.yml b/examples/gemma3/gemma-3-270m-qlora.yml
index 8744fad26..0c60c4a01 100644
--- a/examples/gemma3/gemma-3-270m-qlora.yml
+++ b/examples/gemma3/gemma-3-270m-qlora.yml
@@ -1,7 +1,7 @@
 base_model: google/gemma-3-270m-it
-# optionally might have model_type or tokenizer_type
-model_type: AutoModelForCausalLM
-tokenizer_type: AutoTokenizer
+
+model_type: Gemma3ForCausalLM
+
 # Automatically upload checkpoint and final model to HF
 # hub_model_id: username/custom_model_name
 
diff --git a/examples/gemma3/gemma-3-4b-qlora.yml b/examples/gemma3/gemma-3-4b-qlora.yml
index 44ba9c879..959521149 100644
--- a/examples/gemma3/gemma-3-4b-qlora.yml
+++ b/examples/gemma3/gemma-3-4b-qlora.yml
@@ -1,5 +1,8 @@
 base_model: google/gemma-3-4b-it
 
+# Must be set explicitly; otherwise transformers tries to load the vision components too
+model_type: Gemma3ForCausalLM
+
 load_in_4bit: true
 
 # gemma3 doesn't seem to play nice with ddp