diff --git a/examples/gemma3/gemma-3-1b-qlora.yml b/examples/gemma3/gemma-3-1b-qlora.yml
index d84368bc0..f6fc6955c 100644
--- a/examples/gemma3/gemma-3-1b-qlora.yml
+++ b/examples/gemma3/gemma-3-1b-qlora.yml
@@ -1,8 +1,5 @@
 base_model: google/gemma-3-1b-it
-model_type: Gemma3ForCausalLM
-cls_model_config: Gemma3TextConfig
-
 # Automatically upload checkpoint and final model to HF
 # hub_model_id: username/custom_model_name
@@ -27,6 +24,11 @@ datasets:
 val_set_size: 0.0
 output_dir: ./outputs/out
 
+# Freeze vision tower
+unfrozen_parameters:
+  - ^model\.language_model\..*
+  - ^lm_head\..*
+
 adapter: qlora
 lora_r: 32
 lora_alpha: 16
diff --git a/examples/gemma3/gemma-3-270m-qlora.yml b/examples/gemma3/gemma-3-270m-qlora.yml
index 14ea2aaba..99202f29f 100644
--- a/examples/gemma3/gemma-3-270m-qlora.yml
+++ b/examples/gemma3/gemma-3-270m-qlora.yml
@@ -1,8 +1,5 @@
 base_model: google/gemma-3-270m-it
-model_type: Gemma3ForCausalLM
-cls_model_config: Gemma3TextConfig
-
 # Automatically upload checkpoint and final model to HF
 # hub_model_id: username/custom_model_name
@@ -27,6 +24,11 @@ datasets:
 val_set_size: 0.0
 output_dir: ./outputs/out
 
+# Freeze vision tower
+unfrozen_parameters:
+  - ^model\.language_model\..*
+  - ^lm_head\..*
+
 adapter: qlora
 lora_r: 32
 lora_alpha: 16
diff --git a/examples/gemma3/gemma-3-4b-qlora.yml b/examples/gemma3/gemma-3-4b-qlora.yml
index 7d44f3c9b..d11f2ea50 100644
--- a/examples/gemma3/gemma-3-4b-qlora.yml
+++ b/examples/gemma3/gemma-3-4b-qlora.yml
@@ -1,9 +1,5 @@
 base_model: google/gemma-3-4b-it
-# Need to set else transformers tries to load vision too
-model_type: Gemma3ForCausalLM
-cls_model_config: Gemma3TextConfig
-
 load_in_4bit: true
 
 # gemma3 doesn't seem to play nice with ddp
@@ -24,6 +20,11 @@ dataset_prepared_path: last_run_prepared
 val_set_size: 0.01
 output_dir: ./outputs/out
 
+# Freeze vision tower
+unfrozen_parameters:
+  - ^model\.language_model\..*
+  - ^lm_head\..*
+
 adapter: qlora
 lora_model_dir:
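
For context, a minimal sketch of how regex-based freezing along the lines of unfrozen_parameters typically works — this is an illustration under assumed names (the function and pattern list below are hypothetical), not axolotl's actual implementation: any parameter whose fully qualified name fails to match one of the patterns gets requires_grad turned off, so only the language model and lm_head remain trainable while everything else (e.g. vision weights) stays frozen.

import re

# Patterns mirroring the unfrozen_parameters entries in the configs above.
UNFROZEN_PATTERNS = [r"^model\.language_model\..*", r"^lm_head\..*"]

def apply_unfrozen_parameters(model, patterns=UNFROZEN_PATTERNS):
    # Enable grads only for parameters whose name matches some pattern;
    # all other parameters are frozen.
    compiled = [re.compile(p) for p in patterns]
    for name, param in model.named_parameters():
        param.requires_grad = any(rx.match(name) for rx in compiled)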