diff --git a/examples/gemma3/gemma-3-4b-qlora.yml b/examples/gemma3/gemma-3-4b-qlora.yml
index 28b7bdacf..b4be17c3c 100644
--- a/examples/gemma3/gemma-3-4b-qlora.yml
+++ b/examples/gemma3/gemma-3-4b-qlora.yml
@@ -1,6 +1,8 @@
 base_model: google/gemma-3-4b-it
 strict: false
 
+load_in_4bit: true
+
 # gemma3 doesn't seem to play nice with ddp
 ddp_find_unused_parameters: true
 
@@ -17,7 +19,7 @@ dataset_prepared_path: last_run_prepared
 val_set_size: 0.01
 output_dir: ./outputs/out
 
-adapter: lora
+adapter: qlora
 lora_model_dir:
 
 sequence_len: 2048
diff --git a/examples/gemma3/gemma-3-4b-vision-qlora.yml b/examples/gemma3/gemma-3-4b-vision-qlora.yml
index d51dd88b1..6e711d6f6 100644
--- a/examples/gemma3/gemma-3-4b-vision-qlora.yml
+++ b/examples/gemma3/gemma-3-4b-vision-qlora.yml
@@ -2,6 +2,8 @@ base_model: google/gemma-3-4b-it
 processor_type: AutoProcessor
 strict: false
 
+load_in_4bit: true
+
 # these 3 lines are needed for now to handle vision chat templates w images
 skip_prepare_dataset: true
 remove_unused_columns: false