feat: add sample config

2025-07-01 18:07:14 +07:00
parent 1ef8732599
commit bd8d98d51f
1 changed files with 80 additions and 0 deletions
--- a/examples/gemma3n/gemma-3n-e2b-qlora.yml
+++ b/examples/gemma3n/gemma-3n-e2b-qlora.yml
@@ -0,0 +1,80 @@
 # Base model checkpoint and the loader classes used for the model and tokenizer.
 base_model: google/gemma-3n-E2B-it
 model_type: AutoModelForCausalLM
 tokenizer_type: AutoTokenizer
 # Automatically upload checkpoint and final model to HF
 # hub_model_id: username/custom_model_name
 # Cut Cross Entropy integration: the plugin is registered in `plugins` and
 # switched on by the `cut_cross_entropy` flag.
 plugins:
  - axolotl.integrations.cut_cross_entropy.CutCrossEntropyPlugin
 cut_cross_entropy: true
 # Quantized loading: 8-bit is off, 4-bit is on (this file also sets
 # `adapter: qlora`).
 load_in_8bit: false
 load_in_4bit: true
 # Optional, for full fine-tuning (fft): uncomment the block below to train
 # only the language-model layers.
 # unfrozen_parameters:
  # - model.language_model.*
  # - lm_head
  # - embed_tokens
 # huggingface repo
 # Explicit chat template is left disabled in this sample; uncomment to set it.
 # chat_template: gemma3
 # Token(s) listed as end-of-turn markers for the chat-formatted data.
 eot_tokens:
  - <end_of_turn>
 # Training data: the first 1% of the `train` split of
 # cgato/SlimOrcaDedupCleaned, processed with the `chat_template` dataset type.
 datasets:
  - path: cgato/SlimOrcaDedupCleaned
    type: chat_template
    split: train[:1%]
    # Messages are read from the `conversations` field; each message's role
    # comes from its `from` key and its content from its `value` key.
    field_messages: conversations
    message_property_mappings:
      role: from
      content: value
 # Validation set size is 0.0, i.e. no fraction is requested for evaluation.
 val_set_size: 0.0
 # Directory where run outputs are written.
 output_dir: ./outputs/out
 # QLoRA adapter settings: rank 32, alpha 16, dropout 0.05.
 adapter: qlora
 lora_r: 32
 lora_alpha: 16
 lora_dropout: 0.05
 # lora_target_linear: # Does not work with gemma3n currently
 # Because `lora_target_linear` is disabled above, the target modules are
 # listed explicitly: the q/k/v/o projection modules.
 lora_target_modules:
  - q_proj
  - k_proj
  - v_proj
  - o_proj
 # Sequence handling: length 2048, with sample packing enabled and padding to
 # the full sequence length.
 sequence_len: 2048
 sample_packing: true
 # NOTE(review): `val_set_size` is 0.0 in this file, so this eval-side flag
 # presumably has nothing to act on — confirm before relying on it.
 eval_sample_packing: true
 pad_to_sequence_len: true
 # Weights & Biases (wandb) settings. All are intentionally left empty (null)
 # in this sample; fill them in to configure wandb for a run.
 wandb_project:
 wandb_entity:
 wandb_watch:
 wandb_name:
 wandb_log_model:
 # Batch sizing: micro batch of 1 with 1 accumulation step.
 gradient_accumulation_steps: 1
 micro_batch_size: 1
 num_epochs: 4
 # Optimizer and learning-rate schedule.
 optimizer: muon
 lr_scheduler: cosine
 learning_rate: 0.0002
 # Precision settings.
 bf16: auto
 tf32: true
 # Gradient checkpointing, using the non-reentrant variant.
 gradient_checkpointing: true
 gradient_checkpointing_kwargs:
  use_reentrant: false
 # Left empty: no checkpoint to resume from is specified.
 resume_from_checkpoint:
 logging_steps: 1
 # flash_attention: true  # No attention impl works with gemma3n for now
 warmup_ratio: 0.1
 # `evals_per_epoch` is left empty (`val_set_size` is 0.0 in this file);
 # one save is requested per epoch.
 evals_per_epoch:
 saves_per_epoch: 1
 weight_decay: 0.0
 special_tokens: