From c13cb7c8537c16434ecdad2373459c7fedd115b3 Mon Sep 17 00:00:00 2001 From: VED <146507396+ved1beta@users.noreply.github.com> Date: Fri, 20 Mar 2026 14:53:42 +0530 Subject: [PATCH] feat: add nemotron config (#3506) * nemotron config exp * Update examples/nemotron/nemotron-mini-4b-qlora.yaml Co-authored-by: NanoCode012 --------- Co-authored-by: NanoCode012 --- examples/nemotron/nemotron-mini-4b-qlora.yaml | 57 +++++++++++++++++++ src/axolotl/monkeypatch/multipack.py | 1 + 2 files changed, 58 insertions(+) create mode 100644 examples/nemotron/nemotron-mini-4b-qlora.yaml diff --git a/examples/nemotron/nemotron-mini-4b-qlora.yaml b/examples/nemotron/nemotron-mini-4b-qlora.yaml new file mode 100644 index 000000000..e796c149c --- /dev/null +++ b/examples/nemotron/nemotron-mini-4b-qlora.yaml @@ -0,0 +1,57 @@ +base_model: nvidia/Nemotron-Mini-4B-Instruct + +load_in_8bit: false +load_in_4bit: true + +datasets: + - path: fozziethebeat/alpaca_messages_2k_test + type: chat_template + +dataset_prepared_path: last_run_prepared +val_set_size: 0.1 +output_dir: ./outputs/nemotron-mini-4b-qlora + +adapter: qlora +lora_model_dir: + +sequence_len: 4096 +sample_packing: true + +lora_r: 32 +lora_alpha: 16 +lora_dropout: 0.05 +lora_target_linear: true +lora_target_modules: + - q_proj + - k_proj + - v_proj + - o_proj + - up_proj + - down_proj + +wandb_project: +wandb_entity: +wandb_watch: +wandb_name: +wandb_log_model: + +gradient_accumulation_steps: 4 +micro_batch_size: 2 +num_epochs: 1 +optimizer: adamw_bnb_8bit +lr_scheduler: cosine +learning_rate: 0.0002 + +bf16: auto +tf32: false + +gradient_checkpointing: true +resume_from_checkpoint: +logging_steps: 1 +flash_attention: true + +warmup_ratio: 0.1 +evals_per_epoch: 1 +saves_per_epoch: 1 + +special_tokens: diff --git a/src/axolotl/monkeypatch/multipack.py b/src/axolotl/monkeypatch/multipack.py index 9e48e73eb..78087acbc 100644 --- a/src/axolotl/monkeypatch/multipack.py +++ b/src/axolotl/monkeypatch/multipack.py @@ -59,6 +59,7 @@ SUPPORTED_MULTIPACK_MODEL_TYPES = [ "ministral3", "mistral4", "afmoe", + "nemotron", ]