import os

# Optionally, upload your own JSONL to your Google Drive
GOOGLE_DRIVE_PATH = ""  # ex: "MyDrive/Colab Notebooks/train.jsonl"

# When prompted, grant "Select All" permissions, or you may get the error:
# "MessageError: Error: credential propagation was unsuccessful"
if GOOGLE_DRIVE_PATH:
    from google.colab import drive

    # Mount your Google Drive
    GOOGLE_DRIVE_MNT = "/content/drive/"
    drive.mount(GOOGLE_DRIVE_MNT, force_remount=True)
    tmp_path = os.path.join(GOOGLE_DRIVE_MNT, GOOGLE_DRIVE_PATH.lstrip("/"))
    # make sure the file exists
    if not os.path.isfile(tmp_path):
        raise ValueError(f"File {tmp_path} does not exist")
    dataset_id = tmp_path
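If you bring your own JSONL, each line should be a single JSON object in the OpenAI-style messages format that the chat_template dataset type (used in the config below) consumes. A minimal sketch that writes one such record; the exact field layout is an assumption based on common chat_template usage, not a spec:

import json

# Hypothetical one-record training file in the "messages" format;
# real data would have one JSON object per line.
record = {
    "messages": [
        {"role": "user", "content": "What be the Pythagorean theorem?"},
        {"role": "assistant", "content": "Arr, a squared plus b squared be c squared, matey!"},
    ]
}
with open("train.jsonl", "w") as f:
    f.write(json.dumps(record) + "\n")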
from axolotl.cli.config import load_cfg  # validates the config; import path may vary across axolotl versions
from axolotl.utils.dict import DictDefault

# Axolotl provides full control and transparency over model and training configuration
config = DictDefault(
    base_model="Qwen/Qwen3-14B",  # use the instruct-tuned model, but we're aligning it to be a pirate
    load_in_4bit=True,  # set to True for QLoRA
    adapter="qlora",
    lora_r=32,
    lora_alpha=64,
    lora_target_modules=[
        "q_proj",
        "k_proj",
        "v_proj",
        "o_proj",  # train self_attn linear modules
        "gate_proj",
        "down_proj",
        "up_proj",  # train MLP linear modules
    ],
    lora_qkv_kernel=True,  # optimized Triton kernels for LoRA
    lora_o_kernel=True,
    lora_mlp_kernel=True,
    embeddings_skip_upcast=True,  # keep embeddings in fp16 so the model fits in 15GB VRAM
    xformers_attention=True,  # use xformers on Colab w/ T4 for memory-efficient attention; flash_attention only on Ampere or above
    plugins=[
        # more efficient training using Apple's Cut Cross Entropy; https://github.com/apple/ml-cross-entropy
        "axolotl.integrations.cut_cross_entropy.CutCrossEntropyPlugin",
    ],
    sample_packing=True,  # 2-6x increase in tokens per micro-batch
    # when using packing, use a slightly higher learning rate to account for fewer steps;
    # alternatively, reduce micro_batch_size + gradient_accumulation_steps to achieve closer to the same number of steps/epoch
    learning_rate=0.00019,
    sequence_len=4096,  # larger sequence length improves packing efficiency for more tokens/sec
    micro_batch_size=1,
    gradient_accumulation_steps=1,
    gradient_checkpointing=True,  # trade reduced VRAM for increased time
    gradient_checkpointing_kwargs={
        "use_reentrant": False,
    },
    optimizer="paged_adamw_8bit",
    lr_scheduler="cosine",
    warmup_steps=5,
    fp16=True,  # use float16 + automatic mixed precision; bfloat16 not supported on Colab w/ T4
    bf16=False,
    max_grad_norm=0.1,  # gradient clipping
    num_epochs=1,
    saves_per_epoch=2,  # how many checkpoints to save over one epoch
    logging_steps=1,
    output_dir="./outputs/qwen-sft-pirate-rrr",
    chat_template="qwen3",
    datasets=[
        {
            "path": dataset_id,  # Hugging Face dataset id or path to train.jsonl
            "type": "chat_template",
            "split": "train",
            "eot_tokens": ["<|im_end|>"],
        }
    ],
    dataloader_prefetch_factor=8,  # dataloader optimizations
    dataloader_num_workers=2,
    dataloader_pin_memory=True,
)

# validate the configuration
cfg = load_cfg(config)
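With micro_batch_size and gradient_accumulation_steps both at 1, each optimizer step sees one packed sequence of up to 4096 tokens. A quick sanity check on the validated config; this assumes cfg exposes these fields as attributes, which DictDefault supports:

# Effective sequences and max tokens per optimizer step under the settings above.
effective_batch = cfg.micro_batch_size * cfg.gradient_accumulation_steps
print(f"sequences/step: {effective_batch}, max tokens/step: {effective_batch * cfg.sequence_len}")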
from axolotl.utils import patch_optimized_env

# speed up downloads from HF 🤗 and set "PYTORCH_CUDA_ALLOC_CONF" env to save memory
patch_optimized_env()
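To confirm the tweaks took effect, you can print the environment afterwards. Which variables get set (and to what) depends on the axolotl version, so treat these names as assumptions; HF_HUB_ENABLE_HF_TRANSFER is a common mechanism for faster Hub downloads:

import os

# Both variable names below are assumptions about what patch_optimized_env() sets.
print(os.environ.get("PYTORCH_CUDA_ALLOC_CONF"))
print(os.environ.get("HF_HUB_ENABLE_HF_TRANSFER"))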
Ahoy there, matey! Shiver me timbers, ye be lookin' for the Pythagorean theorem, eh? Well, hold yer horses and listen up, for I'll be tellin' ye all about it in me own special way.
If you prefer to upload the training artifacts manually, you can still push the entire final checkpoint to Hugging Face from the CLI.
from huggingface_hub import notebook_login

# remove the partial epoch checkpoints (glob left unquoted so the shell expands it)
!rm -rf ./outputs/qwen-sft-pirate-rrr/checkpoint-*

# HF notebook login widget
notebook_login()

# upload the LoRA adapter for your model to HF; remember to update the username/model-name below
!huggingface-cli upload --repo-type=model winglian/pirate-qwen-14B ./outputs/qwen-sft-pirate-rrr
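Once uploaded, the adapter can be loaded back on top of the base model for inference with peft. A minimal sketch, assuming the repo name used above and enough VRAM (or 4-bit loading) for the 14B base:

from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

# Load the base model, then attach the uploaded LoRA adapter on top of it.
base = AutoModelForCausalLM.from_pretrained("Qwen/Qwen3-14B", device_map="auto")
model = PeftModel.from_pretrained(base, "winglian/pirate-qwen-14B")
tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen3-14B")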