diff --git a/.nojekyll b/.nojekyll
index 841139098..2fea0cccc 100644
--- a/.nojekyll
+++ b/.nojekyll
@@ -1 +1 @@
-29a669bc
\ No newline at end of file
+dd7bb740
\ No newline at end of file
diff --git a/docs/api/core.datasets.transforms.chat_builder.html b/docs/api/core.datasets.transforms.chat_builder.html
index 7ec2dc765..4d27fe146 100644
--- a/docs/api/core.datasets.transforms.chat_builder.html
+++ b/docs/api/core.datasets.transforms.chat_builder.html
@@ -510,7 +510,8 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true});

core.datasets.transforms.chat_builder

core.datasets.transforms.chat_builder

-This module contains a function that builds a transform that takes a row from the dataset and converts it to a Chat.
+This module contains a function that builds a transform that takes a row from the
+dataset and converts it to a Chat.

Functions

@@ -532,19 +533,19 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true});
core.datasets.transforms.chat_builder.chat_message_transform_builder(
     train_on_inputs=False,
     conversations_field='conversations',
-    message_field_role=['role', 'from'],
-    message_field_content=['value', 'text', 'content'],
-    message_field_training=['train', 'weight'],
+    message_field_role=None,
+    message_field_content=None,
+    message_field_training=None,
 )

Builds a transform that takes a row from the dataset and converts it to a Chat

Parameters

@@ -571,19 +572,19 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true});
 message_field_role str | list[str] The field name of the role. Defaults to “role”.
-['role', 'from']
+None
 message_field_content str | list[str] The field name of the message content. Defaults to “content”.
-['value', 'text', 'content']
+None
 message_field_training str | list[str] The field name of the train/weight. Defaults to “weight”.
-['train', 'weight']
+None
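For orientation, here is a minimal usage sketch of the documented builder (not part of the diff; the import path is inferred from the page title, the sample row is illustrative, and the new `None` defaults are assumed to fall back to the old list values inside the function):

```python
from axolotl.core.datasets.transforms.chat_builder import (
    chat_message_transform_builder,
)

# Build a row -> Chat transform; passing the field lists explicitly mirrors
# the old defaults shown in the diff above.
transform = chat_message_transform_builder(
    train_on_inputs=False,
    conversations_field="conversations",
    message_field_role=["role", "from"],
    message_field_content=["value", "text", "content"],
    message_field_training=["train", "weight"],
)

# An illustrative sharegpt-style row the transform would convert to a Chat
row = {
    "conversations": [
        {"from": "human", "value": "Ahoy!"},
        {"from": "gpt", "value": "Ahoy, matey!"},
    ]
}
chat = transform(row)
```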
diff --git a/docs/api/index.html b/docs/api/index.html
index a58fca786..0d53ec8f6 100644
--- a/docs/api/index.html
+++ b/docs/api/index.html
@@ -553,7 +553,7 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true});
 core.datasets.transforms.chat_builder
-This module contains a function that builds a transform that takes a row from the dataset and converts it to a Chat.
+This module contains a function that builds a transform that takes a row from the
diff --git a/examples/colab-notebooks/colab-axolotl-example.html b/examples/colab-notebooks/colab-axolotl-example.html
index 13bae39ad..8342ba20f 100644
--- a/examples/colab-notebooks/colab-axolotl-example.html
+++ b/examples/colab-notebooks/colab-axolotl-example.html
@@ -563,21 +563,23 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true});
import os
-# Optionally, upload your own JSONL to your Google Drive
-GOOGLE_DRIVE_PATH = ""  # ex: "MyDrive/Colab\ Notebooks/train.jsonl"
-
-# "Select All" permissions, or you may get the error:
-# "MessageError: Error: credential propagation was unsuccessful"
-if GOOGLE_DRIVE_PATH:
-    from google.colab import drive
-    # Mount your Google Drive
-    GOOGLE_DRIVE_MNT = "/content/drive/"
-    drive.mount(GOOGLE_DRIVE_MNT, force_remount=True)
-    tmp_path = os.path.join(GOOGLE_DRIVE_MNT, GOOGLE_DRIVE_PATH.lstrip("/"))
-    # make sure file exists
-    if not os.path.isfile(tmp_path):
-        raise ValueError(f"File {tmp_path} does not exist")
-    dataset_id = tmp_path
+
+# Optionally, upload your own JSONL to your Google Drive
+GOOGLE_DRIVE_PATH = ""  # ex: "MyDrive/Colab\ Notebooks/train.jsonl"
+
+# "Select All" permissions, or you may get the error:
+# "MessageError: Error: credential propagation was unsuccessful"
+if GOOGLE_DRIVE_PATH:
+    from google.colab import drive
+
+    # Mount your Google Drive
+    GOOGLE_DRIVE_MNT = "/content/drive/"
+    drive.mount(GOOGLE_DRIVE_MNT, force_remount=True)
+    tmp_path = os.path.join(GOOGLE_DRIVE_MNT, GOOGLE_DRIVE_PATH.lstrip("/"))
+    # make sure file exists
+    if not os.path.isfile(tmp_path):
+        raise ValueError(f"File {tmp_path} does not exist")
+    dataset_id = tmp_path
@@ -590,61 +592,66 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true});
 # Axolotl provides full control and transparency over model and training configuration
 config = DictDefault(
-    base_model = "Qwen/Qwen3-14B",  # Use the instruct tuned model, but we're aligning it to be a pirate
-    load_in_4bit = True,  # set to True for qLoRA
-    adapter = "qlora",
-    lora_r = 32,
-    lora_alpha = 64,
-    lora_target_modules = [
-        "q_proj", "k_proj", "v_proj", "o_proj",  # train self_attn linear modules
-        "gate_proj", "down_proj", "up_proj",  # train MLP linear modules
-    ],
-    lora_qkv_kernel = True,  # optimized triton kernels for LoRA
-    lora_o_kernel = True,
-    lora_mlp_kernel = True,
-    embeddings_skip_upcast = True,  # keep embeddings in fp16 so the model fits in 15GB VRAM
-    xformers_attention = True,  # use xformers on Colab w/ T4 for memory efficient attention, flash_attention only on Ampere or above
-    plugins = [
-        # more efficient training using Apple's Cut Cross Entropy; https://github.com/apple/ml-cross-entropy
-        "axolotl.integrations.cut_cross_entropy.CutCrossEntropyPlugin",
-    ],
-    sample_packing = True,  # 2-6x increase in tokens per micro-batch
-    # when using packing, use a slightly higher learning rate to account for fewer steps
-    # alternatively, reduce the micro_batch_size + gradient_accumulation_steps to achieve closer to the same number of steps/epoch
-    learning_rate = 0.00019,
-    sequence_len = 4096,  # larger sequence length improves packing efficiency for more tokens/sec
-    micro_batch_size = 1,
-    gradient_accumulation_steps = 1,
-    gradient_checkpointing = True,  # tradeoff reduced VRAM for increased time
-    gradient_checkpointing_kwargs = {
-        "use_reentrant": False,
-    },
-    optimizer = "paged_adamw_8bit",
-    lr_scheduler = "cosine",
-    warmup_steps = 5,
-    fp16 = True,  # use float16 + automatic mixed precision, bfloat16 not supported on Colab w/ T4
-    bf16 = False,
-    max_grad_norm = 0.1,  # gradient clipping
-    num_epochs = 1,
-    saves_per_epoch = 2,  # how many checkpoints to save over one epoch
-    logging_steps = 1,
-    output_dir = "./outputs/qwen-sft-pirate-rrr",
-    chat_template = "qwen3",
-    datasets = [
-        {
-            "path": dataset_id,  # Huggingface Dataset id or path to train.jsonl
-            "type": "chat_template",
-            "split": "train",
-            "eot_tokens": ["<|im_end|>"],
-        }
-    ],
-    dataloader_prefetch_factor = 8,  # dataloader optimizations
-    dataloader_num_workers = 2,
-    dataloader_pin_memory = True,
-    )
-
-# validates the configuration
-cfg = load_cfg(config)
+    base_model="Qwen/Qwen3-14B",  # Use the instruct tuned model, but we're aligning it to be a pirate
+    load_in_4bit=True,  # set to True for qLoRA
+    adapter="qlora",
+    lora_r=32,
+    lora_alpha=64,
+    lora_target_modules=[
+        "q_proj",
+        "k_proj",
+        "v_proj",
+        "o_proj",  # train self_attn linear modules
+        "gate_proj",
+        "down_proj",
+        "up_proj",  # train MLP linear modules
+    ],
+    lora_qkv_kernel=True,  # optimized triton kernels for LoRA
+    lora_o_kernel=True,
+    lora_mlp_kernel=True,
+    embeddings_skip_upcast=True,  # keep embeddings in fp16 so the model fits in 15GB VRAM
+    xformers_attention=True,  # use xformers on Colab w/ T4 for memory efficient attention, flash_attention only on Ampere or above
+    plugins=[
+        # more efficient training using Apple's Cut Cross Entropy; https://github.com/apple/ml-cross-entropy
+        "axolotl.integrations.cut_cross_entropy.CutCrossEntropyPlugin",
+    ],
+    sample_packing=True,  # 2-6x increase in tokens per micro-batch
+    # when using packing, use a slightly higher learning rate to account for fewer steps
+    # alternatively, reduce the micro_batch_size + gradient_accumulation_steps to achieve closer to the same number of steps/epoch
+    learning_rate=0.00019,
+    sequence_len=4096,  # larger sequence length improves packing efficiency for more tokens/sec
+    micro_batch_size=1,
+    gradient_accumulation_steps=1,
+    gradient_checkpointing=True,  # tradeoff reduced VRAM for increased time
+    gradient_checkpointing_kwargs={
+        "use_reentrant": False,
+    },
+    optimizer="paged_adamw_8bit",
+    lr_scheduler="cosine",
+    warmup_steps=5,
+    fp16=True,  # use float16 + automatic mixed precision, bfloat16 not supported on Colab w/ T4
+    bf16=False,
+    max_grad_norm=0.1,  # gradient clipping
+    num_epochs=1,
+    saves_per_epoch=2,  # how many checkpoints to save over one epoch
+    logging_steps=1,
+    output_dir="./outputs/qwen-sft-pirate-rrr",
+    chat_template="qwen3",
+    datasets=[
+        {
+            "path": dataset_id,  # Huggingface Dataset id or path to train.jsonl
+            "type": "chat_template",
+            "split": "train",
+            "eot_tokens": ["<|im_end|>"],
+        }
+    ],
+    dataloader_prefetch_factor=8,  # dataloader optimizations
+    dataloader_num_workers=2,
+    dataloader_pin_memory=True,
+)
+
+# validates the configuration
+cfg = load_cfg(config)
[2025-05-08 13:40:27,488] [INFO] [root.register:348] [PID:174] Attempting to load plugin: axolotl.integrations.cut_cross_entropy.CutCrossEntropyPlugin
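The packing comment in the config above implies some quick arithmetic; a hedged illustration with made-up numbers (none of these counts come from the notebook):

```python
# Illustrative only: packing ~4 samples per micro-batch (the config cites 2-6x)
# shrinks the number of optimizer steps per epoch by roughly that factor,
# which is why the config bumps the learning rate slightly.
num_examples = 10_000  # assumed dataset size
micro_batch_size = 1  # from the config above
gradient_accumulation_steps = 1  # from the config above
packing_factor = 4  # assumed middle of the 2-6x range

steps_unpacked = num_examples // (micro_batch_size * gradient_accumulation_steps)
steps_packed = steps_unpacked // packing_factor
print(steps_unpacked, steps_packed)  # 10000 2500
```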
@@ -673,8 +680,9 @@ gtag('config', 'G-9KYCVJBNMQ', { 'anonymize_ip': true});
 
from axolotl.utils import patch_optimized_env
-# speedup downloads from HF 🤗 and set "PYTORCH_CUDA_ALLOC_CONF" env to save memory
-patch_optimized_env()
+
+# speedup downloads from HF 🤗 and set "PYTORCH_CUDA_ALLOC_CONF" env to save memory
+patch_optimized_env()
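As a rough sketch of what `patch_optimized_env()` is described as doing in the comment above (the exact variables and values here are assumptions, not taken from axolotl's source):

```python
import os

# Faster HF Hub downloads via hf_transfer (assumed; requires hf_transfer installed)
os.environ.setdefault("HF_HUB_ENABLE_HF_TRANSFER", "1")
# Reduce CUDA memory fragmentation (assumed value)
os.environ.setdefault("PYTORCH_CUDA_ALLOC_CONF", "expandable_segments:True")
```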
@@ -1235,29 +1243,30 @@ You're using a Qwen2TokenizerFast tokenizer. Please note that with a fast tokeni

Inferencing the trained model

-
import torch
-from transformers import TextStreamer
-
-messages = [
-    {
-        "role": "user",
-        "content": "Explain the Pythagorean theorem to me.",
-    },
-]
-
-prompt = tokenizer.apply_chat_template(
-    messages,
-    add_generation_prompt=True,
-    tokenize=False,
-    enable_thinking = False,
-)
-
-outputs = model.generate(
-    **tokenizer(prompt, return_tensors = "pt").to("cuda"),
-    max_new_tokens = 192,
-    temperature = 1.0, top_p = 0.8, top_k = 32,
-    streamer = TextStreamer(tokenizer, skip_prompt = True),
-)
+
+from transformers import TextStreamer
+
+messages = [
+    {
+        "role": "user",
+        "content": "Explain the Pythagorean theorem to me.",
+    },
+]
+
+prompt = tokenizer.apply_chat_template(
+    messages,
+    add_generation_prompt=True,
+    tokenize=False,
+    enable_thinking=False,
+)
+
+outputs = model.generate(
+    **tokenizer(prompt, return_tensors="pt").to("cuda"),
+    max_new_tokens=192,
+    temperature=1.0,
+    top_p=0.8,
+    top_k=32,
+    streamer=TextStreamer(tokenizer, skip_prompt=True),
+)
Ahoy there, matey! Shiver me timbers, ye be lookin' for the Pythagorean theorem, eh? Well, hold yer horses and listen up, for I'll be tellin' ye all about it in me own special way.
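To reload the trained adapter for inference outside this notebook session, a minimal sketch (the PEFT auto-class usage is standard, but whether axolotl saves the tokenizer alongside the adapter in `output_dir` is an assumption here):

```python
from peft import AutoPeftModelForCausalLM
from transformers import AutoTokenizer

# Load the base model plus the LoRA adapter from the training output directory
model = AutoPeftModelForCausalLM.from_pretrained(
    "./outputs/qwen-sft-pirate-rrr",
    device_map="auto",
)
tokenizer = AutoTokenizer.from_pretrained("./outputs/qwen-sft-pirate-rrr")
```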
 
@@ -1295,14 +1304,15 @@ drwxr-xr-x 2 root root 4.0K May  7 22:21 checkpoint-25
 

If you prefer to manually upload the training artifacts, we can still upload the entire final checkpoint to HuggingFace from the CLI.

from huggingface_hub import notebook_login
-# remove the partial epoch checkpoints
-!rm -rf "./outputs/qwen-sft-pirate-rrr/checkpoint-*"
-
-# HF Notebook login widget
-notebook_login()
-
-# upload the LoRA adapter for your model to HF, remember to update the username/model-name below
-!huggingface-cli upload --repo-type=model winglian/pirate-qwen-14B "./outputs/qwen-sft-pirate-rrr"
+
+# remove the partial epoch checkpoints
+!rm -rf "./outputs/qwen-sft-pirate-rrr/checkpoint-*"
+
+# HF Notebook login widget
+notebook_login()
+
+# upload the LoRA adapter for your model to HF, remember to update the username/model-name below
+!huggingface-cli upload --repo-type=model winglian/pirate-qwen-14B "./outputs/qwen-sft-pirate-rrr"
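The same upload can be done from Python instead of the CLI; a sketch using the `huggingface_hub` API (as in the notebook, substitute your own `username/model-name` for the repo id):

```python
from huggingface_hub import HfApi

api = HfApi()
api.upload_folder(
    repo_id="winglian/pirate-qwen-14B",  # replace with your own username/model-name
    repo_type="model",
    folder_path="./outputs/qwen-sft-pirate-rrr",
)
```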