From 80b4edb4a765fa7967b60b7eba93732016564870 Mon Sep 17 00:00:00 2001
From: Wing Lian <wing@axolotl.ai>
Date: Tue, 29 Apr 2025 10:01:38 -0400
Subject: [PATCH] Post release fixes (#2581)

* fix missing `split_thinking` kwarg on child strategy (`ChatTemplateStrategyWithKD`)

* make the runpod test shorter

* update docs

* rename runpod test json file

* typing fixes and ordering of doc
---
 .runpod/{tests.json => test-input.json}       | 17 ++++++-------
 docs/config.qmd                               |  4 ++++
 src/axolotl/integrations/kd/chat_template.py  |  2 ++
 .../prompt_strategies/chat_template.py        | 24 +++++++++----------
 4 files changed, 27 insertions(+), 20 deletions(-)
 rename .runpod/{tests.json => test-input.json} (87%)

diff --git a/.runpod/tests.json b/.runpod/test-input.json
similarity index 87%
rename from .runpod/tests.json
rename to .runpod/test-input.json
index 1d1e0287b..52bc905e3 100644
--- a/.runpod/tests.json
+++ b/.runpod/test-input.json
@@ -12,22 +12,22 @@
       "base_model": "HuggingFaceTB/SmolLM2-135M",
       "model_type": "AutoModelForCausalLM",
       "tokenizer_type": "AutoTokenizer",
-      "load_in_8bit": true,
-      "load_in_4bit": false,
+      "load_in_4bit": true,
       "strict": false,
       "datasets": [
         {
           "path": "mhenrichsen/alpaca_2k_test",
-          "type": "alpaca"
+          "type": "alpaca",
+          "split": "train[:10%]"
         }
       ],
-      "val_set_size": 0.05,
+      "val_set_size": 0.02,
       "output_dir": "./outputs/lora-out",
       "sequence_len": 4096,
       "sample_packing": true,
       "eval_sample_packing": false,
       "pad_to_sequence_len": true,
-      "adapter": "lora",
+      "adapter": "qlora",
       "lora_r": 32,
       "lora_alpha": 64,
       "lora_dropout": 0.05,
@@ -36,8 +36,8 @@
         "embed_tokens",
         "lm_head"
       ],
-      "gradient_accumulation_steps": 4,
-      "micro_batch_size": 2,
+      "gradient_accumulation_steps": 2,
+      "micro_batch_size": 1,
       "num_epochs": 1,
       "optimizer": "adamw_torch_fused",
       "lr_scheduler": "cosine",
@@ -56,7 +56,8 @@
       "weight_decay": 0.0,
       "special_tokens": {
         "pad_token": "<|endoftext|>"
-      }
+      },
+      "max_steps": 20
     },
     "timeout": 100000
   },
diff --git a/docs/config.qmd b/docs/config.qmd
index 8795fa4ab..7b0d40462 100644
--- a/docs/config.qmd
+++ b/docs/config.qmd
@@ -184,6 +184,10 @@ datasets:
     # adding a system turn with empty content.
     drop_system_message:
 
+    # Optional[bool]. Whether to split the assistant turn based on a reasoning trace enclosed in delimiter tags.
+    # Defaults to False.
+    split_thinking:
+
     # IMPORTANT: The following fields determine which parts of the conversation to train on.
     # Priority order: message_field_training > message_field_training_detail > train_on_inputs or role in roles_to_train
     # See examples at `docs/dataset-formats/conversation.qmd`
diff --git a/src/axolotl/integrations/kd/chat_template.py b/src/axolotl/integrations/kd/chat_template.py
index 131570aea..eb067cd04 100644
--- a/src/axolotl/integrations/kd/chat_template.py
+++ b/src/axolotl/integrations/kd/chat_template.py
@@ -37,6 +37,7 @@ class ChatTemplateStrategyWithKD(ChatTemplateStrategy):
         train_on_eos=None,
         train_on_eot=None,
         eot_tokens=None,
+        split_thinking: bool | None = False,
         logprobs_field="logprobs",
         gen_temperature=1.0,
         kd_temperature=1.0,
@@ -54,6 +55,7 @@ class ChatTemplateStrategyWithKD(ChatTemplateStrategy):
             train_on_eos=train_on_eos,
             train_on_eot=train_on_eot,
             eot_tokens=eot_tokens,
+            split_thinking=split_thinking,
         )
 
     @property
diff --git a/src/axolotl/prompt_strategies/chat_template.py b/src/axolotl/prompt_strategies/chat_template.py
index 399bb378a..c2948fc11 100644
--- a/src/axolotl/prompt_strategies/chat_template.py
+++ b/src/axolotl/prompt_strategies/chat_template.py
@@ -4,7 +4,7 @@ HF Chat Templates prompt strategy
 
 import logging
 from collections import defaultdict
-from typing import Any, Dict, List, Optional, Set, Union
+from typing import Any, Dict, List, Set, Union
 
 from pydantic import BaseModel
 from transformers import ProcessorMixin
@@ -29,12 +29,12 @@ class ChatTemplatePrompter(Prompter):
         chat_template: str,
         processor=None,
         max_length=2048,
-        message_property_mappings: Optional[Dict[str, str]] = None,
-        message_field_training: Optional[str] = None,
-        message_field_training_detail: Optional[str] = None,
+        message_property_mappings: Dict[str, str] | None = None,
+        message_field_training: str | None = None,
+        message_field_training_detail: str | None = None,
         field_messages: str = "messages",
         field_system: str = "system",
-        roles: Optional[Dict[str, List[str]]] = None,
+        roles: Dict[str, List[str]] | None = None,
         drop_system_message: bool = False,
     ):
         # check if message_property_mappings is None or empty dict
@@ -65,7 +65,7 @@ class ChatTemplatePrompter(Prompter):
         self.field_messages = field_messages
         self.field_system = field_system
         self.tokenizer = tokenizer
-        self.processor: Optional[ProcessorMixin] = processor
+        self.processor: ProcessorMixin | None = processor
         self.chat_template = chat_template
         self.max_length = max_length
         self.drop_system_message = drop_system_message
@@ -224,11 +224,11 @@ class ChatTemplateStrategy(PromptTokenizingStrategy):
         tokenizer,
         train_on_inputs: bool,
         sequence_len: int,
-        roles_to_train: Optional[List[str]] = None,
-        train_on_eos: Optional[str] = None,
-        train_on_eot: Optional[str] = None,
-        eot_tokens: Optional[List[str]] = None,
-        split_thinking: Optional[bool] = False,
+        roles_to_train: list[str] | None = None,
+        train_on_eos: str | None = None,
+        train_on_eot: str | None = None,
+        eot_tokens: list[str] | None = None,
+        split_thinking: bool | None = False,
     ):
         super().__init__(prompter, tokenizer, train_on_inputs, sequence_len)
         self.prompter: ChatTemplatePrompter = prompter
@@ -714,7 +714,7 @@ class StrategyLoader:
         self,
         tokenizer,
         cfg,
-        ds_cfg: Optional[Union[Dict[str, Any], DatasetConfig]] = None,
+        ds_cfg: Union[Dict[str, Any], DatasetConfig] | None = None,
         processor=None,
     ):
         if ds_cfg is None: