Post release fixes (#2581)

* fix missing kwarg on child * make the runpod test shorter * update docs * rename runpod test json file * typing fixes and ordering of doc
2025-04-29 10:01:38 -04:00
parent fedbcc0254
commit 80b4edb4a7
4 changed files with 27 additions and 20 deletions
--- a/.runpod/test-input.json
+++ b/.runpod/test-input.json
@@ -12,22 +12,22 @@
      "base_model": "HuggingFaceTB/SmolLM2-135M",
      "model_type": "AutoModelForCausalLM",
      "tokenizer_type": "AutoTokenizer",
-      "load_in_8bit": true,
-      "load_in_4bit": false,
+      "load_in_4bit": true,
      "strict": false,
      "datasets": [
        {
          "path": "mhenrichsen/alpaca_2k_test",
-          "type": "alpaca"
+          "type": "alpaca",
+          "split": "train[:10%]"
        }
      ],
-      "val_set_size": 0.05,
+      "val_set_size": 0.02,
      "output_dir": "./outputs/lora-out",
      "sequence_len": 4096,
      "sample_packing": true,
      "eval_sample_packing": false,
      "pad_to_sequence_len": true,
-      "adapter": "lora",
+      "adapter": "qlora",
      "lora_r": 32,
      "lora_alpha": 64,
      "lora_dropout": 0.05,
@@ -36,8 +36,8 @@
        "embed_tokens",
        "lm_head"
      ],
-      "gradient_accumulation_steps": 4,
-      "micro_batch_size": 2,
+      "gradient_accumulation_steps": 2,
+      "micro_batch_size": 1,
      "num_epochs": 1,
      "optimizer": "adamw_torch_fused",
      "lr_scheduler": "cosine",
@@ -56,7 +56,8 @@
      "weight_decay": 0.0,
      "special_tokens": {
        "pad_token": "<|endoftext|>"
-      }
+      },
+      "max_steps": 20
    },
    "timeout": 100000
  },
--- a/docs/config.qmd
+++ b/docs/config.qmd
@@ -184,6 +184,10 @@ datasets:
    # adding a system turn with empty content.
    drop_system_message:

+    # Optional[bool]. Whether to split the assistant turn based on a reasoning trace inside delimited tags
+    # defaults to False
+    split_thinking:
+
    # IMPORTANT: The following fields determine which parts of the conversation to train on.
    # Priority order: message_field_training > message_field_training_detail > train_on_inputs or role in roles_to_train
    # See examples at `docs/dataset-formats/conversation.qmd`
--- a/src/axolotl/integrations/kd/chat_template.py
+++ b/src/axolotl/integrations/kd/chat_template.py
@@ -37,6 +37,7 @@ class ChatTemplateStrategyWithKD(ChatTemplateStrategy):
        train_on_eos=None,
        train_on_eot=None,
        eot_tokens=None,
+        split_thinking: bool | None = False,
        logprobs_field="logprobs",
        gen_temperature=1.0,
        kd_temperature=1.0,
@@ -54,6 +55,7 @@ class ChatTemplateStrategyWithKD(ChatTemplateStrategy):
            train_on_eos=train_on_eos,
            train_on_eot=train_on_eot,
            eot_tokens=eot_tokens,
+            split_thinking=split_thinking,
        )

    @property
--- a/src/axolotl/prompt_strategies/chat_template.py
+++ b/src/axolotl/prompt_strategies/chat_template.py
@@ -4,7 +4,7 @@ HF Chat Templates prompt strategy

 import logging
 from collections import defaultdict
-from typing import Any, Dict, List, Optional, Set, Union
+from typing import Any, Dict, List, Set, Union

 from pydantic import BaseModel
 from transformers import ProcessorMixin
@@ -29,12 +29,12 @@ class ChatTemplatePrompter(Prompter):
        chat_template: str,
        processor=None,
        max_length=2048,
-        message_property_mappings: Optional[Dict[str, str]] = None,
-        message_field_training: Optional[str] = None,
-        message_field_training_detail: Optional[str] = None,
+        message_property_mappings: Dict[str, str] | None = None,
+        message_field_training: str | None = None,
+        message_field_training_detail: str | None = None,
        field_messages: str = "messages",
        field_system: str = "system",
-        roles: Optional[Dict[str, List[str]]] = None,
+        roles: Dict[str, List[str]] | None = None,
        drop_system_message: bool = False,
    ):
        # check if message_property_mappings is None or empty dict
@@ -65,7 +65,7 @@ class ChatTemplatePrompter(Prompter):
        self.field_messages = field_messages
        self.field_system = field_system
        self.tokenizer = tokenizer
-        self.processor: Optional[ProcessorMixin] = processor
+        self.processor: ProcessorMixin | None = processor
        self.chat_template = chat_template
        self.max_length = max_length
        self.drop_system_message = drop_system_message
@@ -224,11 +224,11 @@ class ChatTemplateStrategy(PromptTokenizingStrategy):
        tokenizer,
        train_on_inputs: bool,
        sequence_len: int,
-        roles_to_train: Optional[List[str]] = None,
-        train_on_eos: Optional[str] = None,
-        train_on_eot: Optional[str] = None,
-        eot_tokens: Optional[List[str]] = None,
-        split_thinking: Optional[bool] = False,
+        roles_to_train: list[str] | None = None,
+        train_on_eos: str | None = None,
+        train_on_eot: str | None = None,
+        eot_tokens: list[str] | None = None,
+        split_thinking: bool | None = False,
    ):
        super().__init__(prompter, tokenizer, train_on_inputs, sequence_len)
        self.prompter: ChatTemplatePrompter = prompter
@@ -714,7 +714,7 @@ class StrategyLoader:
        self,
        tokenizer,
        cfg,
-        ds_cfg: Optional[Union[Dict[str, Any], DatasetConfig]] = None,
+        ds_cfg: Union[Dict[str, Any], DatasetConfig] | None = None,
        processor=None,
    ):
        if ds_cfg is None: