Handle other reasoning trace dataset formats (#2591)

* Handle other reasoning trace dataset formats
* rename var to improve readability
* chore: refactor with comments

---------

Co-authored-by: NanoCode012 <nano@axolotl.ai>
2025-04-30 03:32:55 -04:00
parent 2413688b08
commit baeb00231b
2 changed files with 98 additions and 42 deletions
--- a/src/axolotl/prompt_strategies/chat_template.py
+++ b/src/axolotl/prompt_strategies/chat_template.py
@@ -42,6 +42,7 @@ class ChatTemplatePrompter(Prompter):
            message_property_mappings = {
                "role": "role",
                "content": "content",
+                "reasoning_content": "reasoning_content",
            }

        if roles:
@@ -661,16 +662,46 @@ class ChatTemplateStrategy(PromptTokenizingStrategy):
        # if the role is assistant that we want to use reasoning_content
        if self.split_thinking and transformed_message["role"] == "assistant":
            content = transformed_message["content"]
-            pairs = [("<think>", "</think>"), ("<reasoning>", "</reasoning>")]
-            for pair in pairs:
-                if pair[0] in content and pair[1] in content:
-                    start_idx = content.find(pair[0])
-                    end_idx = content.find(pair[1])
-                    thinking_content = content[start_idx + len(pair[0]) : end_idx]
+            thinking_pairs = [
+                ("<think>", "</think>"),
+                ("<reasoning>", "</reasoning>"),
+                ("<|begin_of_thought|>", "<|end_of_thought|>"),
+            ]
+            content_pairs = [("<|begin_of_solution|>", "<|end_of_solution|>")]
+            for tpair in thinking_pairs:
+                # check if the thinking pair is in the content
+                if tpair[0] in content and tpair[1] in content:
+                    # find the start and end index of the thinking pair
+                    t_start_idx = content.find(tpair[0])
+                    t_end_idx = content.find(tpair[1])
+
+                    # get the thinking content
+                    thinking_content = content[t_start_idx + len(tpair[0]) : t_end_idx]
                    transformed_message["reasoning_content"] = thinking_content.strip()
-                    transformed_message["content"] = content[
-                        end_idx + len(pair[1]) :
-                    ].lstrip()
+
+                    # take remainder of the content
+                    # strip whitespace from beginning of the remainder (thinking tokens)
+                    remainder = content[t_end_idx + len(tpair[1]) :].lstrip()
+
+                    # check if the content pair is in the remainder
+                    cpair_found = False
+                    for cpair in content_pairs:
+                        if cpair[0] in remainder and cpair[1] in remainder:
+                            # find the start and end index of the content pair
+                            c_start_idx = remainder.find(cpair[0])
+                            c_end_idx = remainder.find(cpair[1])
+
+                            # extract the text between the content (solution) tags
+                            content_content = remainder[
+                                c_start_idx + len(cpair[0]) : c_end_idx
+                            ]
+                            transformed_message["content"] = content_content.strip()
+                            cpair_found = True
+                            break
+
+                    # else, the content is the remainder
+                    if not cpair_found:
+                        transformed_message["content"] = remainder
                    break

        # Determine which keys in the original message were not mapped