Add chat_template.argilla_chat support for DPO datasets (#3202)

* Add chat_template.argilla_chat support for DPO datasets Creates a new chat_template.argilla_chat prompt strategy for handling DPO datasets where chosen/rejected fields contain full conversations (messages + final response), following the pattern of chatml.argilla_chat and llama3.argilla_chat. - Add argilla_chat() function to chat_template.py - Add chat_template.argilla_chat to RLHF documentation - Add test coverage for argilla_chat with multiple tokenizers Dataset format: { "chosen": [ {"role": "user", "content": "..."}, {"role": "assistant", "content": "..."} ], "rejected": [ {"role": "user", "content": "..."}, {"role": "assistant", "content": "..."} ] } * Fix chat_template.argilla_chat return value contract and add docstring - Return (transform_fn, dataset_kwargs) tuple instead of bare transform_fn - Add remove_columns specification for field_chosen and field_rejected - Add comprehensive docstring with Args/Returns sections - Update tests to unpack tuple return value Addresses PR feedback to maintain consistency with chat_template.default() and properly specify columns to remove after dataset transformation. * Update tests/prompt_strategies/test_dpo_chat_templates.py Co-authored-by: Wing Lian <wing.lian@gmail.com> --------- Co-authored-by: Wing Lian <wing.lian@gmail.com>
2025-10-17 19:00:26 +09:00
parent 93ba57396f
commit 87565ecc05
3 changed files with 212 additions and 1 deletions
--- a/src/axolotl/prompt_strategies/dpo/chat_template.py
+++ b/src/axolotl/prompt_strategies/dpo/chat_template.py
@@ -120,3 +120,123 @@ def default(cfg, dataset_idx=0, **kwargs):
        return result

    return transform_fn, {"remove_columns": [field_messages]}
+
+
+def argilla_chat(cfg, dataset_idx=0, **kwargs):
+    """
+    DPO chat template strategy for argilla-style datasets.
+
+    For argilla-style datasets where chosen/rejected contain full conversations
+    instead of single response messages. Extracts the conversation history from
+    the chosen field and formats both chosen/rejected responses using the
+    configured chat template.
+
+    Args:
+        cfg: Configuration object containing chat_template and dataset settings
+        dataset_idx: Index of the dataset in the config (default: 0)
+        **kwargs: Additional keyword arguments (unused)
+
+    Returns:
+        tuple: (transform_fn, dataset_kwargs) where:
+            - transform_fn: Function to transform dataset samples
+            - dataset_kwargs: Dict with 'remove_columns' specifying columns to drop
+
+    Dataset format:
+        {
+            "chosen": [
+                {"role": "user", "content": "..."},
+                {"role": "assistant", "content": "..."}
+            ],
+            "rejected": [
+                {"role": "user", "content": "..."},
+                {"role": "assistant", "content": "..."}
+            ]
+        }
+    """
+    ds_cfg = cfg["datasets"][dataset_idx]
+    ds_cfg = handle_legacy_message_fields_logic(ds_cfg)
+
+    chat_template_choice, chat_template_jinja = extract_chat_template_args(
+        cfg=cfg, ds_cfg=ds_cfg
+    )
+    field_chosen = ds_cfg.get("field_chosen", "chosen")
+    field_rejected = ds_cfg.get("field_rejected", "rejected")
+    message_property_mappings = ds_cfg.get(
+        "message_property_mappings",
+        {
+            "role": "role",
+            "content": "content",
+        },
+    )
+    role_map_inv = ds_cfg.get(
+        "roles",
+        {
+            "user": ["user"],
+            "assistant": ["assistant"],
+            "system": ["system"],
+        },
+    )
+    role_map = {}
+    for target, sources in role_map_inv.items():
+        for source in sources:
+            role_map[source] = target
+
+    def transform_fn(sample, tokenizer=None):
+        chat_template_string = get_chat_template(
+            user_choice=chat_template_choice,
+            jinja_template=chat_template_jinja,
+            tokenizer=tokenizer,
+        )
+
+        chosen_raw = sample[field_chosen]
+        rejected_raw = sample[field_rejected]
+
+        # Extract messages (all but last) and responses (last message)
+        chosen_messages = [
+            {
+                "role": role_map[m[message_property_mappings["role"]]],
+                "content": m[message_property_mappings["content"]],
+            }
+            for m in chosen_raw[:-1]
+        ]
+        chosen_response = {
+            "role": role_map[chosen_raw[-1][message_property_mappings["role"]]],
+            "content": chosen_raw[-1][message_property_mappings["content"]],
+        }
+
+        rejected_response = {
+            "role": role_map[rejected_raw[-1][message_property_mappings["role"]]],
+            "content": rejected_raw[-1][message_property_mappings["content"]],
+        }
+
+        dummy_user_message = {"role": "user", "content": "[[dummy_message]]"}
+
+        result = {}
+        result["prompt"] = tokenizer.apply_chat_template(
+            chosen_messages,
+            add_generation_prompt=True,
+            chat_template=chat_template_string,
+            tokenize=False,
+        )
+
+        result["chosen"] = tokenizer.apply_chat_template(
+            [dummy_user_message, chosen_response],
+            add_generation_prompt=False,
+            chat_template=chat_template_string,
+            tokenize=False,
+        )
+        chosen_strip_index = result["chosen"].find(chosen_response["content"])
+        result["chosen"] = result["chosen"][chosen_strip_index:].rstrip()
+
+        result["rejected"] = tokenizer.apply_chat_template(
+            [dummy_user_message, rejected_response],
+            add_generation_prompt=False,
+            chat_template=chat_template_string,
+            tokenize=False,
+        )
+        rejected_strip_index = result["rejected"].find(rejected_response["content"])
+        result["rejected"] = result["rejected"][rejected_strip_index:].rstrip()
+
+        return result
+
+    return transform_fn, {"remove_columns": [field_chosen, field_rejected]}