feat: support training with JSON string tool arguments (#3136)

* feat: support training with JSON string tool arguments; fix PyArrow data type inconsistent error

* feat: raise error for tool call arguments decode

* Add test_chat_templates_tool_call_string_arguments.py

Add test for string arguments

* fix: change to correct qwen3 tokenizer

* fix: update docs to clarify arguments json

* chore: lint

* fix: duplicate

* chore: revert

* feat: add error to faq

* fix: remove duplicate fixture

---------

Co-authored-by: caoqinping <caoqinping@lixiang.com>
Co-authored-by: gamersover-blog <1611885128@qq.com>
Co-authored-by: NanoCode012 <nano@axolotl.ai>
This commit is contained in:
陈华杰
2025-09-25 13:06:21 +08:00
committed by GitHub
parent 856ff12171
commit e8b962d47f
7 changed files with 252 additions and 20 deletions

View File

@@ -2,6 +2,7 @@
HF Chat Templates prompt strategy
"""
import json
from collections import defaultdict
from typing import TYPE_CHECKING, Any, Dict, List, Set, Union
@@ -794,6 +795,22 @@ class ChatTemplateStrategy(PromptTokenizingStrategy):
if val is not None:
transformed_message[key] = val
if "tool_calls" in transformed_message and transformed_message["tool_calls"]:
for tool_call in transformed_message["tool_calls"]:
if "function" in tool_call and "arguments" in tool_call["function"]:
args = tool_call["function"]["arguments"]
if isinstance(args, str):
try:
tool_call["function"]["arguments"] = json.loads(args)
except json.JSONDecodeError as e:
LOG.error(
f"Error parsing tool_calls arguments as JSON. "
f"Function: {tool_call.get('function', {}).get('name', 'unknown')}, "
f"Arguments string: {args!r}, "
f"Error: {e}"
)
raise
return transformed_message
def _get_images(self, prompt):