feat: support training with JSON string tool arguments (#3136)
* feat: support training with JSON string tool arguments; fix PyArrow data type inconsistent error * feat: raise error for tool call arguments decode * Add test_chat_templates_tool_call_string_arguments.py Add test for string arguments * fix: change to correct qwen3 tokenizer * fix: update docs to clarify arguments json * chore: lint * fix: duplicate * chore: revert * feat: add error to faq * fix: remove duplicate fixture --------- Co-authored-by: caoqinping <caoqinping@lixiang.com> Co-authored-by: gamersover-blog <1611885128@qq.com> Co-authored-by: NanoCode012 <nano@axolotl.ai>
This commit is contained in:
@@ -2,6 +2,7 @@
|
||||
HF Chat Templates prompt strategy
|
||||
"""
|
||||
|
||||
import json
|
||||
from collections import defaultdict
|
||||
from typing import TYPE_CHECKING, Any, Dict, List, Set, Union
|
||||
|
||||
@@ -794,6 +795,22 @@ class ChatTemplateStrategy(PromptTokenizingStrategy):
|
||||
if val is not None:
|
||||
transformed_message[key] = val
|
||||
|
||||
if "tool_calls" in transformed_message and transformed_message["tool_calls"]:
|
||||
for tool_call in transformed_message["tool_calls"]:
|
||||
if "function" in tool_call and "arguments" in tool_call["function"]:
|
||||
args = tool_call["function"]["arguments"]
|
||||
if isinstance(args, str):
|
||||
try:
|
||||
tool_call["function"]["arguments"] = json.loads(args)
|
||||
except json.JSONDecodeError as e:
|
||||
LOG.error(
|
||||
f"Error parsing tool_calls arguments as JSON. "
|
||||
f"Function: {tool_call.get('function', {}).get('name', 'unknown')}, "
|
||||
f"Arguments string: {args!r}, "
|
||||
f"Error: {e}"
|
||||
)
|
||||
raise
|
||||
|
||||
return transformed_message
|
||||
|
||||
def _get_images(self, prompt):
|
||||
|
||||
Reference in New Issue
Block a user