diff --git a/agent_service/tools/receipt_parser.py b/agent_service/tools/receipt_parser.py index 78fa480..ce69ac9 100644 --- a/agent_service/tools/receipt_parser.py +++ b/agent_service/tools/receipt_parser.py @@ -105,11 +105,30 @@ def _ocr_image_vision(data: bytes, filename: str, ollama_url: str, model: str) - Returns empty string on any failure so the caller falls back to Tesseract. """ import json as _json + import re as _re + + def _repair_json(s: str) -> str: + """Fix the most common LLM JSON formatting mistakes. + + Handles: + - trailing commas before } or ] → {"a":1,} becomes {"a":1} + - single-quoted strings → {'a':'b'} becomes {"a":"b"} + - unquoted string keys → {a: "b"} becomes {"a": "b"} + """ + # trailing commas + s = _re.sub(r',\s*([}\]])', r'\1', s) + # single-quoted strings (careful around apostrophes in values) + s = _re.sub(r"'([^']*)'", r'"\1"', s) + # unquoted keys: word characters before a colon + s = _re.sub(r'(?