From 9fa391c720d8bd62d9ed7e6395163384429ef323 Mon Sep 17 00:00:00 2001 From: Carlos Garcia Date: Wed, 20 May 2026 22:19:20 -0400 Subject: [PATCH] =?UTF-8?q?fix:=20reduce=20hallucination=20in=20receipt=20?= =?UTF-8?q?extraction=20=E2=80=94=20conservative=20prompts=20+=20date=20in?= =?UTF-8?q?jection?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two sources of hallucinated values in receipt parsing: 1. The LLM extraction prompt had no explicit "don't guess" constraint, so when Tesseract produced garbled OCR text the LLM substituted plausible-looking values (wrong vendor names, wrong totals) instead of returning safe defaults. 2. The date field asked the LLM to extract the date from the OCR text even when date_hint (from the filename timestamp, e.g. 20260509_180857.jpg) was already available — a reliable signal that was being ignored. expenses_agent._parse_receipt_text: - LLM path: new prompt leads with "copy values EXACTLY, do NOT guess or infer"; adds "if OCR looks corrupted, return safe default rather than a more logical value"; injects date_hint directly as an authoritative value when available so the LLM never needs to extract the date. - Vision fast path: normalise "null" string for date the same way as time; prefer date_hint over a null date returned by the vision model. receipt_parser._ocr_image_vision: - Vision prompt now leads with the same "copy exactly, do not guess" constraint and explicitly accepts null for date/time when not clearly visible, matching the conservative tone of the LLM extraction prompt. 
Co-Authored-By: Claude Sonnet 4.6 --- agent_service/agents/expenses_agent.py | 51 +++++++++++++++++++------- agent_service/tools/receipt_parser.py | 28 ++++++++------ 2 files changed, 54 insertions(+), 25 deletions(-) diff --git a/agent_service/agents/expenses_agent.py b/agent_service/agents/expenses_agent.py index 2957479..fe20f25 100644 --- a/agent_service/agents/expenses_agent.py +++ b/agent_service/agents/expenses_agent.py @@ -360,13 +360,17 @@ class ExpensesAgent(BaseAgent): # Map the vision category label → expense product name product_name = self._match_category( data.get('category', ''), expense_products or []) - # Vision model sometimes returns the string "null" instead of JSON null + # Vision model sometimes returns the string "null" instead + # of JSON null — normalise both fields. + _NULL = (None, 'null', 'None', '') raw_time = data.get('time') - time_val = None if raw_time in (None, 'null', 'None', '') else str(raw_time) + time_val = None if raw_time in _NULL else str(raw_time) + raw_date = data.get('date') + date_val = None if raw_date in _NULL else str(raw_date) return { 'vendor': str(data.get('vendor') or filename), 'amount': float(data.get('amount', 0.0)), - 'date': str(data.get('date') or date_hint or today), + 'date': date_val or date_hint or today, 'time': time_val, 'product_name': product_name, } @@ -398,19 +402,38 @@ class ExpensesAgent(BaseAgent): receipt_text = stripped[:1500] + '\n[...]\n' + stripped[-1500:] else: receipt_text = stripped + + # When the filename carries a reliable timestamp, inject it directly + # so the LLM doesn't try to read (and potentially misread) the date + # from garbled OCR text. + if date_hint: + date_instruction = ( + f'Use exactly "{date_hint}" — this date was read from the file ' + f'timestamp and is more reliable than the OCR text.' + ) + else: + date_instruction = ( + f'Extract from the receipt text in YYYY-MM-DD format; ' + f'use {today} only if no date is visible.' 
+ ) + prompt = ( - 'Extract expense details from the following receipt text. ' + 'You are a receipt data extractor. ' + 'Copy values EXACTLY as they appear in the text — ' + 'do NOT guess, infer, "correct" OCR errors, or invent plausible values.\n\n' 'Return ONLY valid JSON with these keys:\n' - '"vendor" (string, merchant or restaurant name),\n' - '"amount" (number — the FINAL total the customer paid; ' - 'look for a line explicitly labeled "Total", "Grand Total", "Amount Due", or "Balance Due"; ' - 'do NOT use subtotal, tax, tip, or individual line items; ' - 'if the label is ambiguous choose the bottom-most total on the receipt; ' - 'return 0 if no clear total is found),\n' - f'"date" (string YYYY-MM-DD, use {date_hint or today} if not found in text),\n' - '"time" (string HH:MM in 24-hour format — the transaction time printed on the receipt; ' - 'null if not present),\n' - f'"product_name" (string, pick the best match from [{product_list}] or empty string).\n\n' + f'"vendor": merchant name exactly as printed; ' + f'empty string "" if you cannot find it clearly,\n' + f'"amount": the FINAL total — find a line labeled "Total", "Grand Total", ' + f'"Amount Due", or "Balance Due"; copy the number exactly as written; ' + f'never use subtotal, tax, or tip lines; ' + f'return 0 if no clearly labeled final total is present,\n' + f'"date": {date_instruction}\n' + f'"time": transaction time HH:MM (24-hour) exactly as printed, or null,\n' + f'"product_name": best match from [{product_list}] or "".\n\n' + f'IMPORTANT: This text came from OCR and may contain garbled characters. 
' + f'If a value looks corrupted, return the safe default (0 / "" / null) ' + f'rather than substituting a "more logical" value.\n\n' f'Receipt text:\n{receipt_text}\n\nJSON only:' ) try: diff --git a/agent_service/tools/receipt_parser.py b/agent_service/tools/receipt_parser.py index 6e49588..78fa480 100644 --- a/agent_service/tools/receipt_parser.py +++ b/agent_service/tools/receipt_parser.py @@ -113,18 +113,24 @@ def _ocr_image_vision(data: bytes, filename: str, ollama_url: str, model: str) - messages=[{ 'role': 'user', 'content': ( - 'This is a photo of a receipt. Extract these fields:\n' - '- vendor: the store or restaurant name\n' - '- amount: the FINAL total the customer paid. Look for a line ' - 'labeled "Total", "Grand Total", "Amount Due", or "Balance Due". ' - 'Do NOT use subtotal, tax, or tip. Return 0 if you cannot find ' - 'a clear final total.\n' - '- date: transaction date in YYYY-MM-DD format\n' - '- time: transaction time in HH:MM 24-hour format, or null\n' - '- category: one word describing the expense type — one of: ' - 'meals, fuel, hotel, office, transport, other\n\n' + 'You are a receipt data extractor. ' + 'Read this receipt image and extract the following fields. 
' + 'Copy values EXACTLY as printed — do NOT guess, infer, or ' + 'invent values you cannot clearly see.\n\n' + 'Fields to extract:\n' + '- vendor: the store or restaurant name exactly as printed; ' + 'empty string if not clearly visible\n' + '- amount: the FINAL total the customer paid; find a line ' + 'labeled "Total", "Grand Total", "Amount Due", or "Balance Due"; ' + 'copy the number exactly; do NOT use subtotal, tax, or tip; ' + 'return 0 if no clearly labeled final total is visible\n' + '- date: transaction date in YYYY-MM-DD format; ' + 'null if not clearly visible\n' + '- time: transaction time in HH:MM 24-hour format; ' + 'null if not clearly visible\n' + '- category: one of: meals, fuel, hotel, office, transport, other\n\n' 'Return ONLY a valid JSON object, no commentary, no markdown:\n' - '{"vendor":"...","amount":0.00,"date":"YYYY-MM-DD",' + '{"vendor":"...","amount":0.00,"date":"YYYY-MM-DD or null",' '"time":"HH:MM or null","category":"..."}' ), 'images': [data],