fix: reduce hallucination in receipt extraction — conservative prompts + date injection
Two sources of hallucinated values in receipt parsing:

1. The LLM extraction prompt had no explicit "don't guess" constraint, so when Tesseract produced garbled OCR text the LLM substituted plausible-looking values (wrong vendor names, wrong totals) instead of returning safe defaults.
2. The date field asked the LLM to extract the date from the OCR text even when date_hint (from the filename timestamp, e.g. 20260509_180857.jpg) was already available — a reliable signal that was being ignored.

expenses_agent._parse_receipt_text:
- LLM path: new prompt leads with "copy values EXACTLY, do NOT guess or infer"; adds "if OCR looks corrupted, return safe default rather than a more logical value"; injects date_hint directly as an authoritative value when available so the LLM never needs to extract the date.
- Vision fast path: normalise "null" string for date the same way as time; prefer date_hint over a null date returned by the vision model.

receipt_parser._ocr_image_vision:
- Vision prompt now leads with the same "copy exactly, do not guess" constraint and explicitly accepts null for date/time when not clearly visible, matching the conservative tone of the LLM extraction prompt.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -360,13 +360,17 @@ class ExpensesAgent(BaseAgent):
|
|||||||
# Map the vision category label → expense product name
|
# Map the vision category label → expense product name
|
||||||
product_name = self._match_category(
|
product_name = self._match_category(
|
||||||
data.get('category', ''), expense_products or [])
|
data.get('category', ''), expense_products or [])
|
||||||
# Vision model sometimes returns the string "null" instead of JSON null
|
# Vision model sometimes returns the string "null" instead
|
||||||
|
# of JSON null — normalise both fields.
|
||||||
|
_NULL = (None, 'null', 'None', '')
|
||||||
raw_time = data.get('time')
|
raw_time = data.get('time')
|
||||||
time_val = None if raw_time in (None, 'null', 'None', '') else str(raw_time)
|
time_val = None if raw_time in _NULL else str(raw_time)
|
||||||
|
raw_date = data.get('date')
|
||||||
|
date_val = None if raw_date in _NULL else str(raw_date)
|
||||||
return {
|
return {
|
||||||
'vendor': str(data.get('vendor') or filename),
|
'vendor': str(data.get('vendor') or filename),
|
||||||
'amount': float(data.get('amount', 0.0)),
|
'amount': float(data.get('amount', 0.0)),
|
||||||
'date': str(data.get('date') or date_hint or today),
|
'date': date_val or date_hint or today,
|
||||||
'time': time_val,
|
'time': time_val,
|
||||||
'product_name': product_name,
|
'product_name': product_name,
|
||||||
}
|
}
|
||||||
@@ -398,19 +402,38 @@ class ExpensesAgent(BaseAgent):
|
|||||||
receipt_text = stripped[:1500] + '\n[...]\n' + stripped[-1500:]
|
receipt_text = stripped[:1500] + '\n[...]\n' + stripped[-1500:]
|
||||||
else:
|
else:
|
||||||
receipt_text = stripped
|
receipt_text = stripped
|
||||||
|
|
||||||
|
# When the filename carries a reliable timestamp, inject it directly
|
||||||
|
# so the LLM doesn't try to read (and potentially misread) the date
|
||||||
|
# from garbled OCR text.
|
||||||
|
if date_hint:
|
||||||
|
date_instruction = (
|
||||||
|
f'Use exactly "{date_hint}" — this date was read from the file '
|
||||||
|
f'timestamp and is more reliable than the OCR text.'
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
date_instruction = (
|
||||||
|
f'Extract from the receipt text in YYYY-MM-DD format; '
|
||||||
|
f'use {today} only if no date is visible.'
|
||||||
|
)
|
||||||
|
|
||||||
prompt = (
|
prompt = (
|
||||||
'Extract expense details from the following receipt text. '
|
'You are a receipt data extractor. '
|
||||||
|
'Copy values EXACTLY as they appear in the text — '
|
||||||
|
'do NOT guess, infer, "correct" OCR errors, or invent plausible values.\n\n'
|
||||||
'Return ONLY valid JSON with these keys:\n'
|
'Return ONLY valid JSON with these keys:\n'
|
||||||
'"vendor" (string, merchant or restaurant name),\n'
|
f'"vendor": merchant name exactly as printed; '
|
||||||
'"amount" (number — the FINAL total the customer paid; '
|
f'empty string "" if you cannot find it clearly,\n'
|
||||||
'look for a line explicitly labeled "Total", "Grand Total", "Amount Due", or "Balance Due"; '
|
f'"amount": the FINAL total — find a line labeled "Total", "Grand Total", '
|
||||||
'do NOT use subtotal, tax, tip, or individual line items; '
|
f'"Amount Due", or "Balance Due"; copy the number exactly as written; '
|
||||||
'if the label is ambiguous choose the bottom-most total on the receipt; '
|
f'never use subtotal, tax, or tip lines; '
|
||||||
'return 0 if no clear total is found),\n'
|
f'return 0 if no clearly labeled final total is present,\n'
|
||||||
f'"date" (string YYYY-MM-DD, use {date_hint or today} if not found in text),\n'
|
f'"date": {date_instruction}\n'
|
||||||
'"time" (string HH:MM in 24-hour format — the transaction time printed on the receipt; '
|
f'"time": transaction time HH:MM (24-hour) exactly as printed, or null,\n'
|
||||||
'null if not present),\n'
|
f'"product_name": best match from [{product_list}] or "".\n\n'
|
||||||
f'"product_name" (string, pick the best match from [{product_list}] or empty string).\n\n'
|
f'IMPORTANT: This text came from OCR and may contain garbled characters. '
|
||||||
|
f'If a value looks corrupted, return the safe default (0 / "" / null) '
|
||||||
|
f'rather than substituting a "more logical" value.\n\n'
|
||||||
f'Receipt text:\n{receipt_text}\n\nJSON only:'
|
f'Receipt text:\n{receipt_text}\n\nJSON only:'
|
||||||
)
|
)
|
||||||
try:
|
try:
|
||||||
|
|||||||
@@ -113,18 +113,24 @@ def _ocr_image_vision(data: bytes, filename: str, ollama_url: str, model: str) -
|
|||||||
messages=[{
|
messages=[{
|
||||||
'role': 'user',
|
'role': 'user',
|
||||||
'content': (
|
'content': (
|
||||||
'This is a photo of a receipt. Extract these fields:\n'
|
'You are a receipt data extractor. '
|
||||||
'- vendor: the store or restaurant name\n'
|
'Read this receipt image and extract the following fields. '
|
||||||
'- amount: the FINAL total the customer paid. Look for a line '
|
'Copy values EXACTLY as printed — do NOT guess, infer, or '
|
||||||
'labeled "Total", "Grand Total", "Amount Due", or "Balance Due". '
|
'invent values you cannot clearly see.\n\n'
|
||||||
'Do NOT use subtotal, tax, or tip. Return 0 if you cannot find '
|
'Fields to extract:\n'
|
||||||
'a clear final total.\n'
|
'- vendor: the store or restaurant name exactly as printed; '
|
||||||
'- date: transaction date in YYYY-MM-DD format\n'
|
'empty string if not clearly visible\n'
|
||||||
'- time: transaction time in HH:MM 24-hour format, or null\n'
|
'- amount: the FINAL total the customer paid; find a line '
|
||||||
'- category: one word describing the expense type — one of: '
|
'labeled "Total", "Grand Total", "Amount Due", or "Balance Due"; '
|
||||||
'meals, fuel, hotel, office, transport, other\n\n'
|
'copy the number exactly; do NOT use subtotal, tax, or tip; '
|
||||||
|
'return 0 if no clearly labeled final total is visible\n'
|
||||||
|
'- date: transaction date in YYYY-MM-DD format; '
|
||||||
|
'null if not clearly visible\n'
|
||||||
|
'- time: transaction time in HH:MM 24-hour format; '
|
||||||
|
'null if not clearly visible\n'
|
||||||
|
'- category: one of: meals, fuel, hotel, office, transport, other\n\n'
|
||||||
'Return ONLY a valid JSON object, no commentary, no markdown:\n'
|
'Return ONLY a valid JSON object, no commentary, no markdown:\n'
|
||||||
'{"vendor":"...","amount":0.00,"date":"YYYY-MM-DD",'
|
'{"vendor":"...","amount":0.00,"date":"YYYY-MM-DD or null",'
|
||||||
'"time":"HH:MM or null","category":"..."}'
|
'"time":"HH:MM or null","category":"..."}'
|
||||||
),
|
),
|
||||||
'images': [data],
|
'images': [data],
|
||||||
|
|||||||
Reference in New Issue
Block a user