fix: vision OCR JSON failures — add format='json' and repair fallback

Three receipts per batch were failing with JSONDecodeError (e.g.
"Expecting ':' delimiter: line 1 column 90") because activeblue-chat
(llama3.2-vision) occasionally outputs near-JSON with trailing commas,
single-quoted strings, or unquoted keys.

Two-layer fix:
1. Add format='json' to the Ollama chat call — Ollama JSON mode forces
   syntactically valid output at the sampler level, eliminating most
   structural errors.
2. Add _repair_json() fallback that runs on any remaining JSONDecodeError:
   strips trailing commas, converts single→double quotes, and quotes
   unquoted keys. If repair succeeds, the result is re-serialised as
   canonical JSON before being returned.

Also re-serialise with json.dumps() on success so the fast path in
_parse_receipt_text always receives clean, canonical JSON regardless of
whitespace or quoting quirks in the model's original output (key order is
left as the model emitted it, since sort_keys is not set).

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Carlos Garcia
2026-05-20 22:24:50 -04:00
parent 9fa391c720
commit ec6b41943f

View File

@@ -105,11 +105,30 @@ def _ocr_image_vision(data: bytes, filename: str, ollama_url: str, model: str) -
Returns empty string on any failure so the caller falls back to Tesseract. Returns empty string on any failure so the caller falls back to Tesseract.
""" """
import json as _json import json as _json
import re as _re
def _repair_json(s: str) -> str:
    """Fix the most common LLM JSON formatting mistakes.

    The input is scanned once, left to right, tracking whether the cursor
    is inside a string literal, so that repairs are only applied to the
    structural parts of the document and never to the text of a value.

    Handles:
    - trailing commas before } or ]   → {"a":1,}   becomes {"a":1}
    - single-quoted strings           → {'a':'b'}  becomes {"a":"b"}
    - unquoted string keys            → {a: "b"}   becomes {"a": "b"}

    Double-quoted strings are copied through untouched, so apostrophes,
    colons and ", }" sequences inside them survive. Inside a single-quoted
    string, a quote only closes the string when it is followed (ignoring
    whitespace) by one of , : } ] or end of input; any other apostrophe is
    kept as part of the value. Embedded double quotes are escaped and the
    non-JSON escape \\' is reduced to a plain apostrophe.

    Returns the repaired text. The result is NOT guaranteed to be valid
    JSON; the caller must still parse it and handle a decode error.
    """
    n = len(s)

    def _skip_ws(pos: int) -> int:
        # Index of the first non-whitespace character at or after pos.
        while pos < n and s[pos].isspace():
            pos += 1
        return pos

    out = []
    i = 0
    while i < n:
        ch = s[i]

        if ch == '"':
            # Already-valid string: copy verbatim, stepping over escapes so
            # an escaped quote does not end the literal early.
            j = i + 1
            while j < n and s[j] != '"':
                j += 2 if s[j] == '\\' else 1
            out.append(s[i:j + 1])
            i = j + 1
            continue

        if ch == "'":
            # Single-quoted string: rebuild it as a double-quoted one.
            buf = []
            j = i + 1
            closed = False
            while j < n:
                c = s[j]
                if c == '\\' and j + 1 < n:
                    # \' is not a legal JSON escape; other escapes pass through.
                    buf.append("'" if s[j + 1] == "'" else s[j:j + 2])
                    j += 2
                    continue
                if c == "'":
                    k = _skip_ws(j + 1)
                    if k >= n or s[k] in ',:}]':
                        closed = True
                        break
                    # Not followed by a delimiter: an apostrophe in the value.
                    buf.append("'")
                elif c == '"':
                    buf.append('\\"')
                else:
                    buf.append(c)
                j += 1
            if not closed:
                # Unterminated literal: nothing sensible to do, leave the
                # remainder alone and let the caller's parse attempt fail.
                out.append(s[i:])
                break
            out.append('"' + ''.join(buf) + '"')
            i = j + 1
            continue

        if ch == ',':
            k = _skip_ws(i + 1)
            if k < n and s[k] in '}]':
                # Trailing comma: drop it (and the whitespace after it).
                i = k
                continue
            out.append(ch)
            i += 1
            continue

        if ch.isalnum() or ch == '_':
            j = i
            while j < n and (s[j].isalnum() or s[j] == '_'):
                j += 1
            k = _skip_ws(j)
            if k < n and s[k] == ':':
                # Bare word directly before a colon is an unquoted key.
                out.append('"' + s[i:j] + '":')
                i = k + 1
            else:
                # Number or literal (true/false/null): keep as-is.
                out.append(s[i:j])
                i = j
            continue

        out.append(ch)
        i += 1

    return ''.join(out)
try: try:
import ollama as _ollama import ollama as _ollama
client = _ollama.Client(host=ollama_url) client = _ollama.Client(host=ollama_url)
response = client.chat( response = client.chat(
model=model, model=model,
format='json', # Ollama JSON mode — forces syntactically valid output
messages=[{ messages=[{
'role': 'user', 'role': 'user',
'content': ( 'content': (
@@ -148,12 +167,26 @@ def _ocr_image_vision(data: bytes, filename: str, ollama_url: str, model: str) -
filename) filename)
return '' return ''
json_str = raw[first:last + 1] json_str = raw[first:last + 1]
parsed = _json.loads(json_str)
# Parse — on failure attempt common repairs then retry once
try:
parsed = _json.loads(json_str)
except _json.JSONDecodeError as json_err:
repaired = _repair_json(json_str)
try:
parsed = _json.loads(repaired)
logger.debug('Vision OCR %s: JSON repaired successfully', filename)
except _json.JSONDecodeError:
logger.warning('Vision OCR %s: JSON parse failed (%s), falling back',
filename, json_err)
return ''
if 'amount' not in parsed: if 'amount' not in parsed:
logger.warning('Vision OCR %s: JSON missing amount field, falling back', filename) logger.warning('Vision OCR %s: JSON missing amount field, falling back', filename)
return '' return ''
logger.debug('Vision OCR %s (%s): extracted JSON ok', filename, model) logger.debug('Vision OCR %s (%s): extracted JSON ok', filename, model)
return json_str # Re-serialise so downstream always gets clean, canonical JSON
return _json.dumps(parsed)
except ImportError: except ImportError:
logger.warning('ollama package not installed — vision OCR unavailable for %s', filename) logger.warning('ollama package not installed — vision OCR unavailable for %s', filename)
return '' return ''