fix: vision OCR JSON failures — add format='json' and repair fallback

Three receipts per batch were failing with JSONDecodeError (e.g.
"Expecting ':' delimiter: line 1 column 90") because activeblue-chat
(llama3.2-vision) occasionally outputs near-JSON with trailing commas,
single-quoted strings, or unquoted keys.

Two-layer fix:
1. Add format='json' to the Ollama chat call — Ollama JSON mode forces
   syntactically valid output at the sampler level, eliminating most
   structural errors.
2. Add _repair_json() fallback that runs on any remaining JSONDecodeError:
   strips trailing commas, converts single→double quotes, and quotes
   unquoted keys. If repair succeeds, the result is re-serialised as
   canonical JSON before being returned.

Also re-serialise with json.dumps() on success so the fast path in
_parse_receipt_text always receives clean, canonical JSON regardless of
whitespace or key ordering in the model's original output.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Carlos Garcia
2026-05-20 22:24:50 -04:00
parent 9fa391c720
commit ec6b41943f

View File

@@ -105,11 +105,30 @@ def _ocr_image_vision(data: bytes, filename: str, ollama_url: str, model: str) -
Returns empty string on any failure so the caller falls back to Tesseract.
"""
import json as _json
import re as _re
def _repair_json(s: str) -> str:
    """Best-effort repair of the most common LLM JSON formatting mistakes.

    Repairs are applied only OUTSIDE of string literals:
    - trailing commas before } or ]  → {"a":1,}  becomes {"a":1}
    - single-quoted strings          → {'a':'b'} becomes {"a":"b"}
    - unquoted keys                  → {a: "b"}  becomes {"a": "b"}

    The text is tokenised in a single left-to-right pass so that the
    contents of well-formed double-quoted strings are never rewritten.
    Running independent regexes over the whole text would corrupt values
    such as "Joe's Bar & Pete's" (apostrophes), "2024-01-01 12:30"
    (word followed by a colon) or "a,}" (comma before a bracket).

    Single-quoted strings are re-quoted with double quotes; embedded
    double quotes are escaped and \\' is unescaped. An unescaped
    apostrophe inside a single-quoted string is ambiguous and is not
    repaired.

    Returns the repaired text. The result is not guaranteed to be valid
    JSON, so the caller must still parse it and handle failure.
    """
    token = _re.compile(
        r'(?P<dq>"(?:[^"\\]|\\.)*")'       # double-quoted string: keep as-is
        r"|'(?P<sq>(?:[^'\\]|\\.)*)'"      # single-quoted string: re-quote
        r'|(?P<comma>,\s*(?=[}\]]))'       # trailing comma (and whitespace)
        r'|(?<!["\'\w])(?P<key>\w+)\s*:',  # bare key directly before a colon
        _re.DOTALL,
    )

    def _requote(m):
        # Runs on the body of a single-quoted string: make it a legal
        # double-quoted JSON body while leaving other escapes untouched.
        piece = m.group(0)
        if piece == '"':
            return '\\"'
        if piece == "\\'":
            return "'"
        return piece

    def _fix(m):
        if m.group('dq') is not None:
            return m.group('dq')
        if m.group('sq') is not None:
            body = _re.sub(r'\\.|"', _requote, m.group('sq'), flags=_re.DOTALL)
            return '"' + body + '"'
        if m.group('comma') is not None:
            return ''
        return '"' + m.group('key') + '":'

    return token.sub(_fix, s)
try:
import ollama as _ollama
client = _ollama.Client(host=ollama_url)
response = client.chat(
model=model,
format='json', # Ollama JSON mode — forces syntactically valid output
messages=[{
'role': 'user',
'content': (
@@ -148,12 +167,26 @@ def _ocr_image_vision(data: bytes, filename: str, ollama_url: str, model: str) -
filename)
return ''
json_str = raw[first:last + 1]
parsed = _json.loads(json_str)
# Parse — on failure attempt common repairs then retry once
try:
parsed = _json.loads(json_str)
except _json.JSONDecodeError as json_err:
repaired = _repair_json(json_str)
try:
parsed = _json.loads(repaired)
logger.debug('Vision OCR %s: JSON repaired successfully', filename)
except _json.JSONDecodeError:
logger.warning('Vision OCR %s: JSON parse failed (%s), falling back',
filename, json_err)
return ''
if 'amount' not in parsed:
logger.warning('Vision OCR %s: JSON missing amount field, falling back', filename)
return ''
logger.debug('Vision OCR %s (%s): extracted JSON ok', filename, model)
return json_str
# Re-serialise so downstream always gets clean, canonical JSON
return _json.dumps(parsed)
except ImportError:
logger.warning('ollama package not installed — vision OCR unavailable for %s', filename)
return ''