fix: vision OCR JSON failures — add format='json' and repair fallback
Three receipts per batch were failing with JSONDecodeError (e.g. "Expecting ':' delimiter: line 1 column 90") because activeblue-chat (llama3.2-vision) occasionally outputs near-JSON with trailing commas, single-quoted strings, or unquoted keys. Two-layer fix: 1. Add format='json' to the Ollama chat call — Ollama JSON mode forces syntactically valid output at the sampler level, eliminating most structural errors. 2. Add _repair_json() fallback that runs on any remaining JSONDecodeError: strips trailing commas, converts single→double quotes, and quotes unquoted keys. If repair succeeds, the result is re-serialised as canonical JSON before being returned. Also re-serialise with json.dumps() on success so the fast path in _parse_receipt_text always receives clean, canonical JSON regardless of whitespace or key ordering in the model's original output. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -105,11 +105,30 @@ def _ocr_image_vision(data: bytes, filename: str, ollama_url: str, model: str) -
|
||||
Returns empty string on any failure so the caller falls back to Tesseract.
|
||||
"""
|
||||
import json as _json
|
||||
import re as _re
|
||||
|
||||
def _repair_json(s: str) -> str:
|
||||
"""Fix the most common LLM JSON formatting mistakes.
|
||||
|
||||
Handles:
|
||||
- trailing commas before } or ] → {"a":1,} becomes {"a":1}
|
||||
- single-quoted strings → {'a':'b'} becomes {"a":"b"}
|
||||
- unquoted string keys → {a: "b"} becomes {"a": "b"}
|
||||
"""
|
||||
# trailing commas
|
||||
s = _re.sub(r',\s*([}\]])', r'\1', s)
|
||||
# single-quoted strings (careful around apostrophes in values)
|
||||
s = _re.sub(r"'([^']*)'", r'"\1"', s)
|
||||
# unquoted keys: word characters before a colon
|
||||
s = _re.sub(r'(?<!["\w])(\w+)\s*:', r'"\1":', s)
|
||||
return s
|
||||
|
||||
try:
|
||||
import ollama as _ollama
|
||||
client = _ollama.Client(host=ollama_url)
|
||||
response = client.chat(
|
||||
model=model,
|
||||
format='json', # Ollama JSON mode — forces syntactically valid output
|
||||
messages=[{
|
||||
'role': 'user',
|
||||
'content': (
|
||||
@@ -148,12 +167,26 @@ def _ocr_image_vision(data: bytes, filename: str, ollama_url: str, model: str) -
|
||||
filename)
|
||||
return ''
|
||||
json_str = raw[first:last + 1]
|
||||
parsed = _json.loads(json_str)
|
||||
|
||||
# Parse — on failure attempt common repairs then retry once
|
||||
try:
|
||||
parsed = _json.loads(json_str)
|
||||
except _json.JSONDecodeError as json_err:
|
||||
repaired = _repair_json(json_str)
|
||||
try:
|
||||
parsed = _json.loads(repaired)
|
||||
logger.debug('Vision OCR %s: JSON repaired successfully', filename)
|
||||
except _json.JSONDecodeError:
|
||||
logger.warning('Vision OCR %s: JSON parse failed (%s), falling back',
|
||||
filename, json_err)
|
||||
return ''
|
||||
|
||||
if 'amount' not in parsed:
|
||||
logger.warning('Vision OCR %s: JSON missing amount field, falling back', filename)
|
||||
return ''
|
||||
logger.debug('Vision OCR %s (%s): extracted JSON ok', filename, model)
|
||||
return json_str
|
||||
# Re-serialise so downstream always gets clean, canonical JSON
|
||||
return _json.dumps(parsed)
|
||||
except ImportError:
|
||||
logger.warning('ollama package not installed — vision OCR unavailable for %s', filename)
|
||||
return ''
|
||||
|
||||
Reference in New Issue
Block a user