fix: vision OCR JSON failures — add format='json' and repair fallback
Three receipts per batch were failing with JSONDecodeError (e.g. "Expecting ':' delimiter: line 1 column 90") because activeblue-chat (llama3.2-vision) occasionally outputs near-JSON with trailing commas, single-quoted strings, or unquoted keys.

Two-layer fix:
1. Add format='json' to the Ollama chat call — Ollama JSON mode forces syntactically valid output at the sampler level, eliminating most structural errors.
2. Add _repair_json() fallback that runs on any remaining JSONDecodeError: strips trailing commas, converts single→double quotes, and quotes unquoted keys. If repair succeeds, the result is re-serialised as canonical JSON before being returned.

Also re-serialise with json.dumps() on success so the fast path in _parse_receipt_text always receives clean, consistently formatted JSON regardless of whitespace or quoting style in the model's original output (key order is preserved as emitted, since json.dumps() is called without sort_keys).

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -105,11 +105,30 @@ def _ocr_image_vision(data: bytes, filename: str, ollama_url: str, model: str) -
|
|||||||
Returns empty string on any failure so the caller falls back to Tesseract.
|
Returns empty string on any failure so the caller falls back to Tesseract.
|
||||||
"""
|
"""
|
||||||
import json as _json
|
import json as _json
|
||||||
|
import re as _re
|
||||||
|
|
||||||
|
def _repair_json(s: str) -> str:
    """Fix the most common LLM JSON formatting mistakes.

    Handles:
    - trailing commas before } or ]  → {"a":1,}  becomes {"a":1}
    - single-quoted strings          → {'a':'b'} becomes {"a":"b"}
    - unquoted string keys           → {a: "b"}  becomes {"a": "b"}

    The text is scanned once, left to right, and repairs are applied only
    *outside* double-quoted strings.  Running blanket regex substitutions
    over the whole text would corrupt otherwise-valid string values: an
    apostrophe in "Joe's" would be paired with a later apostrophe, and a
    "word:" inside a value would be mistaken for an unquoted key.

    Args:
        s: Near-JSON text that failed to parse.

    Returns:
        The repaired text.  This is best effort: the result is not
        guaranteed to be valid JSON, so the caller must still parse it.
    """
    n = len(s)
    word = _re.compile(r'\w+')

    def _skip_ws(pos: int) -> int:
        """Return the index of the first non-whitespace char at/after pos."""
        while pos < n and s[pos].isspace():
            pos += 1
        return pos

    out = []
    i = 0
    while i < n:
        ch = s[i]

        # Double-quoted string: already valid JSON syntax, copy verbatim.
        if ch == '"':
            j = i + 1
            while j < n and s[j] != '"':
                # A backslash escapes the next character (e.g. \" or \\).
                j += 2 if s[j] == '\\' else 1
            out.append(s[i:j + 1])
            i = j + 1
            continue

        # Single-quoted string: re-emit with double quotes.
        if ch == "'":
            buf = []
            j = i + 1
            closed = False
            while j < n:
                c = s[j]
                if c == '\\' and j + 1 < n:
                    nxt = s[j + 1]
                    # \' is not a legal JSON escape; other escapes pass through.
                    buf.append("'" if nxt == "'" else c + nxt)
                    j += 2
                    continue
                if c == "'":
                    # Treat the quote as closing only when a structural
                    # character (or end of input) follows; otherwise it is
                    # an apostrophe inside the value, as in 'Joe's Diner'.
                    k = _skip_ws(j + 1)
                    if k >= n or s[k] in ',:}]':
                        closed = True
                        break
                    buf.append(c)
                elif c == '"':
                    # Must be escaped once the delimiters become double quotes.
                    buf.append('\\"')
                else:
                    buf.append(c)
                j += 1
            if closed:
                out.append('"' + ''.join(buf) + '"')
                i = j + 1
            else:
                # No plausible closing quote: leave the stray apostrophe alone.
                out.append(ch)
                i += 1
            continue

        # Trailing comma: drop it (and following whitespace) before } or ].
        if ch == ',':
            k = _skip_ws(i + 1)
            if k < n and s[k] in '}]':
                i = k
                continue
            out.append(ch)
            i += 1
            continue

        # Bare word: quote it when it is used as a key (followed by a colon).
        m = word.match(s, i)
        if m:
            k = _skip_ws(m.end())
            if k < n and s[k] == ':':
                out.append('"' + m.group() + '":')
                i = k + 1
            else:
                out.append(m.group())
                i = m.end()
            continue

        out.append(ch)
        i += 1

    return ''.join(out)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
import ollama as _ollama
|
import ollama as _ollama
|
||||||
client = _ollama.Client(host=ollama_url)
|
client = _ollama.Client(host=ollama_url)
|
||||||
response = client.chat(
|
response = client.chat(
|
||||||
model=model,
|
model=model,
|
||||||
|
format='json', # Ollama JSON mode — forces syntactically valid output
|
||||||
messages=[{
|
messages=[{
|
||||||
'role': 'user',
|
'role': 'user',
|
||||||
'content': (
|
'content': (
|
||||||
@@ -148,12 +167,26 @@ def _ocr_image_vision(data: bytes, filename: str, ollama_url: str, model: str) -
|
|||||||
filename)
|
filename)
|
||||||
return ''
|
return ''
|
||||||
json_str = raw[first:last + 1]
|
json_str = raw[first:last + 1]
|
||||||
parsed = _json.loads(json_str)
|
|
||||||
|
# Parse — on failure attempt common repairs then retry once
|
||||||
|
try:
|
||||||
|
parsed = _json.loads(json_str)
|
||||||
|
except _json.JSONDecodeError as json_err:
|
||||||
|
repaired = _repair_json(json_str)
|
||||||
|
try:
|
||||||
|
parsed = _json.loads(repaired)
|
||||||
|
logger.debug('Vision OCR %s: JSON repaired successfully', filename)
|
||||||
|
except _json.JSONDecodeError:
|
||||||
|
logger.warning('Vision OCR %s: JSON parse failed (%s), falling back',
|
||||||
|
filename, json_err)
|
||||||
|
return ''
|
||||||
|
|
||||||
if 'amount' not in parsed:
|
if 'amount' not in parsed:
|
||||||
logger.warning('Vision OCR %s: JSON missing amount field, falling back', filename)
|
logger.warning('Vision OCR %s: JSON missing amount field, falling back', filename)
|
||||||
return ''
|
return ''
|
||||||
logger.debug('Vision OCR %s (%s): extracted JSON ok', filename, model)
|
logger.debug('Vision OCR %s (%s): extracted JSON ok', filename, model)
|
||||||
return json_str
|
# Re-serialise so downstream always gets clean, canonical JSON
|
||||||
|
return _json.dumps(parsed)
|
||||||
except ImportError:
|
except ImportError:
|
||||||
logger.warning('ollama package not installed — vision OCR unavailable for %s', filename)
|
logger.warning('ollama package not installed — vision OCR unavailable for %s', filename)
|
||||||
return ''
|
return ''
|
||||||
|
|||||||
Reference in New Issue
Block a user