Remove vision OCR — use Tesseract-only pipeline for receipt parsing

The llama3.2-vision model was producing unreliable structured data
(wrong vendors, amounts, dates), making expense reports worse than
those produced by Tesseract + LLM extraction. This commit removes
_ocr_image_vision(), the vision JSON fast path in
_parse_receipt_text(), _match_category(), and the vision_ocr_model
config setting entirely.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Carlos Garcia
2026-05-20 22:32:26 -04:00
parent ec6b41943f
commit 0320591344
4 changed files with 4 additions and 247 deletions

View File

@@ -317,30 +317,6 @@ class ExpensesAgent(BaseAgent):
return None
@staticmethod
def _match_category(category: str, expense_products: list) -> str:
    """Map a vision-model category label to the nearest expense product name.

    Matching is case-insensitive and proceeds in three passes:

    1. exact match on the product name;
    2. substring match in either direction, in product order;
    3. fuzzy match via ``difflib.get_close_matches`` (cutoff 0.4).

    Args:
        category: Category label to look up. Blank, whitespace-only or
            non-string values never match.
        expense_products: Mappings that each carry a ``'name'`` entry.
            Entries whose name is empty are ignored.

    Returns:
        The matched product's ``'name'`` exactly as stored, or an empty
        string when no reasonable match is found.
    """
    if not expense_products or not isinstance(category, str):
        return ''
    cat = category.strip().lower()
    if not cat:
        # '' is a substring of every name, so a blank label would otherwise
        # "match" the first product.
        return ''

    # Lowercase name -> original name; the first occurrence wins, and dict
    # ordering keeps the products in their original order.
    by_lower = {}
    for product in expense_products:
        name = product['name']
        if name:  # an empty name is a substring of every label; skip it
            by_lower.setdefault(name.lower(), name)

    # Pass 1: an exact match beats any substring hit on an earlier product.
    if cat in by_lower:
        return by_lower[cat]

    # Pass 2: substring match in either direction.
    for lowered, name in by_lower.items():
        if cat in lowered or lowered in cat:
            return name

    # Pass 3: fuzzy fallback (similarity ratio >= 0.4).
    close = difflib.get_close_matches(cat, list(by_lower), n=1, cutoff=0.4)
    return by_lower[close[0]] if close else ''
async def _parse_receipt_text(self, text: str, filename: str,
expense_products: list = None,
date_hint: str = None) -> dict:
@@ -348,35 +324,7 @@ class ExpensesAgent(BaseAgent):
fallback = {'vendor': filename, 'amount': 0.0,
'date': date_hint or today, 'time': None, 'product_name': ''}
# ── Fast path: vision model already returned structured JSON ──────────
# receipt_parser._ocr_image_vision() returns a JSON string directly
# when a vision model is configured. Skip the second LLM call entirely.
stripped = (text or '').strip()
if stripped.startswith('{'):
try:
data = json.loads(stripped)
if 'amount' in data:
logger.debug('expenses_agent: using vision pre-extracted JSON for %s', filename)
# Map the vision category label → expense product name
product_name = self._match_category(
data.get('category', ''), expense_products or [])
# Vision model sometimes returns the string "null" instead
# of JSON null — normalise both fields.
_NULL = (None, 'null', 'None', '')
raw_time = data.get('time')
time_val = None if raw_time in _NULL else str(raw_time)
raw_date = data.get('date')
date_val = None if raw_date in _NULL else str(raw_date)
return {
'vendor': str(data.get('vendor') or filename),
'amount': float(data.get('amount', 0.0)),
'date': date_val or date_hint or today,
'time': time_val,
'product_name': product_name,
}
except (json.JSONDecodeError, ValueError, TypeError):
pass # not clean JSON — fall through to LLM path
ocr_failed = not stripped or stripped.startswith('[')
product_list = ''