Remove vision OCR — use Tesseract-only pipeline for receipt parsing
The llama3.2-vision model was producing unreliable structured data (wrong vendors, amounts, and dates), which made expense reports worse than those produced by Tesseract + LLM extraction. This commit removes _ocr_image_vision(), the vision JSON fast path in _parse_receipt_text(), _match_category(), and the vision_ocr_model config setting entirely. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -317,30 +317,6 @@ class ExpensesAgent(BaseAgent):
|
||||
|
||||
return None
|
||||
|
||||
@staticmethod
def _match_category(category: str, expense_products: list) -> str:
    """Map a vision-model category label to the nearest expense product name.

    Matching is case-insensitive and proceeds in three passes, returning
    the first hit:

    1. exact match on the product name,
    2. substring match in either direction,
    3. fuzzy match via ``difflib.get_close_matches`` (cutoff 0.4).

    Args:
        category: Category label to resolve; surrounding whitespace is
            ignored.
        expense_products: List of mappings that each carry a ``'name'``
            entry. Entries whose name is empty or blank are ignored.

    Returns:
        The matching product's name exactly as it appears in
        ``expense_products``, or an empty string when the inputs are empty
        or no reasonable match is found.
    """
    if not expense_products or not category:
        return ''

    cat = category.lower().strip()
    if not cat:
        # A whitespace-only label would otherwise be a substring of every
        # product name and silently select the first product.
        return ''

    # (original name, lowercased name) pairs. Blank names are skipped
    # because '' is a substring of any label and would shadow real matches.
    candidates = [
        (p['name'], p['name'].lower())
        for p in expense_products
        if p['name'] and p['name'].strip()
    ]

    # Pass 1: an exact match always wins, wherever it sits in the list.
    for original, lowered in candidates:
        if lowered == cat:
            return original

    # Pass 2: substring match in either direction.
    for original, lowered in candidates:
        if cat in lowered or lowered in cat:
            return original

    # Pass 3: fuzzy fallback (similarity ratio >= 0.4).
    close = difflib.get_close_matches(
        cat, [lowered for _, lowered in candidates], n=1, cutoff=0.4)
    if close:
        for original, lowered in candidates:
            if lowered == close[0]:
                return original
    return ''
|
||||
|
||||
async def _parse_receipt_text(self, text: str, filename: str,
|
||||
expense_products: list = None,
|
||||
date_hint: str = None) -> dict:
|
||||
@@ -348,35 +324,7 @@ class ExpensesAgent(BaseAgent):
|
||||
fallback = {'vendor': filename, 'amount': 0.0,
|
||||
'date': date_hint or today, 'time': None, 'product_name': ''}
|
||||
|
||||
# ── Fast path: vision model already returned structured JSON ──────────
|
||||
# receipt_parser._ocr_image_vision() returns a JSON string directly
|
||||
# when a vision model is configured. Skip the second LLM call entirely.
|
||||
stripped = (text or '').strip()
|
||||
if stripped.startswith('{'):
|
||||
try:
|
||||
data = json.loads(stripped)
|
||||
if 'amount' in data:
|
||||
logger.debug('expenses_agent: using vision pre-extracted JSON for %s', filename)
|
||||
# Map the vision category label → expense product name
|
||||
product_name = self._match_category(
|
||||
data.get('category', ''), expense_products or [])
|
||||
# Vision model sometimes returns the string "null" instead
|
||||
# of JSON null — normalise both fields.
|
||||
_NULL = (None, 'null', 'None', '')
|
||||
raw_time = data.get('time')
|
||||
time_val = None if raw_time in _NULL else str(raw_time)
|
||||
raw_date = data.get('date')
|
||||
date_val = None if raw_date in _NULL else str(raw_date)
|
||||
return {
|
||||
'vendor': str(data.get('vendor') or filename),
|
||||
'amount': float(data.get('amount', 0.0)),
|
||||
'date': date_val or date_hint or today,
|
||||
'time': time_val,
|
||||
'product_name': product_name,
|
||||
}
|
||||
except (json.JSONDecodeError, ValueError, TypeError):
|
||||
pass # not clean JSON — fall through to LLM path
|
||||
|
||||
ocr_failed = not stripped or stripped.startswith('[')
|
||||
|
||||
product_list = ''
|
||||
|
||||
Reference in New Issue
Block a user