Remove vision OCR — use Tesseract-only pipeline for receipt parsing

The llama3.2-vision model was producing unreliable structured data (wrong vendors, amounts, dates), making expense reports worse than those produced by Tesseract + LLM extraction. This commit removes _ocr_image_vision(), the vision JSON fast path in _parse_receipt_text(), _match_category(), and the vision_ocr_model config setting entirely.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-05-20 22:32:26 -04:00
parent ec6b41943f
commit 0320591344
4 changed files with 4 additions and 247 deletions
--- a/agent_service/agents/expenses_agent.py
+++ b/agent_service/agents/expenses_agent.py
@@ -317,30 +317,6 @@ class ExpensesAgent(BaseAgent):

        return None

-    @staticmethod
-    def _match_category(category: str, expense_products: list) -> str:
-        """Map a vision-model category label to the nearest expense product name.
-
-        Tries exact/substring match first, then a fuzzy SequenceMatcher pass.
-        Returns empty string when no reasonable match is found.
-        """
-        if not expense_products or not category:
-            return ''
-        cat = category.lower().strip()
-        # Exact or substring match
-        for p in expense_products:
-            name = p['name'].lower()
-            if cat == name or cat in name or name in cat:
-                return p['name']
-        # Fuzzy fallback (ratio >= 0.4)
-        names_lower = [p['name'].lower() for p in expense_products]
-        matches = difflib.get_close_matches(cat, names_lower, n=1, cutoff=0.4)
-        if matches:
-            for p in expense_products:
-                if p['name'].lower() == matches[0]:
-                    return p['name']
-        return ''
-
    async def _parse_receipt_text(self, text: str, filename: str,
                                   expense_products: list = None,
                                   date_hint: str = None) -> dict:
@@ -348,35 +324,7 @@ class ExpensesAgent(BaseAgent):
        fallback = {'vendor': filename, 'amount': 0.0,
                    'date': date_hint or today, 'time': None, 'product_name': ''}

-        # ── Fast path: vision model already returned structured JSON ──────────
-        # receipt_parser._ocr_image_vision() returns a JSON string directly
-        # when a vision model is configured.  Skip the second LLM call entirely.
        stripped = (text or '').strip()
-        if stripped.startswith('{'):
-            try:
-                data = json.loads(stripped)
-                if 'amount' in data:
-                    logger.debug('expenses_agent: using vision pre-extracted JSON for %s', filename)
-                    # Map the vision category label → expense product name
-                    product_name = self._match_category(
-                        data.get('category', ''), expense_products or [])
-                    # Vision model sometimes returns the string "null" instead
-                    # of JSON null — normalise both fields.
-                    _NULL = (None, 'null', 'None', '')
-                    raw_time = data.get('time')
-                    time_val = None if raw_time in _NULL else str(raw_time)
-                    raw_date = data.get('date')
-                    date_val = None if raw_date in _NULL else str(raw_date)
-                    return {
-                        'vendor': str(data.get('vendor') or filename),
-                        'amount': float(data.get('amount', 0.0)),
-                        'date': date_val or date_hint or today,
-                        'time': time_val,
-                        'product_name': product_name,
-                    }
-            except (json.JSONDecodeError, ValueError, TypeError):
-                pass  # not clean JSON — fall through to LLM path
-
        ocr_failed = not stripped or stripped.startswith('[')

        product_list = ''