diff --git a/agent_service/agents/expenses_agent.py b/agent_service/agents/expenses_agent.py index fe20f25..00a2367 100644 --- a/agent_service/agents/expenses_agent.py +++ b/agent_service/agents/expenses_agent.py @@ -317,30 +317,6 @@ class ExpensesAgent(BaseAgent): return None - @staticmethod - def _match_category(category: str, expense_products: list) -> str: - """Map a vision-model category label to the nearest expense product name. - - Tries exact/substring match first, then a fuzzy SequenceMatcher pass. - Returns empty string when no reasonable match is found. - """ - if not expense_products or not category: - return '' - cat = category.lower().strip() - # Exact or substring match - for p in expense_products: - name = p['name'].lower() - if cat == name or cat in name or name in cat: - return p['name'] - # Fuzzy fallback (ratio >= 0.4) - names_lower = [p['name'].lower() for p in expense_products] - matches = difflib.get_close_matches(cat, names_lower, n=1, cutoff=0.4) - if matches: - for p in expense_products: - if p['name'].lower() == matches[0]: - return p['name'] - return '' - async def _parse_receipt_text(self, text: str, filename: str, expense_products: list = None, date_hint: str = None) -> dict: @@ -348,35 +324,7 @@ class ExpensesAgent(BaseAgent): fallback = {'vendor': filename, 'amount': 0.0, 'date': date_hint or today, 'time': None, 'product_name': ''} - # ── Fast path: vision model already returned structured JSON ────────── - # receipt_parser._ocr_image_vision() returns a JSON string directly - # when a vision model is configured. Skip the second LLM call entirely. stripped = (text or '').strip() - if stripped.startswith('{'): - try: - data = json.loads(stripped) - if 'amount' in data: - logger.debug('expenses_agent: using vision pre-extracted JSON for %s', filename) - # Map the vision category label → expense product name - product_name = self._match_category( - data.get('category', ''), expense_products or []) - # Vision model sometimes returns the string "null" instead - # of JSON null — normalise both fields. - _NULL = (None, 'null', 'None', '') - raw_time = data.get('time') - time_val = None if raw_time in _NULL else str(raw_time) - raw_date = data.get('date') - date_val = None if raw_date in _NULL else str(raw_date) - return { - 'vendor': str(data.get('vendor') or filename), - 'amount': float(data.get('amount', 0.0)), - 'date': date_val or date_hint or today, - 'time': time_val, - 'product_name': product_name, - } - except (json.JSONDecodeError, ValueError, TypeError): - pass # not clean JSON — fall through to LLM path - ocr_failed = not stripped or stripped.startswith('[') product_list = '' diff --git a/agent_service/config.py b/agent_service/config.py index 0c0d59a..3a11b0d 100644 --- a/agent_service/config.py +++ b/agent_service/config.py @@ -16,10 +16,6 @@ class Settings(BaseSettings): ollama_model: str = 'activeblue-chat' ollama_timeout: int = 300 ollama_max_concurrent: int = 2 - # Set to a vision-capable model (e.g. llama3.2-vision:11b) to use - # vision OCR for receipt images instead of Tesseract. Leave empty - # to keep the Tesseract pipeline. - vision_ocr_model: str = '' # Anthropic / Claude anthropic_api_key: str = '' diff --git a/agent_service/tools/receipt_parser.py b/agent_service/tools/receipt_parser.py index ce69ac9..981cca2 100644 --- a/agent_service/tools/receipt_parser.py +++ b/agent_service/tools/receipt_parser.py @@ -80,121 +80,10 @@ def _extract_zip(zip_filename: str, data: bytes) -> list[dict]: def _ocr_image(data: bytes, filename: str) -> str: - """Extract text from a receipt image. - - Tries vision-model OCR first when VISION_OCR_MODEL is configured, - then falls back to the Tesseract pipeline. - """ - from agent_service.config import get_settings - settings = get_settings() - if settings.vision_ocr_model: - result = _ocr_image_vision(data, filename, - settings.ollama_url, - settings.vision_ocr_model) - if result: - return result - logger.warning('Vision OCR returned empty for %s — falling back to Tesseract', filename) + """Extract text from a receipt image using Tesseract.""" return _ocr_image_tesseract(data, filename) -def _ocr_image_vision(data: bytes, filename: str, ollama_url: str, model: str) -> str: - """Use an Ollama vision model to extract receipt data directly as JSON. - - Returns a JSON string {vendor, amount, date, time, category} so the - expenses agent can skip the second LLM extraction step entirely. - Returns empty string on any failure so the caller falls back to Tesseract. - """ - import json as _json - import re as _re - - def _repair_json(s: str) -> str: - """Fix the most common LLM JSON formatting mistakes. - - Handles: - - trailing commas before } or ] → {"a":1,} becomes {"a":1} - - single-quoted strings → {'a':'b'} becomes {"a":"b"} - - unquoted string keys → {a: "b"} becomes {"a": "b"} - """ - # trailing commas - s = _re.sub(r',\s*([}\]])', r'\1', s) - # single-quoted strings (careful around apostrophes in values) - s = _re.sub(r"'([^']*)'", r'"\1"', s) - # unquoted keys: word characters before a colon - s = _re.sub(r'(? str: """Tesseract-based OCR pipeline (fallback).""" try: diff --git a/tests/test_expenses_agent.py b/tests/test_expenses_agent.py index ddfe128..0b1b332 100644 --- a/tests/test_expenses_agent.py +++ b/tests/test_expenses_agent.py @@ -423,88 +423,12 @@ async def test_act_no_employee_returns_empty_and_escalates(): # --------------------------------------------------------------------------- -# _match_category -# --------------------------------------------------------------------------- - -class TestMatchCategory: - PRODUCTS = [ - {'id': 1, 'name': 'Meals'}, - {'id': 2, 'name': 'Fuel'}, - {'id': 3, 'name': 'Hotel'}, - {'id': 4, 'name': 'Office Supplies'}, - {'id': 5, 'name': 'Transport'}, - {'id': 6, 'name': 'Other'}, - ] - - def test_exact_match(self): - assert ExpensesAgent._match_category('Meals', self.PRODUCTS) == 'Meals' - - def test_case_insensitive(self): - assert ExpensesAgent._match_category('meals', self.PRODUCTS) == 'Meals' - assert ExpensesAgent._match_category('FUEL', self.PRODUCTS) == 'Fuel' - - def test_substring_match(self): - # 'office' is a substring of 'Office Supplies' - assert ExpensesAgent._match_category('office', self.PRODUCTS) == 'Office Supplies' - - def test_fuzzy_match(self): - # 'transport' is close to 'Transport' - assert ExpensesAgent._match_category('transport', self.PRODUCTS) == 'Transport' - - def test_no_match_returns_empty(self): - assert ExpensesAgent._match_category('zxqwerty', self.PRODUCTS) == '' - - def test_empty_category(self): - assert ExpensesAgent._match_category('', self.PRODUCTS) == '' - - def test_empty_products(self): - assert ExpensesAgent._match_category('meals', []) == '' - - -# --------------------------------------------------------------------------- -# _parse_receipt_text — vision JSON fast path +# _parse_receipt_text — LLM extraction path # --------------------------------------------------------------------------- @pytest.mark.asyncio -async def test_parse_vision_json_fast_path(): - """When text is pre-extracted JSON from vision model, skip LLM call.""" - agent = _make_agent() - agent._llm.submit = AsyncMock() # should NOT be called - - vision_json = ('{"vendor":"McDonald\'s","amount":12.50,' - '"date":"2026-05-09","time":"13:30","category":"meals"}') - products = [{'id': 1, 'name': 'Meals'}, {'id': 2, 'name': 'Fuel'}] - - result = await agent._parse_receipt_text(vision_json, 'receipt.jpg', - expense_products=products) - - assert result['vendor'] == "McDonald's" - assert result['amount'] == 12.50 - assert result['date'] == '2026-05-09' - assert result['time'] == '13:30' - assert result['product_name'] == 'Meals' - agent._llm.submit.assert_not_called() - - -@pytest.mark.asyncio -async def test_parse_vision_json_null_time(): - """Vision model may return the string 'null' for time — normalise to None.""" - agent = _make_agent() - agent._llm.submit = AsyncMock() - - vision_json = '{"vendor":"Shell","amount":45.00,"date":"2026-05-09","time":"null","category":"fuel"}' - products = [{'id': 1, 'name': 'Meals'}, {'id': 2, 'name': 'Fuel'}] - - result = await agent._parse_receipt_text(vision_json, 'shell.jpg', - expense_products=products) - assert result['time'] is None - assert result['product_name'] == 'Fuel' - agent._llm.submit.assert_not_called() - - -@pytest.mark.asyncio -async def test_parse_non_json_text_falls_through_to_llm(): - """Plain OCR text (not JSON) should go through the LLM extraction path.""" +async def test_parse_plain_ocr_text_uses_llm(): + """Plain OCR text should go through the LLM extraction path.""" agent = _make_agent() llm_resp = MagicMock() llm_resp.content = '{"vendor":"Acme","amount":9.99,"date":"2026-05-09","time":null,"product_name":"Meals"}'