fix(expenses): detect bank statements, fix default category, improve prompts

- Add _is_likely_bank_statement(): if the OCR text has ≥10 lines containing dollar amounts, it is almost certainly a bank/card statement screenshot rather than a single receipt. _parse_receipt_text() then returns skip=True so that _act() skips the image and adds a note to the escalations list instead of creating a $1,699 expense line.
- Fix default product selection in _act(): prefer "Meals" over whatever happens to be first in Odoo's expense product list ("Communication"), so unrecognised receipts get a sensible fallback category.
- Improve the LLM category prompt: remove hardcoded product names (e.g. airline → Transport) that don't exist in every Odoo install; describe business types semantically so the model picks from the list of products that is actually available.
- Mention skipped statements in the final summary message.
- 77 tests, all passing.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-05-21 00:25:44 -04:00
parent 6287b3bcef
commit 77fab52475
2 changed files with 98 additions and 7 deletions
--- a/tests/test_expenses_agent.py
+++ b/tests/test_expenses_agent.py
@@ -427,7 +427,7 @@ async def test_act_no_employee_returns_empty_and_escalates():
 # ---------------------------------------------------------------------------

 from agent_service.agents.expenses_agent import (
-    _extract_amount_from_text, _extract_date_from_text,
+    _extract_amount_from_text, _extract_date_from_text, _is_likely_bank_statement,
 )


@@ -502,6 +502,48 @@ class TestExtractAmount:
        assert _extract_amount_from_text(text) == 8.49


+class TestBankStatementDetection:
+    def _stmt(self, n: int) -> str:
+        """Generate fake bank statement with n transaction lines."""
+        lines = [f'05/{i+1:02d}  MERCHANT {i}  $1{i}.99' for i in range(n)]
+        return '\n'.join(lines)
+
+    def test_receipt_not_flagged(self):
+        # A typical restaurant receipt has < 10 amount-bearing lines
+        text = 'Acme Cafe\nBurger 12.99\nFries 4.50\nDrink 2.99\nTax 1.65\nTotal 22.13'
+        assert _is_likely_bank_statement(text) is False
+
+    def test_statement_flagged(self):
+        # 10 transaction lines → flagged as statement
+        assert _is_likely_bank_statement(self._stmt(10)) is True
+
+    def test_threshold_boundary(self):
+        assert _is_likely_bank_statement(self._stmt(9)) is False
+        assert _is_likely_bank_statement(self._stmt(10)) is True
+
+    def test_empty_text(self):
+        assert _is_likely_bank_statement('') is False
+
+    def test_no_amounts(self):
+        assert _is_likely_bank_statement('Hello world\nNo prices here') is False
+
+
+@pytest.mark.asyncio
+async def test_parse_bank_statement_returns_skip():
+    """Bank statement image must be skipped — no amount, skip=True returned."""
+    agent = _make_agent()
+    # Build fake OCR text with 12 transaction lines
+    stmt_text = '\n'.join(
+        f'05/{i+1:02d}  SOME MERCHANT {i}  ${10 + i}.99' for i in range(12)
+    )
+    result = await agent._parse_receipt_text(
+        stmt_text, '2026-05-15_bank.png',
+        expense_products=[{'id': 1, 'name': 'Meals'}],
+    )
+    assert result.get('skip') is True
+    assert result['amount'] == 0.0
+
+
 class TestExtractDate:
    def test_iso_format(self):
        assert _extract_date_from_text('Date: 2026-05-09') == '2026-05-09'