fix(expenses): detect bank statements, fix default category, improve prompts

- Add _is_likely_bank_statement(): if OCR text has ≥10 lines with dollar
  amounts it is almost certainly a bank/card statement screenshot, not a
  single receipt.  Return skip=True so _act() skips it and adds a note to
  the escalations list instead of creating a $1,699 expense line.
- Fix default product selection in _act(): prefer "Meals" over whatever
  happens to be first in Odoo's expense product list ("Communication"),
  so unrecognised receipts get a sensible fallback category.
- Improve LLM category prompt: remove hardcoded product names (airline →
  Transport) that don't exist in every Odoo install; describe business
  types semantically so the model picks from the actual available list.
- Mention skipped statements in the final summary message.
- 77 tests, all passing.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Carlos Garcia
2026-05-21 00:25:44 -04:00
parent 6287b3bcef
commit 77fab52475
2 changed files with 98 additions and 7 deletions

View File

@@ -427,7 +427,7 @@ async def test_act_no_employee_returns_empty_and_escalates():
# ---------------------------------------------------------------------------
from agent_service.agents.expenses_agent import (
_extract_amount_from_text, _extract_date_from_text,
_extract_amount_from_text, _extract_date_from_text, _is_likely_bank_statement,
)
@@ -502,6 +502,48 @@ class TestExtractAmount:
assert _extract_amount_from_text(text) == 8.49
class TestBankStatementDetection:
    """Checks for the heuristic that tells statements apart from receipts.

    The tests below pin the cut-over between 9 and 10 amount-bearing
    lines: 9 must not be flagged, 10 must be.
    """

    def _stmt(self, n: int) -> str:
        """Return a synthetic statement made of ``n`` transaction lines.

        Each line carries a May date, a merchant label and a dollar
        amount; lines are separated by a single newline.
        """
        return '\n'.join(
            f'05/{i+1:02d} MERCHANT {i} $1{i}.99' for i in range(n)
        )

    def test_receipt_not_flagged(self):
        """An ordinary restaurant receipt stays below the threshold."""
        # Only five lines of this receipt carry an amount.
        receipt_ocr = 'Acme Cafe\nBurger 12.99\nFries 4.50\nDrink 2.99\nTax 1.65\nTotal 22.13'
        flagged = _is_likely_bank_statement(receipt_ocr)
        assert flagged is False

    def test_statement_flagged(self):
        """Ten transaction lines are enough to be treated as a statement."""
        ten_line_statement = self._stmt(10)
        assert _is_likely_bank_statement(ten_line_statement) is True

    def test_threshold_boundary(self):
        """Nine lines is still a receipt; ten lines is a statement."""
        for line_count, expected in ((9, False), (10, True)):
            verdict = _is_likely_bank_statement(self._stmt(line_count))
            assert verdict is expected

    def test_empty_text(self):
        """Empty OCR output is never a statement."""
        verdict = _is_likely_bank_statement('')
        assert verdict is False

    def test_no_amounts(self):
        """Text without any amounts is never a statement."""
        plain_text = 'Hello world\nNo prices here'
        assert _is_likely_bank_statement(plain_text) is False
@pytest.mark.asyncio
async def test_parse_bank_statement_returns_skip():
    """A statement-like OCR dump must come back flagged to skip.

    The parsed result is expected to carry ``skip=True`` and a zero
    amount, so that no expense amount is taken from the statement.
    """
    agent = _make_agent()

    # Twelve transaction lines — comfortably past the detection threshold.
    transaction_lines = [
        f'05/{i+1:02d} SOME MERCHANT {i} ${10 + i}.99' for i in range(12)
    ]
    stmt_text = '\n'.join(transaction_lines)

    available_products = [{'id': 1, 'name': 'Meals'}]
    parsed = await agent._parse_receipt_text(
        stmt_text,
        '2026-05-15_bank.png',
        expense_products=available_products,
    )

    assert parsed.get('skip') is True
    assert parsed['amount'] == 0.0
class TestExtractDate:
def test_iso_format(self):
assert _extract_date_from_text('Date: 2026-05-09') == '2026-05-09'