fix(expenses): detect bank statements, fix default category, improve prompts
- Add _is_likely_bank_statement(): if OCR text has ≥10 lines with dollar
amounts it is almost certainly a bank/card statement screenshot, not a
single receipt. _parse_receipt_text() then returns skip=True so _act() skips
the file and adds a note to the escalations list instead of creating a
$1,699 expense line.
- Fix default product selection in _act(): prefer "Meals" over whatever
happens to be first in Odoo's expense product list ("Communication"),
so unrecognised receipts get a sensible fallback category.
- Improve LLM category prompt: remove hardcoded product names (airline →
Transport) that don't exist in every Odoo install; describe business
types semantically so the model picks from the actual available list.
- Mention skipped statements in the final summary message.
- 77 tests, all passing.
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -32,6 +32,21 @@ _SKIP_LINE_RE = re.compile(
|
||||
# Any standalone dollar-like amount: optional "$", optional whitespace, then
# 1-6 digit-or-comma characters followed by exactly two decimals.
_ANY_DOLLAR_RE = re.compile(r'(?<!\d)\$?\s*([\d,]{1,6}\.\d{2})(?!\d)')

# A single receipt normally has fewer than 10 lines with dollar amounts
# (items + tax + total).  Bank / credit-card statements have far more
# (one per transaction).  Text with at least this many amount-bearing lines
# is treated as a statement.
_STMT_AMOUNT_LINE_THRESHOLD = 10


def _is_likely_bank_statement(
    text: str,
    threshold: int = _STMT_AMOUNT_LINE_THRESHOLD,
) -> bool:
    """Return True when the OCR text has too many amount-bearing lines to be a receipt.

    A line counts once if ``_ANY_DOLLAR_RE`` matches anywhere in it, no matter
    how many amounts it contains.

    Single receipts: typically 1-9 lines with dollar values.
    Bank/card statements: 10-50+ lines (one per transaction).

    Args:
        text: OCR output to inspect.  ``None`` or an empty string is treated
            as text without any amount lines.
        threshold: Minimum number of amount-bearing lines for the text to be
            classified as a statement.  Defaults to
            ``_STMT_AMOUNT_LINE_THRESHOLD``.

    Returns:
        True if the number of amount-bearing lines is ``>= threshold``.
    """
    # ``text or ''`` keeps the helper safe if a caller passes None.
    amount_lines = sum(
        1 for line in (text or '').splitlines() if _ANY_DOLLAR_RE.search(line)
    )
    return amount_lines >= threshold
|
||||
|
||||
|
||||
# Numeric date patterns; each one captures its three date fields as groups.
# Separators may be "/" or "-" in all three patterns.
_DATE_ISO_RE = re.compile(r'\b(\d{4})[-/](\d{2})[-/](\d{2})\b') # YYYY-MM-DD or YYYY/MM/DD
|
||||
_DATE_US_RE = re.compile(r'\b(\d{1,2})[/\-](\d{1,2})[/\-](\d{4})\b') # M/D/YYYY or M-D-YYYY
|
||||
_DATE_US_SHORT_RE = re.compile(r'\b(\d{1,2})[/\-](\d{1,2})[/\-](\d{2})\b') # M/D/YY or M-D-YY
|
||||
@@ -237,7 +252,14 @@ class ExpensesAgent(BaseAgent):
|
||||
return []
|
||||
|
||||
expense_products = await self._et.get_expense_products()
|
||||
default_product_id = expense_products[0]['id'] if expense_products else None
|
||||
# Prefer "Meals" as the fallback category — most receipts are food.
|
||||
# Avoid blindly defaulting to whatever Odoo returns first (often "Communication").
|
||||
_meals = next((p for p in expense_products
|
||||
if p['name'].lower() == 'meals'), None)
|
||||
default_product_id = (
|
||||
_meals['id'] if _meals
|
||||
else (expense_products[0]['id'] if expense_products else None)
|
||||
)
|
||||
product_map = {p['id']: p['name'] for p in expense_products}
|
||||
logger.info('expenses_agent: %d receipts received, %d expense products available',
|
||||
len(receipts), len(expense_products))
|
||||
@@ -281,6 +303,14 @@ class ExpensesAgent(BaseAgent):
|
||||
parsed = {'vendor': receipt.get('filename', 'Expense'), 'amount': 0.0,
|
||||
'date': receipt.get('date_from_name') or _date.today().isoformat(),
|
||||
'time': None, 'product_name': ''}
|
||||
if parsed.get('skip'):
|
||||
logger.info('expenses_agent: skipping bank/card statement: %s',
|
||||
receipt.get('filename'))
|
||||
self._escalations_list.append(
|
||||
f"Skipped \"{receipt.get('filename')}\": "
|
||||
'looks like a bank or card statement, not a single receipt.'
|
||||
)
|
||||
continue
|
||||
logger.info('parsed filename=%r → vendor=%r amount=%s date=%r product=%r',
|
||||
receipt.get('filename'), parsed.get('vendor'),
|
||||
parsed.get('amount'), parsed.get('date'), parsed.get('product_name'))
|
||||
@@ -447,6 +477,20 @@ class ExpensesAgent(BaseAgent):
|
||||
stripped = (text or '').strip()
|
||||
ocr_failed = not stripped or stripped.startswith('[')
|
||||
|
||||
# ── Bank / card statement detection ──────────────────────────────────
|
||||
# A statement screenshot has many amount-bearing lines; running the
|
||||
# max-scan on it returns a random large transaction, not a total.
|
||||
# Skip these files so they don't produce a wildly wrong expense.
|
||||
if not ocr_failed and _is_likely_bank_statement(stripped):
|
||||
n = sum(1 for l in stripped.splitlines() if _ANY_DOLLAR_RE.search(l))
|
||||
logger.warning(
|
||||
'receipt %s: looks like a bank/card statement (%d amount lines) — skip',
|
||||
filename, n,
|
||||
)
|
||||
return {'vendor': filename, 'amount': 0.0,
|
||||
'date': date_hint or today, 'time': None,
|
||||
'product_name': '', 'skip': True}
|
||||
|
||||
# ── Amount: regex (deterministic) ────────────────────────────────────
|
||||
amount = _extract_amount_from_text(stripped) if not ocr_failed else 0.0
|
||||
|
||||
@@ -475,10 +519,13 @@ class ExpensesAgent(BaseAgent):
|
||||
'If this looks like a bank or credit-card statement listing '
|
||||
'multiple transactions rather than a single merchant receipt, '
|
||||
'use "". Use "" if no clear business name is visible.\n'
|
||||
f'"product_name": the single best match from [{product_list}] '
|
||||
'based on the type of business (restaurant→Meals, gas station→Fuel, '
|
||||
'hotel→Hotel, airline/transit→Transport, office store→Office Supplies). '
|
||||
'Use "" if none fit.\n\n'
|
||||
f'"product_name": pick the single best match from [{product_list}]. '
|
||||
'Guide: restaurant / cafe / fast food → food/meal product; '
|
||||
'airline / airport / transit / taxi / parking / rental car → travel product; '
|
||||
'gas station / petrol / fuel → fuel product; '
|
||||
'hotel / motel / lodging → accommodation product; '
|
||||
'office / tech / hardware store → supplies product. '
|
||||
'Return "" if nothing fits.\n\n'
|
||||
f'Receipt text:\n{excerpt}\n\nJSON only:'
|
||||
)
|
||||
elif product_list:
|
||||
@@ -520,8 +567,10 @@ class ExpensesAgent(BaseAgent):
|
||||
lines = '\n'.join(f' • {a}' for a in self._actions_taken)
|
||||
n_skipped = data.get('n_skipped', 0)
|
||||
dup_note = f'\n({n_skipped} duplicate receipt(s) were automatically skipped.)' if n_skipped else ''
|
||||
stmt_skips = [e for e in self._escalations_list if 'statement' in e.lower()]
|
||||
stmt_note = ('\n⚠ ' + '\n⚠ '.join(stmt_skips)) if stmt_skips else ''
|
||||
summary = (
|
||||
f'Expense report created successfully:\n{lines}{dup_note}\n\n'
|
||||
f'Expense report created successfully:\n{lines}{dup_note}{stmt_note}\n\n'
|
||||
'The report is in draft — open Odoo › Expenses, '
|
||||
'review the amounts, and click Submit to send for approval.'
|
||||
)
|
||||
|
||||
@@ -427,7 +427,7 @@ async def test_act_no_employee_returns_empty_and_escalates():
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
from agent_service.agents.expenses_agent import (
|
||||
_extract_amount_from_text, _extract_date_from_text,
|
||||
_extract_amount_from_text, _extract_date_from_text, _is_likely_bank_statement,
|
||||
)
|
||||
|
||||
|
||||
@@ -502,6 +502,48 @@ class TestExtractAmount:
|
||||
assert _extract_amount_from_text(text) == 8.49
|
||||
|
||||
|
||||
class TestBankStatementDetection:
    """Checks for the ``_is_likely_bank_statement`` line-count heuristic."""

    def _stmt(self, n: int) -> str:
        """Return fake statement text made of ``n`` dated transaction lines."""
        rows = []
        for idx in range(n):
            rows.append(f'05/{idx+1:02d} MERCHANT {idx} $1{idx}.99')
        return '\n'.join(rows)

    def test_receipt_not_flagged(self):
        """An ordinary restaurant receipt (five priced lines) is not a statement."""
        text = 'Acme Cafe\nBurger 12.99\nFries 4.50\nDrink 2.99\nTax 1.65\nTotal 22.13'
        verdict = _is_likely_bank_statement(text)
        assert verdict is False

    def test_statement_flagged(self):
        """Ten transaction lines are enough to be flagged as a statement."""
        ten_line_statement = self._stmt(10)
        assert _is_likely_bank_statement(ten_line_statement) is True

    def test_threshold_boundary(self):
        """Nine lines stay a receipt; the tenth line tips it into a statement."""
        for line_count, expected in ((9, False), (10, True)):
            assert _is_likely_bank_statement(self._stmt(line_count)) is expected

    def test_empty_text(self):
        """Empty OCR output is never a statement."""
        verdict = _is_likely_bank_statement('')
        assert verdict is False

    def test_no_amounts(self):
        """Text without any dollar-like amounts is never a statement."""
        verdict = _is_likely_bank_statement('Hello world\nNo prices here')
        assert verdict is False
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_parse_bank_statement_returns_skip():
    """Statement-like OCR text must come back with skip=True and a zero amount."""
    # Twelve transaction rows of fake OCR text.
    rows = [f'05/{i+1:02d} SOME MERCHANT {i} ${10 + i}.99' for i in range(12)]
    stmt_text = '\n'.join(rows)

    agent = _make_agent()
    products = [{'id': 1, 'name': 'Meals'}]
    result = await agent._parse_receipt_text(
        stmt_text,
        '2026-05-15_bank.png',
        expense_products=products,
    )

    assert result.get('skip') is True
    assert result['amount'] == 0.0
|
||||
|
||||
|
||||
class TestExtractDate:
|
||||
def test_iso_format(self):
|
||||
assert _extract_date_from_text('Date: 2026-05-09') == '2026-05-09'
|
||||
|
||||
Reference in New Issue
Block a user