fix(expenses): LAYAL CAFE $2.80 bug, United Airlines rotation & date
LAYAL CAFE ($2.80 instead of $42.90):
- Add a (?!\s*tax) lookahead to _TOTAL_RE so that "Total Taxes $2.80" is never matched as the receipt total.
- Change Pass 1 from matches[-1] to max() so the largest labeled amount always wins, regardless of line order in the OCR output. This also covers the case where OCR drops the "Taxes" word and the tax line reads like a second "Total".

United Airlines (Subway/$0/wrong date):
- Add OSD-based rotation correction in receipt_parser.py: after the EXIF transpose, ask Tesseract's orientation-detection engine (--psm 0) what angle to rotate by. This applies to receipts photographed lying sideways, where EXIF metadata cannot help.
- Add month-name date patterns (DD MON YYYY / MON DD YYYY) to _extract_date_from_text for airline/hotel receipts that print dates like "05 MAY 2026" instead of "05/07/26".

85 tests, all passing.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -428,6 +428,7 @@ async def test_act_no_employee_returns_empty_and_escalates():
|
||||
|
||||
from agent_service.agents.expenses_agent import (
|
||||
_extract_amount_from_text, _extract_date_from_text, _is_likely_bank_statement,
|
||||
_MONTH_MAP,
|
||||
)
|
||||
|
||||
|
||||
@@ -473,6 +474,19 @@ class TestExtractAmount:
|
||||
text = 'Items 5.00\nTax 0.50\nTotal\n5.50'
|
||||
assert _extract_amount_from_text(text) == 5.50
|
||||
|
||||
def test_total_taxes_excluded(self):
    """A 'Total Taxes' line must not be reported as the receipt total."""
    # The labeled-total regex rejects 'total tax/taxes' through a lookahead,
    # so the $2.80 tax line is skipped and the real total is returned.
    receipt = 'Subtotal $40.10\nTotal Taxes $2.80\nTotal $42.90'
    amount = _extract_amount_from_text(receipt)
    assert amount == 42.90
|
||||
|
||||
def test_pass1_returns_max_not_last(self):
    """With several labeled totals, the largest wins, not the last one."""
    # When OCR garbles "Total Taxes" into "Total\n$2.80", _TOTAL_RE ends up
    # matching twice. Picking max() instead of the positional [-1] keeps
    # the real total. Simulated here with two labeled totals where the
    # smaller amount appears second.
    receipt = 'Grand Total $42.90\nTotal $2.80'
    amount = _extract_amount_from_text(receipt)
    assert amount == 42.90
|
||||
|
||||
def test_total_sale_gas_station(self):
|
||||
# Costco / Shell gas receipts say "Total Sale $X.XX", not "Total: $X.XX"
|
||||
text = 'Pump 9 16.189 Gal\nRegular $ 58.75\nTotal Sale $ 58.75'
|
||||
@@ -566,6 +580,20 @@ class TestExtractDate:
|
||||
def test_us_short_year(self):
    """A slash-separated date with a two-digit year comes back as ISO."""
    parsed = _extract_date_from_text('05/09/26')
    assert parsed == '2026-05-09'
|
||||
|
||||
def test_dd_mon_yyyy(self):
    """A 'DD MON YYYY' date embedded in surrounding text is extracted."""
    # Airline / hotel receipts print dates such as "05 MAY 2026".
    line = 'Issue Date: 05 MAY 2026 MIA A70'
    parsed = _extract_date_from_text(line)
    assert parsed == '2026-05-05'
|
||||
|
||||
def test_mon_dd_yyyy(self):
    """A 'MON DD YYYY' date is converted to ISO format."""
    parsed = _extract_date_from_text('MAY 05 2026')
    assert parsed == '2026-05-05'
|
||||
|
||||
def test_mon_dd_comma_yyyy(self):
    """A 'Mon D, YYYY' date (comma, unpadded day) is converted to ISO."""
    parsed = _extract_date_from_text('May 5, 2026')
    assert parsed == '2026-05-05'
|
||||
|
||||
def test_month_map_completeness(self):
    """Every English three-letter month abbreviation is a key of _MONTH_MAP.

    Counting three-letter keys alone would accept twelve misspelled or
    unrelated keys, so the key set itself is compared against the expected
    abbreviations. The comparison is case-insensitive; this test does not
    depend on the case the keys are stored in.
    """
    # Hard-coded rather than taken from calendar.month_abbr, which follows
    # the current locale.
    expected = {
        'jan', 'feb', 'mar', 'apr', 'may', 'jun',
        'jul', 'aug', 'sep', 'oct', 'nov', 'dec',
    }
    three_letter_keys = {k.lower() for k in _MONTH_MAP if len(k) == 3}
    assert three_letter_keys == expected
|
||||
|
||||
def test_no_date(self):
    """Text that contains no date yields None."""
    outcome = _extract_date_from_text('No date here')
    assert outcome is None
|
||||
|
||||
|
||||
Reference in New Issue
Block a user