fix(expenses): add 'Total Sale' and 'Net Sale' to labeled-total pattern

Gas station receipts (Costco, Shell, etc.) print "Total Sale $X.XX" — the
word "Sale" between "Total" and the amount prevented _TOTAL_RE from matching,
causing the Costco receipt to fall through to the max-scan heuristic and
return a garbled OCR value instead of the correct total.

Also add "Net Sale" and "Sale Total" variants for broader coverage.
79 tests, all passing.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Carlos Garcia
2026-05-21 00:36:44 -04:00
parent 77fab52475
commit ce57d19528
2 changed files with 13 additions and 2 deletions

View File

@@ -13,10 +13,12 @@ from ..tools.expenses_tools import ExpensesTools
# ---------------------------------------------------------------------------
# Matches an explicitly labeled total line.
# Handles "Total: $22.46", "GRAND TOTAL 22.46", "Amount Due: 22.46",
# "Total Sale $58.75" (gas stations), "Net Sale $X", "Sale Total $X", etc.
#
# Shape of a match: <label> [":" or "-"] ["$"] <amount>, with optional
# whitespace between every part. Matching is case-insensitive.
# Group 1 captures the amount text only: digits and commas, then a dot and
# exactly two decimals (e.g. "1,234.50"); the "$" sign is not captured.
#
# NOTE(review): no alternative is word-anchored, so the bare `total` also
# matches the tail of "Subtotal" — confirm the caller accounts for that.
_TOTAL_RE = re.compile(
    r'(?:grand\s*total|total\s*due|amount\s*due|balance\s*due|'
    r'total\s*amount|total\s*charged|total\s*sale|net\s*sale|'
    r'sale\s*total|you\s*paid|amount\s*paid|total)'
    r'\s*[:\-]?\s*\$?\s*([\d,]+\.\d{2})',
    re.IGNORECASE,
)

View File

@@ -473,6 +473,15 @@ class TestExtractAmount:
text = 'Items 5.00\nTax 0.50\nTotal\n5.50'
assert _extract_amount_from_text(text) == 5.50
def test_total_sale_gas_station(self):
    """A "Total Sale $X.XX" line is extracted as the receipt amount."""
    # Costco / Shell gas receipts label the total "Total Sale $X.XX"
    # rather than "Total: $X.XX".
    receipt = 'Pump 9 16.189 Gal\nRegular $ 58.75\nTotal Sale $ 58.75'
    extracted = _extract_amount_from_text(receipt)
    assert extracted == 58.75
def test_net_sale(self):
    """A "Net Sale $X.XX" line is extracted as the receipt amount."""
    receipt = 'Items 22.00\nNet Sale $22.00'
    extracted = _extract_amount_from_text(receipt)
    assert extracted == 22.00
def test_amount_due_with_usd_suffix(self):
# "Total Charged" is in _TOTAL_RE — Pass 1 catches it
text = 'Total Charged: $198.40 USD'