fix(expenses): improve receipt amount extraction and vendor naming
- Remove card brands (VISA/MC/Amex) from _SKIP_LINE_RE so card-terminal lines like "VISA USD$ 36.78" are no longer skipped
- Replace bottom-50% scan with full-text max scan (Pass 2): scans every line in the receipt and returns the largest dollar amount, correctly handling display-style receipts that show the charge at the top with no label (e.g. LAYAL CAFE $40.10 before the item list)
- Update vendor LLM prompt to ask the model to correct OCR garbling (e.g. "NeDonald's" → "McDonald's") and detect bank statements
- Add 4 new tests covering top-amount, card-terminal, max-beats-items, and change-exclusion scenarios (71 tests, all passing)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -474,11 +474,33 @@ class TestExtractAmount:
|
||||
assert _extract_amount_from_text(text) == 5.50
|
||||
|
||||
def test_amount_due_with_usd_suffix(self):
|
||||
# PDF text may include "USD" after the number — regex should still work
|
||||
# via the bottom scan since the labeled-total regex won't match "USD"
|
||||
# "Total Charged" is in _TOTAL_RE — Pass 1 catches it
|
||||
text = 'Total Charged: $198.40 USD'
|
||||
assert _extract_amount_from_text(text) == 198.40
|
||||
|
||||
def test_top_amount_returned_by_max(self):
|
||||
# Display-style receipt: charge shown at top, no 'Total' label.
|
||||
# Pass 2 (max) must find $40.10 even though it is before the item list.
|
||||
text = 'LAYAL CAFE\n$40.10\n--------\nBreakfast 37.30\nCoffee 2.80'
|
||||
assert _extract_amount_from_text(text) == 40.10
|
||||
|
||||
def test_card_terminal_visa_line(self):
|
||||
# Card terminal: amount on a line prefixed with card-brand text.
|
||||
# VISA must NOT be in the skip list so the amount is captured.
|
||||
text = 'MERCHANT XYZ\nYHOOMHXAKKKEO4S VISA USD$ 36.78\nAuth 123456'
|
||||
assert _extract_amount_from_text(text) == 36.78
|
||||
|
||||
def test_max_beats_item_prices(self):
|
||||
# Receipt with several item prices — max should return the largest
|
||||
# (the total), not an item that appears last in the text.
|
||||
text = 'Burger 12.99\nFries 4.50\nDrink 2.99\nT0TAL 20.48'
|
||||
assert _extract_amount_from_text(text) == 20.48
|
||||
|
||||
def test_change_line_excluded_from_max(self):
|
||||
# Change-due line must be skipped so it never inflates the max.
|
||||
text = 'Items 8.49\nCash Tendered 20.00\nChange 11.51'
|
||||
assert _extract_amount_from_text(text) == 8.49
|
||||
|
||||
|
||||
class TestExtractDate:
|
||||
def test_iso_format(self):
|
||||
|
||||
Reference in New Issue
Block a user