fix: improve OCR accuracy for rotated/sideways receipt photos

- Dockerfile: add tesseract-ocr-osd for orientation detection data
- receipt_parser: resize large phone photos to 1800px wide, convert to
  grayscale, and sharpen before OCR; use psm 1 (auto + OSD) so rotated
  receipts are correctly oriented before text extraction
- expenses_agent: tighten the amount extraction prompt to pick the FINAL
  total, not the subtotal or tax line, reducing misreads like 42.90->409.00

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-05-16 01:51:29 -04:00
parent 8a9d772b8e
commit c2d1078d79
3 changed files with 33 additions and 6 deletions
--- a/agent_service/tools/receipt_parser.py
+++ b/agent_service/tools/receipt_parser.py
@@ -81,10 +81,33 @@ def _extract_zip(zip_filename: str, data: bytes) -> list[dict]:

 def _ocr_image(data: bytes, filename: str) -> str:
    try:
-        from PIL import Image
+        from PIL import Image, ImageFilter, ImageOps
        import pytesseract
        img = Image.open(io.BytesIO(data))
-        return pytesseract.image_to_string(img).strip()
+
+        # Resize very large images — tesseract is slower and less accurate at
+        # phone-camera resolution; 1800px wide is plenty for receipt text.
+        max_w = 1800
+        if img.width > max_w:
+            scale = max_w / img.width
+            img = img.resize((max_w, int(img.height * scale)), Image.LANCZOS)
+
+        # Convert to grayscale and sharpen — improves OCR on thermal receipts
+        img = ImageOps.grayscale(img)
+        img = img.filter(ImageFilter.SHARPEN)
+
+        # Let Tesseract detect orientation (OSD); --oem 3 = default engine mode.
+        # psm 1 = automatic page segmentation + OSD so rotated/sideways receipts
+        # are handled correctly. Fall back to psm 6 if the psm 1 call raises.
+        config_osd = '--oem 3 --psm 1'
+        config_block = '--oem 3 --psm 6'
+        try:
+            text = pytesseract.image_to_string(img, config=config_osd).strip()
+        except Exception:
+            text = pytesseract.image_to_string(img, config=config_block).strip()
+
+        logger.debug('OCR %s: %d chars extracted', filename, len(text))
+        return text
    except ImportError:
        logger.warning('pytesseract/Pillow not installed — OCR unavailable for %s', filename)
        return f'[Image: {filename} — install pytesseract+Pillow for OCR]'