feat: OCR via tesseract, dedup, category selection for expense receipts
- Dockerfile: install tesseract-ocr so Pillow + pytesseract can OCR receipt images
- operational_store: JSON-serialize raw_data before passing it to asyncpg for the JSONB column
- receipt_parser: add a SHA256 hash and a date extracted from filename timestamps
- expenses_agent: deduplicate receipts by hash before creating expense records
- expenses_agent: fetch all expensable Odoo products and pass the list to the LLM for per-receipt category selection (Meals, Flights, etc.)
- expenses_agent: pass date_hint from the filename (e.g. 20260509_180857.jpg -> 2026-05-09) as a fallback when OCR text is unavailable
- expenses_tools: add get_expense_products() to fetch all expensable products

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -1,4 +1,5 @@
|
||||
from __future__ import annotations
|
||||
import json
|
||||
import logging
|
||||
from datetime import datetime, timedelta
|
||||
|
||||
@@ -11,12 +12,14 @@ class OperationalStore:
|
||||
|
||||
async def store(self, scope, summary, raw_data=None, ttl_days=90, source_directive_id=None):
|
||||
expires_at = datetime.utcnow() + timedelta(days=ttl_days)
|
||||
# asyncpg JSONB column expects a JSON string, not a Python dict
|
||||
raw_data_json = json.dumps(raw_data) if raw_data is not None else None
|
||||
async with self._pool.acquire(timeout=10) as conn:
|
||||
await conn.execute(
|
||||
"""INSERT INTO ab_operational_memory
|
||||
(scope, summary, raw_data, source_directive_id, expires_at)
|
||||
VALUES ($1, $2, $3, $4, $5)""",
|
||||
scope, summary, raw_data, source_directive_id, expires_at)
|
||||
scope, summary, raw_data_json, source_directive_id, expires_at)
|
||||
|
||||
async def get_recent(self, scope, limit=10):
|
||||
async with self._pool.acquire(timeout=10) as conn:
|
||||
|
||||
Reference in New Issue
Block a user