feat: file upload + expense report creation from Discuss attachments

- Discuss bot now reads ir.attachment from incoming messages; file-only
  messages no longer silently dropped
- ZIP files are described (contents listed) and bot asks clarifying
  question before acting; user's follow-up reply looks back for pending
  attachments so files don't need to be re-uploaded
- receipt_parser: extracts text from ZIP (recursive), JPG/PNG/etc (OCR),
  PDF (pdfplumber), HTML, TXT
- expenses_agent: full rewrite fixing broken method signatures; adds
  create_expense_sheet / create_expense / attach_receipt flow driven by
  LLM receipt parsing (Ollama, HIPAA-locked)
- master_agent: extra_context threads receipts + user_id into directives
- FastAPI /upload multipart endpoint; registered in main.py
- Odoo /ai/upload controller proxies files to agent service
- ab_ai_bot: dispatch_message_with_files() for multipart uploads

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Carlos Garcia
2026-05-16 01:02:24 -04:00
parent bee8e20580
commit 4b7223a139
11 changed files with 658 additions and 45 deletions

View File

@@ -84,3 +84,64 @@ class ExpensesTools:
async def post_chatter_note(self, model: str, record_id: int, note: str) -> bool:
await self._o.call(model, 'message_post', [[record_id]], {'body': note, 'message_type': 'comment'})
return True
async def get_employee_id_for_user(self, user_id) -> int | None:
if not user_id:
return None
try:
records = await self._o.search_read(
'hr.employee', [('user_id', '=', int(user_id))], ['id', 'name'], limit=1)
return records[0]['id'] if records else None
except Exception as exc:
logger.warning('get_employee_id_for_user failed user_id=%s: %s', user_id, exc)
return None
async def get_default_expense_product(self) -> int | None:
try:
records = await self._o.search_read(
'product.product',
[('can_be_expensed', '=', True), ('type', '=', 'service')],
['id', 'name'], limit=1)
return records[0]['id'] if records else None
except Exception as exc:
logger.warning('get_default_expense_product failed: %s', exc)
return None
async def create_expense_sheet(self, name: str, employee_id: int):
return await self._o.create('hr.expense.sheet', {
'name': name,
'employee_id': employee_id,
})
async def create_expense(self, sheet_id: int, employee_id: int, name: str,
total_amount: float, date: str, product_id: int = None,
description: str = ''):
vals: dict = {
'name': name,
'employee_id': employee_id,
'sheet_id': sheet_id,
'total_amount': total_amount,
'quantity': 1.0,
}
if date:
vals['date'] = date
if product_id:
vals['product_id'] = product_id
if description:
vals['description'] = description
return await self._o.create('hr.expense', vals)
async def attach_receipt(self, model: str, record_id: int, filename: str,
file_b64: str, mimetype: str) -> bool:
try:
await self._o.create('ir.attachment', {
'name': filename,
'datas': file_b64,
'res_model': model,
'res_id': record_id,
'mimetype': mimetype,
})
return True
except Exception as exc:
logger.warning('attach_receipt failed %s/%s: %s', model, record_id, exc)
return False

View File

@@ -0,0 +1,133 @@
from __future__ import annotations
import base64
import io
import logging
import zipfile
from pathlib import Path
logger = logging.getLogger(__name__)
_MIME = {
'.jpg': 'image/jpeg', '.jpeg': 'image/jpeg',
'.png': 'image/png', '.gif': 'image/gif',
'.bmp': 'image/bmp', '.tiff': 'image/tiff', '.tif': 'image/tiff',
'.webp': 'image/webp', '.pdf': 'application/pdf',
'.html': 'text/html', '.htm': 'text/html',
'.txt': 'text/plain', '.zip': 'application/zip',
}
_IMAGE_EXTS = {'.jpg', '.jpeg', '.png', '.gif', '.bmp', '.tiff', '.tif', '.webp'}
def parse_upload(filename: str, data: bytes) -> list[dict]:
"""
Parse one uploaded file into a list of receipt dicts.
ZIP files are recursively unpacked; all other types return a single entry.
Each dict: {filename, text, b64, mimetype}
"""
ext = Path(filename).suffix.lower()
if ext == '.zip':
return _extract_zip(filename, data)
b64 = base64.b64encode(data).decode()
mimetype = _MIME.get(ext, 'application/octet-stream')
if ext in _IMAGE_EXTS:
text = _ocr_image(data, filename)
elif ext == '.pdf':
text = _extract_pdf(data, filename)
elif ext in ('.html', '.htm'):
text = _extract_html(data, filename)
elif ext == '.txt':
text = data.decode('utf-8', errors='replace')
else:
try:
text = data.decode('utf-8', errors='replace')
except Exception:
text = f'[Binary file: {filename}]'
return [{'filename': filename, 'text': text, 'b64': b64, 'mimetype': mimetype}]
def _extract_zip(zip_filename: str, data: bytes) -> list[dict]:
results = []
try:
with zipfile.ZipFile(io.BytesIO(data)) as zf:
for member in zf.namelist():
if member.endswith('/'):
continue
try:
member_data = zf.read(member)
results.extend(parse_upload(Path(member).name, member_data))
except Exception as exc:
logger.warning('receipt_parser: zip member %s failed: %s', member, exc)
except Exception as exc:
logger.error('receipt_parser: zip %s failed: %s', zip_filename, exc)
return results
def _ocr_image(data: bytes, filename: str) -> str:
try:
from PIL import Image
import pytesseract
img = Image.open(io.BytesIO(data))
return pytesseract.image_to_string(img).strip()
except ImportError:
logger.warning('pytesseract/Pillow not installed — OCR unavailable for %s', filename)
return f'[Image: {filename} — install pytesseract+Pillow for OCR]'
except Exception as exc:
logger.warning('OCR failed for %s: %s', filename, exc)
return f'[Image: {filename} — OCR failed: {exc}]'
def _extract_pdf(data: bytes, filename: str) -> str:
try:
import pdfplumber
parts = []
with pdfplumber.open(io.BytesIO(data)) as pdf:
for page in pdf.pages:
t = page.extract_text()
if t:
parts.append(t)
return '\n'.join(parts).strip()
except ImportError:
logger.warning('pdfplumber not installed — PDF extraction unavailable for %s', filename)
return f'[PDF: {filename} — install pdfplumber for text extraction]'
except Exception as exc:
logger.warning('PDF extraction failed for %s: %s', filename, exc)
return f'[PDF: {filename} — extraction failed: {exc}]'
def _extract_html(data: bytes, filename: str) -> str:
try:
from html.parser import HTMLParser
class _TextExtractor(HTMLParser):
def __init__(self):
super().__init__()
self._parts: list[str] = []
self._skip = False
def handle_starttag(self, tag, attrs):
if tag in ('script', 'style'):
self._skip = True
def handle_endtag(self, tag):
if tag in ('script', 'style'):
self._skip = False
def handle_data(self, data):
if not self._skip:
s = data.strip()
if s:
self._parts.append(s)
def text(self):
return ' '.join(self._parts)
parser = _TextExtractor()
parser.feed(data.decode('utf-8', errors='replace'))
return parser.text()
except Exception as exc:
logger.warning('HTML extraction failed for %s: %s', filename, exc)
return f'[HTML: {filename} — extraction failed: {exc}]'