From 4b7223a1391373d41173bea53afd23965bd324f4 Mon Sep 17 00:00:00 2001 From: Carlos Garcia Date: Sat, 16 May 2026 01:02:24 -0400 Subject: [PATCH] feat: file upload + expense report creation from Discuss attachments - Discuss bot now reads ir.attachment from incoming messages; file-only messages no longer silently dropped - ZIP files are described (contents listed) and bot asks clarifying question before acting; user's follow-up reply looks back for pending attachments so files don't need to be re-uploaded - receipt_parser: extracts text from ZIP (recursive), JPG/PNG/etc (OCR), PDF (pdfplumber), HTML, TXT - expenses_agent: full rewrite fixing broken method signatures; adds create_expense_sheet / create_expense / attach_receipt flow driven by LLM receipt parsing (Ollama, HIPAA-locked) - master_agent: extra_context threads receipts + user_id into directives - FastAPI /upload multipart endpoint; registered in main.py - Odoo /ai/upload controller proxies files to agent service - ab_ai_bot: dispatch_message_with_files() for multipart uploads Co-Authored-By: Claude Sonnet 4.6 --- addons/activeblue_ai/controllers/approval.py | 40 +++- addons/activeblue_ai/models/ab_ai_bot.py | 38 ++++ addons/activeblue_ai/models/ab_ai_mail.py | 135 ++++++++++++- agent_service/agents/base_agent.py | 1 + agent_service/agents/expenses_agent.py | 187 ++++++++++++++++--- agent_service/agents/master_agent.py | 20 +- agent_service/main.py | 3 +- agent_service/routers/upload.py | 80 ++++++++ agent_service/tools/expenses_tools.py | 61 ++++++ agent_service/tools/receipt_parser.py | 133 +++++++++++++ requirements.txt | 5 + 11 files changed, 658 insertions(+), 45 deletions(-) create mode 100644 agent_service/routers/upload.py create mode 100644 agent_service/tools/receipt_parser.py diff --git a/addons/activeblue_ai/controllers/approval.py b/addons/activeblue_ai/controllers/approval.py index 940f4fb..d1077e1 100644 --- a/addons/activeblue_ai/controllers/approval.py +++ b/addons/activeblue_ai/controllers/approval.py @@ -3,7 +3,7 @@ import logging import requests from odoo import http -from odoo.http import request +from odoo.http import request, Response _logger = logging.getLogger(__name__) @@ -58,3 +58,41 @@ class AiApprovalController(http.Controller): session_id=session_id, ) return result + + @http.route('/ai/upload', type='http', auth='user', methods=['POST'], csrf=False) + def upload(self, **kwargs): + bot = request.env['ab.ai.bot'].sudo().get_active_bot() + if not bot: + return Response( + json.dumps({'error': 'No bot configured'}), + content_type='application/json', status=503) + + url = bot._get_service_url() + '/upload' + message = request.httprequest.form.get( + 'message', 'Create an employee expense report from these receipts.') + session_id = request.httprequest.form.get('session_id', '') + + files_data = [ + ('files', (f.filename, f.read(), f.content_type or 'application/octet-stream')) + for f in request.httprequest.files.getlist('files') + ] + + try: + resp = requests.post( + url, + data={ + 'user_id': str(request.env.user.id), + 'message': message, + 'session_id': session_id, + }, + files=files_data or [('files', ('empty', b'', 'application/octet-stream'))], + headers=bot._build_headers(), + timeout=120, + ) + resp.raise_for_status() + return Response(resp.text, content_type='application/json') + except Exception as exc: + _logger.error('upload proxy failed: %s', exc) + return Response( + json.dumps({'error': str(exc), 'reply': 'Upload failed. Please try again.'}), + content_type='application/json', status=500) diff --git a/addons/activeblue_ai/models/ab_ai_bot.py b/addons/activeblue_ai/models/ab_ai_bot.py index 6302ed0..ebef265 100644 --- a/addons/activeblue_ai/models/ab_ai_bot.py +++ b/addons/activeblue_ai/models/ab_ai_bot.py @@ -111,6 +111,44 @@ class AbAiBot(models.Model): _logger.error('dispatch_message failed: %s', exc) raise UserError(_('Could not reach AI service: %s') % exc) + def dispatch_message_with_files(self, user_id, message, attachments, context=None, session_id=None): + """Send a message with file attachments to the /upload endpoint as multipart.""" + self.ensure_one() + import base64 + url = self._get_service_url() + '/upload' + + files = [] + for att in attachments: + try: + data = base64.b64decode(att.datas) if att.datas else b'' + files.append(('files', (att.name or 'attachment', + data, + att.mimetype or 'application/octet-stream'))) + except Exception as exc: + _logger.warning('Could not encode attachment %s: %s', att.id, exc) + + # Omit Content-Type so requests sets the multipart boundary automatically + headers = {} + if self.webhook_secret: + headers['X-ActiveBlue-Signature'] = self.webhook_secret + + form_data = { + 'user_id': str(user_id), + 'message': message or 'Create an employee expense report from these receipts.', + 'session_id': session_id or '', + } + + try: + resp = requests.post(url, data=form_data, files=files or [('files', ('empty', b'', 'text/plain'))], + headers=headers, timeout=120) + resp.raise_for_status() + return resp.json() + except requests.exceptions.Timeout: + raise UserError(_('AI service timed out. Please try again.')) + except requests.exceptions.RequestException as exc: + _logger.error('dispatch_message_with_files failed: %s', exc) + raise UserError(_('Could not reach AI service: %s') % exc) + @api.model def cron_ping_all(self): any_online = False diff --git a/addons/activeblue_ai/models/ab_ai_mail.py b/addons/activeblue_ai/models/ab_ai_mail.py index a14d438..7a54306 100644 --- a/addons/activeblue_ai/models/ab_ai_mail.py +++ b/addons/activeblue_ai/models/ab_ai_mail.py @@ -1,6 +1,9 @@ from __future__ import annotations +import base64 +import io import logging import re +import zipfile from odoo import models, api @@ -8,11 +11,52 @@ _logger = logging.getLogger(__name__) _HTML_TAG = re.compile(r'<[^>]+>') +# How many recent messages to scan when looking for a pending file upload +_LOOKBACK_MESSAGES = 10 + +# File type labels shown in the clarification message +_EXT_LABELS = { + 'jpg': 'image', 'jpeg': 'image', 'png': 'image', 'gif': 'image', + 'bmp': 'image', 'tiff': 'image', 'tif': 'image', 'webp': 'image', + 'pdf': 'PDF', 'html': 'HTML', 'htm': 'HTML', + 'txt': 'text', 'csv': 'spreadsheet', 'xlsx': 'spreadsheet', + 'zip': 'ZIP archive', +} + def _strip_html(html: str) -> str: return _HTML_TAG.sub(' ', html or '').strip() +def _ext(filename: str) -> str: + return filename.rsplit('.', 1)[-1].lower() if '.' in filename else '' + + +def _describe_zip(datas_b64: str, zip_name: str) -> str: + """Return a short HTML summary of a ZIP's contents without extracting data.""" + try: + raw = base64.b64decode(datas_b64) + with zipfile.ZipFile(io.BytesIO(raw)) as zf: + members = [m for m in zf.namelist() if not m.endswith('/')] + if not members: + return f'{zip_name} (empty archive)' + # Count by type + counts: dict[str, int] = {} + for m in members: + label = _EXT_LABELS.get(_ext(m), 'file') + counts[label] = counts.get(label, 0) + 1 + type_summary = ', '.join(f'{n} {t}(s)' for t, n in counts.items()) + lines = [f'{zip_name} — {len(members)} item(s): {type_summary}'] + for m in members[:8]: + lines.append(f'  • {m}') + if len(members) > 8: + lines.append(f'  … and {len(members) - 8} more') + return '
'.join(lines) + except Exception as exc: + _logger.warning('_describe_zip failed for %s: %s', zip_name, exc) + return f'{zip_name} (could not inspect contents)' + + class DiscussChannel(models.Model): _inherit = 'discuss.channel' @@ -40,10 +84,28 @@ class DiscussChannel(models.Model): return result text = _strip_html(body) - if not text: + attachments = result.attachment_ids + + # Nothing to work with + if not text and not attachments: return result - # Identify the human sender + # ── Case 1: file(s) with no instruction ────────────────────────────── + # Show the user what we received and ask what to do with it. + if attachments and not text: + self._post_file_clarification(attachments, bot_partner) + return result + + # ── Case 2: text only — look back for a pending file upload ────────── + # If the user just replied to our clarification question, find the + # attachment(s) they uploaded earlier in this conversation. + pending = self.env['ir.attachment'].browse() + if text and not attachments: + pending = self._find_pending_attachments(bot_partner) + + effective_attachments = attachments or pending + + # ── Case 3: text (+ possibly pending files) → dispatch ─────────────── human_partner = member_partners.filtered(lambda p: p != bot_partner)[:1] user = self.env['res.users'].search([('partner_id', '=', human_partner.id)], limit=1) uid = user.id if user else self.env.uid @@ -52,11 +114,21 @@ class DiscussChannel(models.Model): bot = self.env['ab.ai.bot'].sudo().search([('active', '=', True)], limit=1) if not bot: return result - response = bot.dispatch_message( - user_id=uid, - message=text, - context={'channel_id': self.id, 'source': 'discuss'}, - ) + + if effective_attachments: + response = bot.dispatch_message_with_files( + user_id=uid, + message=text, + attachments=effective_attachments, + context={'channel_id': self.id, 'source': 'discuss'}, + ) + else: + response = bot.dispatch_message( + user_id=uid, + message=text, + context={'channel_id': self.id, 'source': 'discuss'}, + ) + reply = (response or {}).get('reply') or (response or {}).get('message') or \ 'I could not process your request right now.' self.sudo().message_post( @@ -69,3 +141,52 @@ class DiscussChannel(models.Model): _logger.error('AI bot Discuss reply failed: %s', exc) return result + + def _post_file_clarification(self, attachments, bot_partner): + """Describe the uploaded file(s) and ask the user what to do with them.""" + lines = [] + for att in attachments: + name = att.name or 'file' + ext = _ext(name) + if ext == 'zip' and att.datas: + lines.append(_describe_zip(att.datas, name)) + else: + label = _EXT_LABELS.get(ext, 'file') + lines.append(f'{name} ({label})') + + file_summary = '
'.join(lines) + question = ( + f'I received the following file(s):
{file_summary}

' + 'What would you like me to do with them? Some options:
' + '• Create an expense report from these receipts
' + '• Import products from this data
' + '• Something else — just tell me what you need' + ) + self.sudo().message_post( + body=question, + author_id=bot_partner.id, + message_type='comment', + subtype_xmlid='mail.mt_comment', + ) + + def _find_pending_attachments(self, bot_partner): + """ + Scan the last _LOOKBACK_MESSAGES messages in this channel for the most + recent human-sent message that has attachments. Only returns them if + the immediately following bot message looks like a clarification question + (i.e. the bot hasn't already acted on those files). + """ + messages = self.message_ids.sorted('date', reverse=True)[:_LOOKBACK_MESSAGES] + prev_was_bot_question = False + for msg in messages: + is_bot = msg.author_id == bot_partner + if is_bot: + # Check whether this bot message was a clarification question + if 'what would you like me to do' in (msg.body or '').lower(): + prev_was_bot_question = True + continue + # Human message + if msg.attachment_ids and prev_was_bot_question: + return msg.attachment_ids + prev_was_bot_question = False + return self.env['ir.attachment'].browse() diff --git a/agent_service/agents/base_agent.py b/agent_service/agents/base_agent.py index 07b1c25..2ad2564 100644 --- a/agent_service/agents/base_agent.py +++ b/agent_service/agents/base_agent.py @@ -27,6 +27,7 @@ class DirectiveContext: recent_findings: list = field(default_factory=list) conversation_summary: str = '' peer_data: dict = field(default_factory=dict) + receipts: list = field(default_factory=list) # populated by upload flow @dataclass diff --git a/agent_service/agents/expenses_agent.py b/agent_service/agents/expenses_agent.py index c4fb995..17247d2 100644 --- a/agent_service/agents/expenses_agent.py +++ b/agent_service/agents/expenses_agent.py @@ -1,5 +1,7 @@ from __future__ import annotations +import json import logging +from datetime import date as _date from .base_agent import BaseAgent, AgentReport, AgentDirective, SweepReport from ..tools.expenses_tools import ExpensesTools @@ -46,23 +48,27 @@ class ExpensesAgent(BaseAgent): def __init__(self, odoo, llm, peer_bus=None): super().__init__(odoo, llm, peer_bus) self._et = ExpensesTools(odoo) - self._gathered_data = {} - self._actions_taken = [] - self._escalations_list = [] + self._gathered_data: dict = {} + self._actions_taken: list = [] + self._escalations_list: list = [] - async def _plan(self, directive: AgentDirective) -> dict: - intent = (directive.intent or '').lower() + async def _plan(self) -> dict: + task = (self._directive.task if self._directive else '').lower() + receipts = getattr(self._directive.context, 'receipts', []) if self._directive else [] return { - 'fetch_summary': any(k in intent for k in ('summary', 'overview', 'report')), - 'fetch_pending': any(k in intent for k in ('pending', 'approve', 'approval')), - 'employee_id': directive.context.get('employee_id'), - 'date_from': directive.context.get('date_from'), - 'date_to': directive.context.get('date_to'), + 'mode': 'create_from_receipts' if receipts else 'read', + 'fetch_summary': any(k in task for k in ('summary', 'overview')) and not receipts, + 'fetch_pending': any(k in task for k in ('pending', 'approve', 'approval')) and not receipts, + 'employee_id': self._directive.params.get('employee_id') if self._directive else None, + 'date_from': self._directive.params.get('date_from') if self._directive else None, + 'date_to': self._directive.params.get('date_to') if self._directive else None, } - async def _gather(self, ctx: dict) -> dict: - plan = ctx.get('plan', {}) - data: dict = {} + async def _gather(self, plan: dict) -> dict: + data: dict = {'mode': plan.get('mode', 'read')} + if plan.get('mode') == 'create_from_receipts': + self._gathered_data = data + return data data['summary'] = await self._et.get_expenses_summary( date_from=plan.get('date_from'), date_to=plan.get('date_to'), ) @@ -71,9 +77,12 @@ class ExpensesAgent(BaseAgent): self._gathered_data = data return data - async def _reason(self, ctx: dict) -> dict: + async def _reason(self) -> dict: data = self._gathered_data analysis: dict = {'escalations': [], 'flags': []} + if data.get('mode') == 'create_from_receipts': + self._escalations_list = [] + return analysis summary = data.get('summary', {}) if summary.get('pending_approval_count', 0) > 10: analysis['escalations'].append( @@ -82,23 +91,135 @@ class ExpensesAgent(BaseAgent): self._escalations_list = analysis['escalations'] return analysis - async def _act(self, ctx: dict) -> list: - return [] + async def _act(self, reasoning: dict) -> list: + if self._gathered_data.get('mode') != 'create_from_receipts': + return [] + receipts = getattr(self._directive.context, 'receipts', []) if self._directive else [] + if not receipts: + return [] - async def _report(self, ctx: dict) -> AgentReport: + user_id = (self._directive.context.peer_data.get('requesting_user_id') + if self._directive else None) + employee_id = await self._et.get_employee_id_for_user(user_id) + if not employee_id: + self._escalations_list.append( + 'No employee record found for the current user; cannot create expense report.') + return [] + + sheet_name = f'Expense Report - {_date.today().isoformat()}' + sheet_result = await self._et.create_expense_sheet(sheet_name, employee_id) + if not sheet_result.success: + self._escalations_list.append(f'Failed to create expense sheet: {sheet_result.error}') + return [] + + sheet_id = sheet_result.record_id + actions = [f'Created expense sheet "{sheet_name}" (ID {sheet_id})'] + + product_id = await self._et.get_default_expense_product() + + for receipt in receipts: + parsed = await self._parse_receipt_text( + receipt.get('text', ''), receipt.get('filename', 'receipt')) + expense_result = await self._et.create_expense( + sheet_id=sheet_id, + employee_id=employee_id, + name=str(parsed.get('vendor', receipt.get('filename', 'Expense')))[:64], + total_amount=float(parsed.get('amount', 0.0)), + date=str(parsed.get('date') or _date.today().isoformat()), + product_id=product_id, + description=str(parsed.get('description', '')), + ) + if expense_result.success: + actions.append( + f"Added: {parsed.get('vendor', 'Unknown vendor')} " + f"${float(parsed.get('amount', 0)):.2f} " + f"on {parsed.get('date', 'today')}" + ) + if receipt.get('b64'): + await self._et.attach_receipt( + 'hr.expense', expense_result.record_id, + receipt.get('filename', 'receipt'), + receipt['b64'], + receipt.get('mimetype', 'application/octet-stream'), + ) + else: + actions.append( + f"Could not create expense for {receipt.get('filename', 'receipt')}: " + f"{expense_result.error}" + ) + + self._actions_taken = actions + return actions + + async def _parse_receipt_text(self, text: str, filename: str) -> dict: + fallback = {'vendor': filename, 'amount': 0.0, + 'date': _date.today().isoformat(), 'description': filename} + if not text or text.startswith('['): + return fallback + + prompt = ( + 'Extract expense details from the following receipt text. ' + 'Return ONLY valid JSON with these keys: ' + '"vendor" (string), "amount" (number, the total charged), ' + '"date" (string YYYY-MM-DD, use today if absent), ' + '"description" (string, brief expense type).\n\n' + f'Receipt text (first 2000 chars):\n{text[:2000]}\n\nJSON only:' + ) + try: + resp = await self._llm.submit( + [{'role': 'user', 'content': prompt}], + caller='expenses_agent_receipt_parser', + ) + raw = (resp.content or '').strip() + first, last = raw.find('{'), raw.rfind('}') + if first != -1 and last > first: + data = json.loads(raw[first:last + 1]) + return { + 'vendor': str(data.get('vendor', filename)), + 'amount': float(data.get('amount', 0.0)), + 'date': str(data.get('date', _date.today().isoformat())), + 'description': str(data.get('description', '')), + } + except Exception as exc: + logger.warning('Receipt parse failed for %s: %s', filename, exc) + return fallback + + async def _report(self) -> AgentReport: data = self._gathered_data - summary = data.get('summary', {}) + directive_id = self._directive.directive_id if self._directive else '' + + if data.get('mode') == 'create_from_receipts': + if self._actions_taken: + lines = '\n'.join(f' • {a}' for a in self._actions_taken) + summary = ( + f'Expense report created successfully:\n{lines}\n\n' + 'The report is in draft. Please open Odoo > Expenses, ' + 'review the entries, and click Submit to send for approval.' + ) + status = 'complete' + else: + summary = ('Could not create expense report. ' + + '; '.join(self._escalations_list or ['Unknown error'])) + status = 'failed' + return AgentReport( + directive_id=directive_id, agent=self.name, status=status, + summary=summary, data=data, + escalations=self._escalations_list, actions_taken=self._actions_taken) + + summary_data = data.get('summary', {}) parts = [] - if summary: + if summary_data: parts.append( - f'Expenses: {summary.get("total_expenses", 0)} records, ' - f'total {summary.get("total_amount", 0):.2f}. ' - f'{summary.get("pending_approval_count", 0)} pending approval.' + f'Expenses: {summary_data.get("total_expenses", 0)} records, ' + f'total ${summary_data.get("total_amount", 0):.2f}. ' + f'{summary_data.get("pending_approval_count", 0)} pending approval.' ) if not parts: parts.append('Expenses review complete.') - return AgentReport(agent=self.name, summary=chr(10).join(parts), - data=data, escalations=self._escalations_list, actions_taken=[]) + return AgentReport( + directive_id=directive_id, agent=self.name, status='complete', + summary='\n'.join(parts), data=data, + escalations=self._escalations_list, actions_taken=[]) async def _dispatch_tool(self, name: str, args: dict): dispatch = { @@ -121,7 +242,8 @@ class ExpensesAgent(BaseAgent): if req_type == 'expenses_summary': return await self._et.get_expenses_summary() if req_type == 'employee_expenses': - return {'expenses': await self._et.get_expense_by_employee(employee_id=request['employee_id'])} + return {'expenses': await self._et.get_expense_by_employee( + employee_id=request['employee_id'])} return {'error': f'Unknown type: {req_type}'} except Exception as exc: return {'error': str(exc)} @@ -131,10 +253,15 @@ class ExpensesAgent(BaseAgent): try: pending = await self._et.get_pending_approvals() for sheet in pending: - findings.append({'type': 'pending_expense_approval', 'sheet_id': sheet.get('id'), - 'employee': sheet.get('employee_id', [0, ''])[1] if isinstance(sheet.get('employee_id'), list) else '', - 'amount': sheet.get('total_amount', 0), 'severity': 'low'}) + emp = sheet.get('employee_id', [0, '']) + findings.append({ + 'type': 'pending_expense_approval', + 'sheet_id': sheet.get('id'), + 'employee': emp[1] if isinstance(emp, list) else '', + 'amount': sheet.get('total_amount', 0), + 'severity': 'low', + }) except Exception as exc: - return SweepReport(agent=self.name, findings=[], actions=[], error=str(exc)) - return SweepReport(agent=self.name, findings=findings, actions=[], + return SweepReport(agent=self.name, findings=[], error=str(exc)) + return SweepReport(agent=self.name, findings=findings, actions_taken=[], summary=f'Expenses sweep: {len(findings)} pending approvals.') diff --git a/agent_service/agents/master_agent.py b/agent_service/agents/master_agent.py index eba946d..fe77939 100644 --- a/agent_service/agents/master_agent.py +++ b/agent_service/agents/master_agent.py @@ -71,7 +71,8 @@ class MasterAgent: # example block, so str.format would treat them as fields. return template.replace('{agent_list}', agent_list) - async def handle_message(self, user_id, channel_id, message, directive_id) -> MasterResponse: + async def handle_message(self, user_id, channel_id, message, directive_id, + extra_context: dict = None) -> MasterResponse: try: user_id = int(user_id) except (TypeError, ValueError): @@ -107,7 +108,8 @@ class MasterAgent: await self._memory.append_message(user_id, 'assistant', msg, directive_id) await self._log_directive_complete(directive_id, 'failed', msg) return MasterResponse(directive_id=directive_id, response=msg, status='failed') - directives = await self._build_directives(intent, context, directive_id) + directives = await self._build_directives(intent, context, directive_id, + user_id=user_id, extra_context=extra_context) reports = await self._dispatch_agents(directives) response_text = await self._synthesize(reports, context) await self._update_memory(user_id, message, response_text, reports, directive_id) @@ -197,20 +199,26 @@ class MasterAgent: return AccessResult(allowed=False, denied_agents=denied) return AccessResult(allowed=True) - async def _build_directives(self, intent: IntentResult, context: MasterContext, directive_id) -> list: + async def _build_directives(self, intent: IntentResult, context: MasterContext, directive_id, + user_id=None, extra_context: dict = None) -> list: + receipts = (extra_context or {}).get('receipts', []) directives = [] for agent_key in intent.agents: + authorized = ['read', 'search', 'report', 'post_chatter', + 'send_email', 'create_non_financial', 'write_non_financial'] + if receipts: + authorized.append('create_expense') ctx = DirectiveContext( client_profile=context.knowledge, recent_findings=context.operational_findings, conversation_summary=chr(10).join( m['content'] for m in context.conversation[-5:] if m['role'] == 'assistant'), - peer_data={}) + peer_data={'requesting_user_id': user_id}, + receipts=receipts) d = AgentDirective( directive_id=directive_id, agent=agent_key, task=intent.intent_summary, params=intent.params, context=ctx, - authorized_actions=['read', 'search', 'report', 'post_chatter', - 'send_email', 'create_non_financial', 'write_non_financial'], + authorized_actions=authorized, constraints={'max_amount': 5000}) directives.append(d) return directives diff --git a/agent_service/main.py b/agent_service/main.py index ab04fa2..3d415a2 100644 --- a/agent_service/main.py +++ b/agent_service/main.py @@ -10,7 +10,7 @@ from fastapi.middleware.cors import CORSMiddleware from .config import get_settings from . import app_state -from .routers import dispatch, approval, registry, sweep, health +from .routers import dispatch, approval, registry, sweep, health, upload logger = logging.getLogger(__name__) @@ -224,6 +224,7 @@ def create_app() -> FastAPI: allow_headers=['*'], ) app.include_router(dispatch.router) + app.include_router(upload.router) app.include_router(approval.router) app.include_router(registry.router) app.include_router(sweep.router) diff --git a/agent_service/routers/upload.py b/agent_service/routers/upload.py new file mode 100644 index 0000000..d5300b2 --- /dev/null +++ b/agent_service/routers/upload.py @@ -0,0 +1,80 @@ +from __future__ import annotations +import asyncio +import logging +import uuid +from typing import List, Optional + +from fastapi import APIRouter, File, Form, HTTPException, Request, UploadFile, status + +from ..config import get_settings +from .dispatch import DispatchResponse, _check_rate_limit, _verify_webhook_secret +from ..tools.receipt_parser import parse_upload + +logger = logging.getLogger(__name__) +router = APIRouter(prefix='/upload', tags=['upload']) + + +@router.post('', response_model=DispatchResponse) +async def upload( + request: Request, + user_id: str = Form(...), + message: str = Form(default='Create an employee expense report from these receipts.'), + session_id: Optional[str] = Form(default=None), + files: List[UploadFile] = File(default=[]), +): + _verify_webhook_secret(request) + _check_rate_limit(user_id) + + from ..app_state import get_master_agent + master = get_master_agent() + if master is None: + raise HTTPException(status_code=status.HTTP_503_SERVICE_UNAVAILABLE, + detail='Agent service not ready') + + receipts: list[dict] = [] + for f in files: + data = await f.read() + filename = f.filename or 'receipt' + try: + parsed = parse_upload(filename, data) + receipts.extend(parsed) + logger.info('upload: parsed %s → %d receipt(s)', filename, len(parsed)) + except Exception as exc: + logger.warning('upload: parse failed for %s: %s', filename, exc) + + if not receipts: + logger.warning('upload: no parseable receipts found in upload from user_id=%s', user_id) + + directive_id = session_id or uuid.uuid4().hex + extra_context = {'receipts': receipts, 'user_id': user_id} + + settings = get_settings() + timeout = settings.directive_timeout_minutes * 60 + + try: + response = await asyncio.wait_for( + master.handle_message( + user_id=user_id, + channel_id=None, + message=message, + directive_id=directive_id, + extra_context=extra_context, + ), + timeout=timeout, + ) + except asyncio.TimeoutError: + raise HTTPException( + status_code=status.HTTP_504_GATEWAY_TIMEOUT, + detail=f'Directive timed out after {settings.directive_timeout_minutes}m', + ) + except Exception as exc: + logger.exception('upload error user=%s: %s', user_id, exc) + raise HTTPException(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail=str(exc)) + + return DispatchResponse( + directive_id=response.directive_id, + reply=response.response, + escalations=response.escalations, + actions_taken=response.actions_taken, + session_id=session_id, + ) diff --git a/agent_service/tools/expenses_tools.py b/agent_service/tools/expenses_tools.py index 3b27ff0..e66faa1 100644 --- a/agent_service/tools/expenses_tools.py +++ b/agent_service/tools/expenses_tools.py @@ -84,3 +84,64 @@ class ExpensesTools: async def post_chatter_note(self, model: str, record_id: int, note: str) -> bool: await self._o.call(model, 'message_post', [[record_id]], {'body': note, 'message_type': 'comment'}) return True + + async def get_employee_id_for_user(self, user_id) -> int | None: + if not user_id: + return None + try: + records = await self._o.search_read( + 'hr.employee', [('user_id', '=', int(user_id))], ['id', 'name'], limit=1) + return records[0]['id'] if records else None + except Exception as exc: + logger.warning('get_employee_id_for_user failed user_id=%s: %s', user_id, exc) + return None + + async def get_default_expense_product(self) -> int | None: + try: + records = await self._o.search_read( + 'product.product', + [('can_be_expensed', '=', True), ('type', '=', 'service')], + ['id', 'name'], limit=1) + return records[0]['id'] if records else None + except Exception as exc: + logger.warning('get_default_expense_product failed: %s', exc) + return None + + async def create_expense_sheet(self, name: str, employee_id: int): + return await self._o.create('hr.expense.sheet', { + 'name': name, + 'employee_id': employee_id, + }) + + async def create_expense(self, sheet_id: int, employee_id: int, name: str, + total_amount: float, date: str, product_id: int = None, + description: str = ''): + vals: dict = { + 'name': name, + 'employee_id': employee_id, + 'sheet_id': sheet_id, + 'total_amount': total_amount, + 'quantity': 1.0, + } + if date: + vals['date'] = date + if product_id: + vals['product_id'] = product_id + if description: + vals['description'] = description + return await self._o.create('hr.expense', vals) + + async def attach_receipt(self, model: str, record_id: int, filename: str, + file_b64: str, mimetype: str) -> bool: + try: + await self._o.create('ir.attachment', { + 'name': filename, + 'datas': file_b64, + 'res_model': model, + 'res_id': record_id, + 'mimetype': mimetype, + }) + return True + except Exception as exc: + logger.warning('attach_receipt failed %s/%s: %s', model, record_id, exc) + return False diff --git a/agent_service/tools/receipt_parser.py b/agent_service/tools/receipt_parser.py new file mode 100644 index 0000000..22d2761 --- /dev/null +++ b/agent_service/tools/receipt_parser.py @@ -0,0 +1,133 @@ +from __future__ import annotations +import base64 +import io +import logging +import zipfile +from pathlib import Path + +logger = logging.getLogger(__name__) + +_MIME = { + '.jpg': 'image/jpeg', '.jpeg': 'image/jpeg', + '.png': 'image/png', '.gif': 'image/gif', + '.bmp': 'image/bmp', '.tiff': 'image/tiff', '.tif': 'image/tiff', + '.webp': 'image/webp', '.pdf': 'application/pdf', + '.html': 'text/html', '.htm': 'text/html', + '.txt': 'text/plain', '.zip': 'application/zip', +} + +_IMAGE_EXTS = {'.jpg', '.jpeg', '.png', '.gif', '.bmp', '.tiff', '.tif', '.webp'} + + +def parse_upload(filename: str, data: bytes) -> list[dict]: + """ + Parse one uploaded file into a list of receipt dicts. + ZIP files are recursively unpacked; all other types return a single entry. + Each dict: {filename, text, b64, mimetype} + """ + ext = Path(filename).suffix.lower() + if ext == '.zip': + return _extract_zip(filename, data) + + b64 = base64.b64encode(data).decode() + mimetype = _MIME.get(ext, 'application/octet-stream') + + if ext in _IMAGE_EXTS: + text = _ocr_image(data, filename) + elif ext == '.pdf': + text = _extract_pdf(data, filename) + elif ext in ('.html', '.htm'): + text = _extract_html(data, filename) + elif ext == '.txt': + text = data.decode('utf-8', errors='replace') + else: + try: + text = data.decode('utf-8', errors='replace') + except Exception: + text = f'[Binary file: {filename}]' + + return [{'filename': filename, 'text': text, 'b64': b64, 'mimetype': mimetype}] + + +def _extract_zip(zip_filename: str, data: bytes) -> list[dict]: + results = [] + try: + with zipfile.ZipFile(io.BytesIO(data)) as zf: + for member in zf.namelist(): + if member.endswith('/'): + continue + try: + member_data = zf.read(member) + results.extend(parse_upload(Path(member).name, member_data)) + except Exception as exc: + logger.warning('receipt_parser: zip member %s failed: %s', member, exc) + except Exception as exc: + logger.error('receipt_parser: zip %s failed: %s', zip_filename, exc) + return results + + +def _ocr_image(data: bytes, filename: str) -> str: + try: + from PIL import Image + import pytesseract + img = Image.open(io.BytesIO(data)) + return pytesseract.image_to_string(img).strip() + except ImportError: + logger.warning('pytesseract/Pillow not installed — OCR unavailable for %s', filename) + return f'[Image: {filename} — install pytesseract+Pillow for OCR]' + except Exception as exc: + logger.warning('OCR failed for %s: %s', filename, exc) + return f'[Image: {filename} — OCR failed: {exc}]' + + +def _extract_pdf(data: bytes, filename: str) -> str: + try: + import pdfplumber + parts = [] + with pdfplumber.open(io.BytesIO(data)) as pdf: + for page in pdf.pages: + t = page.extract_text() + if t: + parts.append(t) + return '\n'.join(parts).strip() + except ImportError: + logger.warning('pdfplumber not installed — PDF extraction unavailable for %s', filename) + return f'[PDF: {filename} — install pdfplumber for text extraction]' + except Exception as exc: + logger.warning('PDF extraction failed for %s: %s', filename, exc) + return f'[PDF: {filename} — extraction failed: {exc}]' + + +def _extract_html(data: bytes, filename: str) -> str: + try: + from html.parser import HTMLParser + + class _TextExtractor(HTMLParser): + def __init__(self): + super().__init__() + self._parts: list[str] = [] + self._skip = False + + def handle_starttag(self, tag, attrs): + if tag in ('script', 'style'): + self._skip = True + + def handle_endtag(self, tag): + if tag in ('script', 'style'): + self._skip = False + + def handle_data(self, data): + if not self._skip: + s = data.strip() + if s: + self._parts.append(s) + + def text(self): + return ' '.join(self._parts) + + parser = _TextExtractor() + parser.feed(data.decode('utf-8', errors='replace')) + return parser.text() + except Exception as exc: + logger.warning('HTML extraction failed for %s: %s', filename, exc) + return f'[HTML: {filename} — extraction failed: {exc}]' diff --git a/requirements.txt b/requirements.txt index 30d26e9..f2342f8 100644 --- a/requirements.txt +++ b/requirements.txt @@ -11,3 +11,8 @@ json-log-formatter==0.5.2 python-dotenv==1.0.1 mcp==1.3.0 ollama==0.3.3 +# Receipt parsing — also requires: apt install tesseract-ocr (for image OCR) +pdfplumber==0.11.4 +Pillow==10.4.0 +pytesseract==0.3.13 +python-multipart==0.0.12