When a zip/image arrives via /upload, the LLM was classifying the message as needs_clarification=True (because the chat body was just a filename like "download (8).zip", not an instruction), and the early return on line 91 fired before the receipts safety guard on line 106, so the guard never executed. master_agent: move the receipts safety guard to BEFORE the needs_clarification early-return. If extra_context contains receipts, unconditionally set needs_clarification=False and ensure expenses_agent is in the agents list — the LLM cannot veto an upload with a question. upload router: normalize empty or filename-only messages (e.g. when the user drops a file in Discuss chat with no text) to "Create an expense report from these uploaded receipts." so the LLM intent classification also has a sensible string to work with. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
100 lines
3.7 KiB
Python
100 lines
3.7 KiB
Python
from __future__ import annotations
|
|
import asyncio
|
|
import logging
|
|
import re
|
|
import uuid
|
|
from typing import List, Optional
|
|
|
|
# Matches messages that are just a filename (e.g. "download (8).zip", "receipt.jpg").
|
|
# When the chat body is only the filename, the LLM has nothing useful to classify.
|
|
# Pattern: one or more word characters, whitespace, '-', '.', '(', ')', '[' or ']',
# then a literal '.' and an extension of 2-6 word characters, anchored at both ends.
_FILENAME_ONLY_RE = re.compile(r'^[\w\s\-.()\[\]]+\.\w{2,6}$')
|
|
|
|
from fastapi import APIRouter, File, Form, HTTPException, Request, UploadFile, status
|
|
|
|
from ..config import get_settings
|
|
from .dispatch import DispatchResponse, _check_rate_limit, _verify_webhook_secret
|
|
from ..tools.receipt_parser import parse_upload
|
|
|
|
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
|
|
# Router for the upload endpoint: routes registered on it are mounted under
# the '/upload' prefix and tagged 'upload'.
router = APIRouter(prefix='/upload', tags=['upload'])
|
|
|
|
|
|
@router.post('', response_model=DispatchResponse)
async def upload(
    request: Request,
    user_id: str = Form(...),
    message: str = Form(default=''),
    session_id: Optional[str] = Form(default=None),
    files: List[UploadFile] = File(default=[]),
):
    """Parse uploaded files into receipts and forward them to the master agent.

    Args:
        request: Incoming request, passed to ``_verify_webhook_secret``.
        user_id: Identifier of the uploading user; passed to the rate-limit
            check and forwarded to the agent.
        message: Optional chat text sent with the upload. When it is empty or
            matches ``_FILENAME_ONLY_RE`` it is replaced by a default
            instruction before being handed to the agent.
        session_id: Optional caller-supplied session id. Used as the
            directive id when given; otherwise a random hex id is generated.
        files: Uploaded files; each one is read fully and passed to
            ``parse_upload``.

    Returns:
        A ``DispatchResponse`` built from the agent's response, echoing the
        caller's ``session_id`` (which may be ``None``).

    Raises:
        HTTPException: 503 when the master agent is not available, 504 when
            the agent call exceeds the configured timeout, 500 for any other
            error raised by the agent call. ``_verify_webhook_secret`` and
            ``_check_rate_limit`` presumably raise as well on failure —
            confirm against ``dispatch``.
    """
    _verify_webhook_secret(request)
    _check_rate_limit(user_id)

    # Imported at call time rather than module import time (presumably to
    # avoid a circular import with app_state — TODO confirm).
    from ..app_state import get_master_agent
    master = get_master_agent()
    if master is None:
        raise HTTPException(
            status_code=status.HTTP_503_SERVICE_UNAVAILABLE,
            detail='Agent service not ready',
        )

    from concurrent.futures import ThreadPoolExecutor

    # Normalise message: if the user sent no text (or the chat body is just the
    # filename Odoo auto-inserts), give the master agent a clear instruction so
    # it routes to expenses_agent rather than asking for clarification.
    stripped = (message or '').strip()
    if not stripped or _FILENAME_ONLY_RE.match(stripped):
        message = 'Create an expense report from these uploaded receipts.'
        logger.debug('upload: normalised empty/filename message for user_id=%s', user_id)

    receipts: list[dict] = []
    # We are inside a coroutine, so the running loop is the one to use.
    loop = asyncio.get_running_loop()
    ocr_executor = ThreadPoolExecutor(max_workers=2)
    try:
        for f in files:
            data = await f.read()
            filename = f.filename or 'receipt'
            try:
                # parse_upload may run OCR (CPU-bound) — offload to thread pool
                parsed = await loop.run_in_executor(ocr_executor, parse_upload, filename, data)
                receipts.extend(parsed)
                logger.info('upload: parsed %s → %d receipt(s)', filename, len(parsed))
            except Exception as exc:
                # Best effort: one unreadable file must not fail the whole upload.
                logger.warning('upload: parse failed for %s: %s', filename, exc)
    finally:
        # Release the per-request worker threads explicitly instead of
        # relying on garbage collection. wait=False keeps the event loop
        # from blocking if the request is cancelled mid-parse.
        ocr_executor.shutdown(wait=False)

    if not receipts:
        logger.warning('upload: no parseable receipts found in upload from user_id=%s', user_id)

    directive_id = session_id or uuid.uuid4().hex
    extra_context = {'receipts': receipts, 'user_id': user_id}

    settings = get_settings()
    # Setting is expressed in minutes; asyncio.wait_for expects seconds.
    timeout = settings.directive_timeout_minutes * 60

    try:
        response = await asyncio.wait_for(
            master.handle_message(
                user_id=user_id,
                channel_id=None,
                message=message,
                directive_id=directive_id,
                extra_context=extra_context,
            ),
            timeout=timeout,
        )
    except asyncio.TimeoutError as exc:
        raise HTTPException(
            status_code=status.HTTP_504_GATEWAY_TIMEOUT,
            detail=f'Directive timed out after {settings.directive_timeout_minutes}m',
        ) from exc
    except Exception as exc:
        logger.exception('upload error user=%s: %s', user_id, exc)
        # NOTE(review): detail exposes the raw exception text to the caller;
        # kept as-is for compatibility — confirm this is acceptable.
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=str(exc),
        ) from exc

    return DispatchResponse(
        directive_id=response.directive_id,
        reply=response.response,
        escalations=response.escalations,
        actions_taken=response.actions_taken,
        session_id=session_id,
    )
|