feat: pre-creation confirmation step with inline duplicate warnings

Before writing any expense records, the bot now posts a numbered table
of the parsed vendor/amount/date for every receipt, with duplicate entries
flagged inline. The user replies 'confirm' (which skips duplicates) or
'confirm, keep all' (which includes every item even if duplicated). This
catches OCR amount misreads before they land in Odoo.

Also removes the separate awaiting_dup_approval step; duplicate review
is now part of the single confirmation table.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Carlos Garcia
2026-05-16 16:54:25 -04:00
parent 12576ead1b
commit af1d27be89
2 changed files with 57 additions and 50 deletions

View File

@@ -273,6 +273,8 @@ class DiscussChannel(models.Model):
'suspected duplicate', 'suspected duplicate',
'skip duplicates', 'skip duplicates',
'keep all', 'keep all',
'please review',
'reply "confirm"',
) )
prev_was_bot_question = False prev_was_bot_question = False
for msg in messages: for msg in messages:

View File

@@ -67,18 +67,22 @@ class ExpensesAgent(BaseAgent):
combined = task + ' ' + raw_msg combined = task + ' ' + raw_msg
# Detect whether the user is responding to a duplicate-approval request # Detect whether the user is responding to a duplicate-approval request
skip_keywords = ('skip', 'yes', 'remove duplicate', 'exclude duplicate', 'drop duplicate') skip_keywords = ('skip', 'remove duplicate', 'exclude duplicate', 'drop duplicate')
keep_keywords = ('keep all', 'keep both', 'include all', 'no skip', "don't skip") keep_keywords = ('keep all', 'keep both', 'include all', 'no skip', "don't skip")
if any(k in combined for k in skip_keywords): confirm_keywords = ('confirm', 'looks good', 'go ahead', 'proceed', 'create it', 'create them')
user_dup_decision = 'skip' if any(k in combined for k in keep_keywords):
elif any(k in combined for k in keep_keywords):
user_dup_decision = 'keep_all' user_dup_decision = 'keep_all'
elif any(k in combined for k in skip_keywords):
user_dup_decision = 'skip'
else: else:
user_dup_decision = 'none' # first time through — will ask if dups found user_dup_decision = 'skip' # default: skip duplicates when confirmed
user_confirmed = any(k in combined for k in confirm_keywords)
return { return {
'mode': 'create_from_receipts' if receipts else 'read', 'mode': 'create_from_receipts' if receipts else 'read',
'user_dup_decision': user_dup_decision, 'user_dup_decision': user_dup_decision,
'user_confirmed': user_confirmed,
'fetch_summary': any(k in task for k in ('summary', 'overview')) and not receipts, 'fetch_summary': any(k in task for k in ('summary', 'overview')) and not receipts,
'fetch_pending': any(k in task for k in ('pending', 'approve', 'approval')) and not receipts, 'fetch_pending': any(k in task for k in ('pending', 'approve', 'approval')) and not receipts,
'employee_id': self._directive.params.get('employee_id') if self._directive else None, 'employee_id': self._directive.params.get('employee_id') if self._directive else None,
@@ -88,7 +92,8 @@ class ExpensesAgent(BaseAgent):
async def _gather(self, plan: dict) -> dict: async def _gather(self, plan: dict) -> dict:
data: dict = {'mode': plan.get('mode', 'read'), data: dict = {'mode': plan.get('mode', 'read'),
'user_dup_decision': plan.get('user_dup_decision', 'none')} 'user_dup_decision': plan.get('user_dup_decision', 'skip'),
'user_confirmed': plan.get('user_confirmed', False)}
if plan.get('mode') == 'create_from_receipts': if plan.get('mode') == 'create_from_receipts':
self._gathered_data = data self._gathered_data = data
return data return data
@@ -121,7 +126,8 @@ class ExpensesAgent(BaseAgent):
if not receipts: if not receipts:
return [] return []
user_dup_decision = self._gathered_data.get('user_dup_decision', 'none') user_dup_decision = self._gathered_data.get('user_dup_decision', 'skip')
user_confirmed = self._gathered_data.get('user_confirmed', False)
user_id = (self._directive.context.peer_data.get('requesting_user_id') user_id = (self._directive.context.peer_data.get('requesting_user_id')
if self._directive else None) if self._directive else None)
@@ -131,12 +137,11 @@ class ExpensesAgent(BaseAgent):
'No employee record found for the current user; cannot create expense report.') 'No employee record found for the current user; cannot create expense report.')
return [] return []
# Fetch all expensable products once for category selection
expense_products = await self._et.get_expense_products() expense_products = await self._et.get_expense_products()
default_product_id = expense_products[0]['id'] if expense_products else None default_product_id = expense_products[0]['id'] if expense_products else None
product_map = {p['id']: p['name'] for p in expense_products} product_map = {p['id']: p['name'] for p in expense_products}
# Pass 1: byte-exact dedup (same file uploaded twice) # Pass 1: byte-exact dedup
seen_hashes: set = set() seen_hashes: set = set()
unique_receipts = [] unique_receipts = []
for r in receipts: for r in receipts:
@@ -148,7 +153,7 @@ class ExpensesAgent(BaseAgent):
seen_hashes.add(h) seen_hashes.add(h)
unique_receipts.append(r) unique_receipts.append(r)
# Parse all receipts concurrently (bounded by Ollama semaphore) # Parse all receipts concurrently
parse_tasks = [ parse_tasks = [
self._parse_receipt_text( self._parse_receipt_text(
r.get('text', ''), r.get('filename', 'receipt'), r.get('text', ''), r.get('filename', 'receipt'),
@@ -159,7 +164,6 @@ class ExpensesAgent(BaseAgent):
] ]
raw_parsed = await asyncio.gather(*parse_tasks, return_exceptions=True) raw_parsed = await asyncio.gather(*parse_tasks, return_exceptions=True)
# Normalise exceptions to fallback dicts
paired: list[tuple[dict, dict]] = [] paired: list[tuple[dict, dict]] = []
for receipt, parsed in zip(unique_receipts, raw_parsed): for receipt, parsed in zip(unique_receipts, raw_parsed):
if isinstance(parsed, Exception): if isinstance(parsed, Exception):
@@ -170,33 +174,32 @@ class ExpensesAgent(BaseAgent):
'time': None, 'product_name': ''} 'time': None, 'product_name': ''}
paired.append((receipt, parsed)) paired.append((receipt, parsed))
# Pass 2: semantic dedup — detect multiple photos of the same receipt # Pass 2: semantic dedup
deduped: list[tuple[dict, dict]] = [] deduped: list[tuple[dict, dict]] = []
dup_pairs: list[tuple[int, dict, dict]] = [] # (kept_idx, dup_receipt, dup_parsed) dup_indices: set[int] = set() # indices into `paired` that are duplicates
for receipt, parsed in paired: for i, (receipt, parsed) in enumerate(paired):
dup_idx = self._find_semantic_duplicate(parsed, deduped) dup_idx = self._find_semantic_duplicate(parsed, deduped)
if dup_idx is not None: if dup_idx is not None:
dup_pairs.append((dup_idx, receipt, parsed)) dup_indices.add(i)
# Tentatively keep whichever photo had more OCR text
if len(receipt.get('text', '')) > len(deduped[dup_idx][0].get('text', '')): if len(receipt.get('text', '')) > len(deduped[dup_idx][0].get('text', '')):
deduped[dup_idx] = (receipt, parsed) deduped[dup_idx] = (receipt, parsed)
else: else:
deduped.append((receipt, parsed)) deduped.append((receipt, parsed))
# If duplicates were found and user hasn't decided yet, pause and ask # Always show confirmation summary before creating — lets user verify
if dup_pairs and user_dup_decision == 'none': # parsed amounts and review flagged duplicates in one step.
self._gathered_data['mode'] = 'awaiting_dup_approval' if not user_confirmed:
self._pending_dup_pairs = dup_pairs self._gathered_data['mode'] = 'awaiting_confirmation'
self._confirmation_items = [
(receipt, parsed, i in dup_indices)
for i, (receipt, parsed) in enumerate(paired)
]
self._deduped = deduped self._deduped = deduped
return [] return []
# Apply user's decision # User confirmed — apply dup decision
if user_dup_decision == 'keep_all': final_list = paired if user_dup_decision == 'keep_all' else deduped
final_list = paired
else:
final_list = deduped # default: skip semantic duplicates
# Create the sheet now that we know what to include
sheet_name = f'Expense Report - {_date.today().isoformat()}' sheet_name = f'Expense Report - {_date.today().isoformat()}'
sheet_result = await self._et.create_expense_sheet(sheet_name, employee_id) sheet_result = await self._et.create_expense_sheet(sheet_name, employee_id)
if not sheet_result.success: if not sheet_result.success:
@@ -207,8 +210,6 @@ class ExpensesAgent(BaseAgent):
actions = [f'Created expense sheet "{sheet_name}" (ID {sheet_id})'] actions = [f'Created expense sheet "{sheet_name}" (ID {sheet_id})']
for receipt, parsed in final_list: for receipt, parsed in final_list:
# Pick product by name match returned from LLM, fall back to default
product_id = default_product_id product_id = default_product_id
chosen_name = parsed.get('product_name', '') chosen_name = parsed.get('product_name', '')
if chosen_name: if chosen_name:
@@ -383,28 +384,32 @@ class ExpensesAgent(BaseAgent):
data = self._gathered_data data = self._gathered_data
directive_id = self._directive.directive_id if self._directive else '' directive_id = self._directive.directive_id if self._directive else ''
if data.get('mode') == 'awaiting_dup_approval': if data.get('mode') == 'awaiting_confirmation':
dup_pairs = getattr(self, '_pending_dup_pairs', []) items = getattr(self, '_confirmation_items', [])
deduped = getattr(self, '_deduped', []) n_dups = sum(1 for _, _, is_dup in items if is_dup)
lines = [f'I found {len(dup_pairs)} suspected duplicate receipt photo(s). ' lines = [f'I parsed {len(items)} receipt(s). Please review before I create the expense report:\n']
f'Please review before I create the expense report:\n'] lines.append(f' {"#":>3} {"Vendor":<30} {"Amount":>8} {"Date":<12}')
for kept_idx, dup_receipt, dup_parsed in dup_pairs: lines.append(f' {"---":>3} {"-"*30} {"-"*8} {"-"*12}')
kept_receipt, kept_parsed = deduped[kept_idx] for i, (receipt, parsed, is_dup) in enumerate(items, 1):
vendor = (dup_parsed.get('vendor') or kept_parsed.get('vendor', 'Unknown')) vendor = str(parsed.get('vendor') or receipt.get('filename', '?'))[:30]
amount = float(dup_parsed.get('amount', 0)) amt = float(parsed.get('amount') or 0)
dt = dup_parsed.get('date', '') dt = str(parsed.get('date') or '')
time_a = kept_parsed.get('time') or '' flag = ' !! duplicate' if is_dup else ''
time_b = dup_parsed.get('time') or '' lines.append(f' {i:>3}. {vendor:<30} ${amt:>7.2f} {dt}{flag}')
line = f'{vendor} ${amount:.2f} on {dt}' lines.append('')
if time_a or time_b: if n_dups:
line += f' (Photo A at {time_a or "?"}, Photo B at {time_b or "?"})' lines.append(
line += (f'\n Photo A: {kept_receipt.get("filename", "?")}' f'{n_dups} item(s) marked "!! duplicate" appear to be the same receipt '
f'\n Photo B: {dup_receipt.get("filename", "?")}') f'as another entry (possibly an OCR amount mismatch).'
lines.append(line) )
lines.append( lines.append(
'\nReply "skip duplicates" to keep the clearest photo of each, ' 'Reply "confirm" to create the report and exclude duplicates (recommended).'
'or "keep all" to include every photo as a separate expense.' )
) lines.append(
'Reply "confirm, keep all" to include every item even if duplicated.'
)
else:
lines.append('Reply "confirm" to create the expense report.')
return AgentReport( return AgentReport(
directive_id=directive_id, agent=self.name, status='complete', directive_id=directive_id, agent=self.name, status='complete',
summary='\n'.join(lines), data=data, summary='\n'.join(lines), data=data,