feat: pre-creation confirmation step with inline duplicate warnings

Before writing any expense records, the bot now posts a numbered table
of the parsed vendor/amount/date for every receipt, with suspected
duplicates flagged inline. The user replies 'confirm' (which skips the
flagged duplicates) or 'confirm, keep all' (which includes every item).
This lets the user catch OCR amount misreads before they land in Odoo.

Also removes the separate awaiting_dup_approval step; duplicate review
is now part of the single confirmation table.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Carlos Garcia
2026-05-16 16:54:25 -04:00
parent 12576ead1b
commit af1d27be89
2 changed files with 57 additions and 50 deletions

View File

@@ -273,6 +273,8 @@ class DiscussChannel(models.Model):
'suspected duplicate',
'skip duplicates',
'keep all',
'please review',
'reply "confirm"',
)
prev_was_bot_question = False
for msg in messages:

View File

@@ -67,18 +67,22 @@ class ExpensesAgent(BaseAgent):
combined = task + ' ' + raw_msg
# Detect whether the user is responding to a duplicate-approval request
skip_keywords = ('skip', 'yes', 'remove duplicate', 'exclude duplicate', 'drop duplicate')
skip_keywords = ('skip', 'remove duplicate', 'exclude duplicate', 'drop duplicate')
keep_keywords = ('keep all', 'keep both', 'include all', 'no skip', "don't skip")
if any(k in combined for k in skip_keywords):
user_dup_decision = 'skip'
elif any(k in combined for k in keep_keywords):
confirm_keywords = ('confirm', 'looks good', 'go ahead', 'proceed', 'create it', 'create them')
if any(k in combined for k in keep_keywords):
user_dup_decision = 'keep_all'
elif any(k in combined for k in skip_keywords):
user_dup_decision = 'skip'
else:
user_dup_decision = 'none' # first time through — will ask if dups found
user_dup_decision = 'skip' # default: skip duplicates when confirmed
user_confirmed = any(k in combined for k in confirm_keywords)
return {
'mode': 'create_from_receipts' if receipts else 'read',
'user_dup_decision': user_dup_decision,
'user_confirmed': user_confirmed,
'fetch_summary': any(k in task for k in ('summary', 'overview')) and not receipts,
'fetch_pending': any(k in task for k in ('pending', 'approve', 'approval')) and not receipts,
'employee_id': self._directive.params.get('employee_id') if self._directive else None,
@@ -88,7 +92,8 @@ class ExpensesAgent(BaseAgent):
async def _gather(self, plan: dict) -> dict:
data: dict = {'mode': plan.get('mode', 'read'),
'user_dup_decision': plan.get('user_dup_decision', 'none')}
'user_dup_decision': plan.get('user_dup_decision', 'skip'),
'user_confirmed': plan.get('user_confirmed', False)}
if plan.get('mode') == 'create_from_receipts':
self._gathered_data = data
return data
@@ -121,7 +126,8 @@ class ExpensesAgent(BaseAgent):
if not receipts:
return []
user_dup_decision = self._gathered_data.get('user_dup_decision', 'none')
user_dup_decision = self._gathered_data.get('user_dup_decision', 'skip')
user_confirmed = self._gathered_data.get('user_confirmed', False)
user_id = (self._directive.context.peer_data.get('requesting_user_id')
if self._directive else None)
@@ -131,12 +137,11 @@ class ExpensesAgent(BaseAgent):
'No employee record found for the current user; cannot create expense report.')
return []
# Fetch all expensable products once for category selection
expense_products = await self._et.get_expense_products()
default_product_id = expense_products[0]['id'] if expense_products else None
product_map = {p['id']: p['name'] for p in expense_products}
# Pass 1: byte-exact dedup (same file uploaded twice)
# Pass 1: byte-exact dedup
seen_hashes: set = set()
unique_receipts = []
for r in receipts:
@@ -148,7 +153,7 @@ class ExpensesAgent(BaseAgent):
seen_hashes.add(h)
unique_receipts.append(r)
# Parse all receipts concurrently (bounded by Ollama semaphore)
# Parse all receipts concurrently
parse_tasks = [
self._parse_receipt_text(
r.get('text', ''), r.get('filename', 'receipt'),
@@ -159,7 +164,6 @@ class ExpensesAgent(BaseAgent):
]
raw_parsed = await asyncio.gather(*parse_tasks, return_exceptions=True)
# Normalise exceptions to fallback dicts
paired: list[tuple[dict, dict]] = []
for receipt, parsed in zip(unique_receipts, raw_parsed):
if isinstance(parsed, Exception):
@@ -170,33 +174,32 @@ class ExpensesAgent(BaseAgent):
'time': None, 'product_name': ''}
paired.append((receipt, parsed))
# Pass 2: semantic dedup — detect multiple photos of the same receipt
# Pass 2: semantic dedup
deduped: list[tuple[dict, dict]] = []
dup_pairs: list[tuple[int, dict, dict]] = [] # (kept_idx, dup_receipt, dup_parsed)
for receipt, parsed in paired:
dup_indices: set[int] = set() # indices into `paired` that are duplicates
for i, (receipt, parsed) in enumerate(paired):
dup_idx = self._find_semantic_duplicate(parsed, deduped)
if dup_idx is not None:
dup_pairs.append((dup_idx, receipt, parsed))
# Tentatively keep whichever photo had more OCR text
dup_indices.add(i)
if len(receipt.get('text', '')) > len(deduped[dup_idx][0].get('text', '')):
deduped[dup_idx] = (receipt, parsed)
else:
deduped.append((receipt, parsed))
# If duplicates were found and user hasn't decided yet, pause and ask
if dup_pairs and user_dup_decision == 'none':
self._gathered_data['mode'] = 'awaiting_dup_approval'
self._pending_dup_pairs = dup_pairs
# Always show confirmation summary before creating — lets user verify
# parsed amounts and review flagged duplicates in one step.
if not user_confirmed:
self._gathered_data['mode'] = 'awaiting_confirmation'
self._confirmation_items = [
(receipt, parsed, i in dup_indices)
for i, (receipt, parsed) in enumerate(paired)
]
self._deduped = deduped
return []
# Apply user's decision
if user_dup_decision == 'keep_all':
final_list = paired
else:
final_list = deduped # default: skip semantic duplicates
# User confirmed — apply dup decision
final_list = paired if user_dup_decision == 'keep_all' else deduped
# Create the sheet now that we know what to include
sheet_name = f'Expense Report - {_date.today().isoformat()}'
sheet_result = await self._et.create_expense_sheet(sheet_name, employee_id)
if not sheet_result.success:
@@ -207,8 +210,6 @@ class ExpensesAgent(BaseAgent):
actions = [f'Created expense sheet "{sheet_name}" (ID {sheet_id})']
for receipt, parsed in final_list:
# Pick product by name match returned from LLM, fall back to default
product_id = default_product_id
chosen_name = parsed.get('product_name', '')
if chosen_name:
@@ -383,28 +384,32 @@ class ExpensesAgent(BaseAgent):
data = self._gathered_data
directive_id = self._directive.directive_id if self._directive else ''
if data.get('mode') == 'awaiting_dup_approval':
dup_pairs = getattr(self, '_pending_dup_pairs', [])
deduped = getattr(self, '_deduped', [])
lines = [f'I found {len(dup_pairs)} suspected duplicate receipt photo(s). '
f'Please review before I create the expense report:\n']
for kept_idx, dup_receipt, dup_parsed in dup_pairs:
kept_receipt, kept_parsed = deduped[kept_idx]
vendor = (dup_parsed.get('vendor') or kept_parsed.get('vendor', 'Unknown'))
amount = float(dup_parsed.get('amount', 0))
dt = dup_parsed.get('date', '')
time_a = kept_parsed.get('time') or ''
time_b = dup_parsed.get('time') or ''
line = f'{vendor} ${amount:.2f} on {dt}'
if time_a or time_b:
line += f' (Photo A at {time_a or "?"}, Photo B at {time_b or "?"})'
line += (f'\n Photo A: {kept_receipt.get("filename", "?")}'
f'\n Photo B: {dup_receipt.get("filename", "?")}')
lines.append(line)
lines.append(
'\nReply "skip duplicates" to keep the clearest photo of each, '
'or "keep all" to include every photo as a separate expense.'
)
if data.get('mode') == 'awaiting_confirmation':
items = getattr(self, '_confirmation_items', [])
n_dups = sum(1 for _, _, is_dup in items if is_dup)
lines = [f'I parsed {len(items)} receipt(s). Please review before I create the expense report:\n']
lines.append(f' {"#":>3} {"Vendor":<30} {"Amount":>8} {"Date":<12}')
lines.append(f' {"---":>3} {"-"*30} {"-"*8} {"-"*12}')
for i, (receipt, parsed, is_dup) in enumerate(items, 1):
vendor = str(parsed.get('vendor') or receipt.get('filename', '?'))[:30]
amt = float(parsed.get('amount') or 0)
dt = str(parsed.get('date') or '')
flag = ' !! duplicate' if is_dup else ''
lines.append(f' {i:>3}. {vendor:<30} ${amt:>7.2f} {dt}{flag}')
lines.append('')
if n_dups:
lines.append(
f'{n_dups} item(s) marked "!! duplicate" appear to be the same receipt '
f'as another entry (possibly an OCR amount mismatch).'
)
lines.append(
'Reply "confirm" to create the report and exclude duplicates (recommended).'
)
lines.append(
'Reply "confirm, keep all" to include every item even if duplicated.'
)
else:
lines.append('Reply "confirm" to create the expense report.')
return AgentReport(
directive_id=directive_id, agent=self.name, status='complete',
summary='\n'.join(lines), data=data,