feat: pre-creation confirmation step with inline duplicate warnings
Before writing any expense records, the bot now posts a numbered table of the parsed vendor/amount/date for every receipt, with duplicate entries flagged inline. The user replies 'confirm' (which skips duplicates) or 'confirm, keep all'. This catches OCR amount misreads before they land in Odoo. Also removes the separate awaiting_dup_approval step; duplicate review is now part of the single confirmation table. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -273,6 +273,8 @@ class DiscussChannel(models.Model):
|
||||
'suspected duplicate',
|
||||
'skip duplicates',
|
||||
'keep all',
|
||||
'please review',
|
||||
'reply "confirm"',
|
||||
)
|
||||
prev_was_bot_question = False
|
||||
for msg in messages:
|
||||
|
||||
@@ -67,18 +67,22 @@ class ExpensesAgent(BaseAgent):
|
||||
combined = task + ' ' + raw_msg
|
||||
|
||||
# Detect whether the user is responding to a duplicate-approval request
|
||||
skip_keywords = ('skip', 'yes', 'remove duplicate', 'exclude duplicate', 'drop duplicate')
|
||||
skip_keywords = ('skip', 'remove duplicate', 'exclude duplicate', 'drop duplicate')
|
||||
keep_keywords = ('keep all', 'keep both', 'include all', 'no skip', "don't skip")
|
||||
if any(k in combined for k in skip_keywords):
|
||||
user_dup_decision = 'skip'
|
||||
elif any(k in combined for k in keep_keywords):
|
||||
confirm_keywords = ('confirm', 'looks good', 'go ahead', 'proceed', 'create it', 'create them')
|
||||
if any(k in combined for k in keep_keywords):
|
||||
user_dup_decision = 'keep_all'
|
||||
elif any(k in combined for k in skip_keywords):
|
||||
user_dup_decision = 'skip'
|
||||
else:
|
||||
user_dup_decision = 'none' # first time through — will ask if dups found
|
||||
user_dup_decision = 'skip' # default: skip duplicates when confirmed
|
||||
|
||||
user_confirmed = any(k in combined for k in confirm_keywords)
|
||||
|
||||
return {
|
||||
'mode': 'create_from_receipts' if receipts else 'read',
|
||||
'user_dup_decision': user_dup_decision,
|
||||
'user_confirmed': user_confirmed,
|
||||
'fetch_summary': any(k in task for k in ('summary', 'overview')) and not receipts,
|
||||
'fetch_pending': any(k in task for k in ('pending', 'approve', 'approval')) and not receipts,
|
||||
'employee_id': self._directive.params.get('employee_id') if self._directive else None,
|
||||
@@ -88,7 +92,8 @@ class ExpensesAgent(BaseAgent):
|
||||
|
||||
async def _gather(self, plan: dict) -> dict:
|
||||
data: dict = {'mode': plan.get('mode', 'read'),
|
||||
'user_dup_decision': plan.get('user_dup_decision', 'none')}
|
||||
'user_dup_decision': plan.get('user_dup_decision', 'skip'),
|
||||
'user_confirmed': plan.get('user_confirmed', False)}
|
||||
if plan.get('mode') == 'create_from_receipts':
|
||||
self._gathered_data = data
|
||||
return data
|
||||
@@ -121,7 +126,8 @@ class ExpensesAgent(BaseAgent):
|
||||
if not receipts:
|
||||
return []
|
||||
|
||||
user_dup_decision = self._gathered_data.get('user_dup_decision', 'none')
|
||||
user_dup_decision = self._gathered_data.get('user_dup_decision', 'skip')
|
||||
user_confirmed = self._gathered_data.get('user_confirmed', False)
|
||||
|
||||
user_id = (self._directive.context.peer_data.get('requesting_user_id')
|
||||
if self._directive else None)
|
||||
@@ -131,12 +137,11 @@ class ExpensesAgent(BaseAgent):
|
||||
'No employee record found for the current user; cannot create expense report.')
|
||||
return []
|
||||
|
||||
# Fetch all expensable products once for category selection
|
||||
expense_products = await self._et.get_expense_products()
|
||||
default_product_id = expense_products[0]['id'] if expense_products else None
|
||||
product_map = {p['id']: p['name'] for p in expense_products}
|
||||
|
||||
# Pass 1: byte-exact dedup (same file uploaded twice)
|
||||
# Pass 1: byte-exact dedup
|
||||
seen_hashes: set = set()
|
||||
unique_receipts = []
|
||||
for r in receipts:
|
||||
@@ -148,7 +153,7 @@ class ExpensesAgent(BaseAgent):
|
||||
seen_hashes.add(h)
|
||||
unique_receipts.append(r)
|
||||
|
||||
# Parse all receipts concurrently (bounded by Ollama semaphore)
|
||||
# Parse all receipts concurrently
|
||||
parse_tasks = [
|
||||
self._parse_receipt_text(
|
||||
r.get('text', ''), r.get('filename', 'receipt'),
|
||||
@@ -159,7 +164,6 @@ class ExpensesAgent(BaseAgent):
|
||||
]
|
||||
raw_parsed = await asyncio.gather(*parse_tasks, return_exceptions=True)
|
||||
|
||||
# Normalise exceptions to fallback dicts
|
||||
paired: list[tuple[dict, dict]] = []
|
||||
for receipt, parsed in zip(unique_receipts, raw_parsed):
|
||||
if isinstance(parsed, Exception):
|
||||
@@ -170,33 +174,32 @@ class ExpensesAgent(BaseAgent):
|
||||
'time': None, 'product_name': ''}
|
||||
paired.append((receipt, parsed))
|
||||
|
||||
# Pass 2: semantic dedup — detect multiple photos of the same receipt
|
||||
# Pass 2: semantic dedup
|
||||
deduped: list[tuple[dict, dict]] = []
|
||||
dup_pairs: list[tuple[int, dict, dict]] = [] # (kept_idx, dup_receipt, dup_parsed)
|
||||
for receipt, parsed in paired:
|
||||
dup_indices: set[int] = set() # indices into `paired` that are duplicates
|
||||
for i, (receipt, parsed) in enumerate(paired):
|
||||
dup_idx = self._find_semantic_duplicate(parsed, deduped)
|
||||
if dup_idx is not None:
|
||||
dup_pairs.append((dup_idx, receipt, parsed))
|
||||
# Tentatively keep whichever photo had more OCR text
|
||||
dup_indices.add(i)
|
||||
if len(receipt.get('text', '')) > len(deduped[dup_idx][0].get('text', '')):
|
||||
deduped[dup_idx] = (receipt, parsed)
|
||||
else:
|
||||
deduped.append((receipt, parsed))
|
||||
|
||||
# If duplicates were found and user hasn't decided yet, pause and ask
|
||||
if dup_pairs and user_dup_decision == 'none':
|
||||
self._gathered_data['mode'] = 'awaiting_dup_approval'
|
||||
self._pending_dup_pairs = dup_pairs
|
||||
# Always show confirmation summary before creating — lets user verify
|
||||
# parsed amounts and review flagged duplicates in one step.
|
||||
if not user_confirmed:
|
||||
self._gathered_data['mode'] = 'awaiting_confirmation'
|
||||
self._confirmation_items = [
|
||||
(receipt, parsed, i in dup_indices)
|
||||
for i, (receipt, parsed) in enumerate(paired)
|
||||
]
|
||||
self._deduped = deduped
|
||||
return []
|
||||
|
||||
# Apply user's decision
|
||||
if user_dup_decision == 'keep_all':
|
||||
final_list = paired
|
||||
else:
|
||||
final_list = deduped # default: skip semantic duplicates
|
||||
# User confirmed — apply dup decision
|
||||
final_list = paired if user_dup_decision == 'keep_all' else deduped
|
||||
|
||||
# Create the sheet now that we know what to include
|
||||
sheet_name = f'Expense Report - {_date.today().isoformat()}'
|
||||
sheet_result = await self._et.create_expense_sheet(sheet_name, employee_id)
|
||||
if not sheet_result.success:
|
||||
@@ -207,8 +210,6 @@ class ExpensesAgent(BaseAgent):
|
||||
actions = [f'Created expense sheet "{sheet_name}" (ID {sheet_id})']
|
||||
|
||||
for receipt, parsed in final_list:
|
||||
|
||||
# Pick product by name match returned from LLM, fall back to default
|
||||
product_id = default_product_id
|
||||
chosen_name = parsed.get('product_name', '')
|
||||
if chosen_name:
|
||||
@@ -383,28 +384,32 @@ class ExpensesAgent(BaseAgent):
|
||||
data = self._gathered_data
|
||||
directive_id = self._directive.directive_id if self._directive else ''
|
||||
|
||||
if data.get('mode') == 'awaiting_dup_approval':
|
||||
dup_pairs = getattr(self, '_pending_dup_pairs', [])
|
||||
deduped = getattr(self, '_deduped', [])
|
||||
lines = [f'I found {len(dup_pairs)} suspected duplicate receipt photo(s). '
|
||||
f'Please review before I create the expense report:\n']
|
||||
for kept_idx, dup_receipt, dup_parsed in dup_pairs:
|
||||
kept_receipt, kept_parsed = deduped[kept_idx]
|
||||
vendor = (dup_parsed.get('vendor') or kept_parsed.get('vendor', 'Unknown'))
|
||||
amount = float(dup_parsed.get('amount', 0))
|
||||
dt = dup_parsed.get('date', '')
|
||||
time_a = kept_parsed.get('time') or ''
|
||||
time_b = dup_parsed.get('time') or ''
|
||||
line = f'• {vendor} ${amount:.2f} on {dt}'
|
||||
if time_a or time_b:
|
||||
line += f' (Photo A at {time_a or "?"}, Photo B at {time_b or "?"})'
|
||||
line += (f'\n Photo A: {kept_receipt.get("filename", "?")}'
|
||||
f'\n Photo B: {dup_receipt.get("filename", "?")}')
|
||||
lines.append(line)
|
||||
lines.append(
|
||||
'\nReply "skip duplicates" to keep the clearest photo of each, '
|
||||
'or "keep all" to include every photo as a separate expense.'
|
||||
)
|
||||
if data.get('mode') == 'awaiting_confirmation':
|
||||
items = getattr(self, '_confirmation_items', [])
|
||||
n_dups = sum(1 for _, _, is_dup in items if is_dup)
|
||||
lines = [f'I parsed {len(items)} receipt(s). Please review before I create the expense report:\n']
|
||||
lines.append(f' {"#":>3} {"Vendor":<30} {"Amount":>8} {"Date":<12}')
|
||||
lines.append(f' {"---":>3} {"-"*30} {"-"*8} {"-"*12}')
|
||||
for i, (receipt, parsed, is_dup) in enumerate(items, 1):
|
||||
vendor = str(parsed.get('vendor') or receipt.get('filename', '?'))[:30]
|
||||
amt = float(parsed.get('amount') or 0)
|
||||
dt = str(parsed.get('date') or '')
|
||||
flag = ' !! duplicate' if is_dup else ''
|
||||
lines.append(f' {i:>3}. {vendor:<30} ${amt:>7.2f} {dt}{flag}')
|
||||
lines.append('')
|
||||
if n_dups:
|
||||
lines.append(
|
||||
f'{n_dups} item(s) marked "!! duplicate" appear to be the same receipt '
|
||||
f'as another entry (possibly an OCR amount mismatch).'
|
||||
)
|
||||
lines.append(
|
||||
'Reply "confirm" to create the report and exclude duplicates (recommended).'
|
||||
)
|
||||
lines.append(
|
||||
'Reply "confirm, keep all" to include every item even if duplicated.'
|
||||
)
|
||||
else:
|
||||
lines.append('Reply "confirm" to create the expense report.')
|
||||
return AgentReport(
|
||||
directive_id=directive_id, agent=self.name, status='complete',
|
||||
summary='\n'.join(lines), data=data,
|
||||
|
||||
Reference in New Issue
Block a user