feat: pre-creation confirmation step with inline duplicate warnings
Before writing any expense records, the bot now posts a numbered table of the parsed vendor, amount, and date for every receipt, with duplicate entries flagged inline. The user replies 'confirm' (which skips duplicates) or 'confirm, keep all'. This catches OCR amount misreads before they land in Odoo. This commit also removes the separate awaiting_dup_approval step; duplicate review is now part of the single confirmation table. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -273,6 +273,8 @@ class DiscussChannel(models.Model):
|
|||||||
'suspected duplicate',
|
'suspected duplicate',
|
||||||
'skip duplicates',
|
'skip duplicates',
|
||||||
'keep all',
|
'keep all',
|
||||||
|
'please review',
|
||||||
|
'reply "confirm"',
|
||||||
)
|
)
|
||||||
prev_was_bot_question = False
|
prev_was_bot_question = False
|
||||||
for msg in messages:
|
for msg in messages:
|
||||||
|
|||||||
@@ -67,18 +67,22 @@ class ExpensesAgent(BaseAgent):
|
|||||||
combined = task + ' ' + raw_msg
|
combined = task + ' ' + raw_msg
|
||||||
|
|
||||||
# Detect whether the user is responding to a duplicate-approval request
|
# Detect whether the user is responding to a duplicate-approval request
|
||||||
skip_keywords = ('skip', 'yes', 'remove duplicate', 'exclude duplicate', 'drop duplicate')
|
skip_keywords = ('skip', 'remove duplicate', 'exclude duplicate', 'drop duplicate')
|
||||||
keep_keywords = ('keep all', 'keep both', 'include all', 'no skip', "don't skip")
|
keep_keywords = ('keep all', 'keep both', 'include all', 'no skip', "don't skip")
|
||||||
if any(k in combined for k in skip_keywords):
|
confirm_keywords = ('confirm', 'looks good', 'go ahead', 'proceed', 'create it', 'create them')
|
||||||
user_dup_decision = 'skip'
|
if any(k in combined for k in keep_keywords):
|
||||||
elif any(k in combined for k in keep_keywords):
|
|
||||||
user_dup_decision = 'keep_all'
|
user_dup_decision = 'keep_all'
|
||||||
|
elif any(k in combined for k in skip_keywords):
|
||||||
|
user_dup_decision = 'skip'
|
||||||
else:
|
else:
|
||||||
user_dup_decision = 'none' # first time through — will ask if dups found
|
user_dup_decision = 'skip' # default: skip duplicates when confirmed
|
||||||
|
|
||||||
|
user_confirmed = any(k in combined for k in confirm_keywords)
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'mode': 'create_from_receipts' if receipts else 'read',
|
'mode': 'create_from_receipts' if receipts else 'read',
|
||||||
'user_dup_decision': user_dup_decision,
|
'user_dup_decision': user_dup_decision,
|
||||||
|
'user_confirmed': user_confirmed,
|
||||||
'fetch_summary': any(k in task for k in ('summary', 'overview')) and not receipts,
|
'fetch_summary': any(k in task for k in ('summary', 'overview')) and not receipts,
|
||||||
'fetch_pending': any(k in task for k in ('pending', 'approve', 'approval')) and not receipts,
|
'fetch_pending': any(k in task for k in ('pending', 'approve', 'approval')) and not receipts,
|
||||||
'employee_id': self._directive.params.get('employee_id') if self._directive else None,
|
'employee_id': self._directive.params.get('employee_id') if self._directive else None,
|
||||||
@@ -88,7 +92,8 @@ class ExpensesAgent(BaseAgent):
|
|||||||
|
|
||||||
async def _gather(self, plan: dict) -> dict:
|
async def _gather(self, plan: dict) -> dict:
|
||||||
data: dict = {'mode': plan.get('mode', 'read'),
|
data: dict = {'mode': plan.get('mode', 'read'),
|
||||||
'user_dup_decision': plan.get('user_dup_decision', 'none')}
|
'user_dup_decision': plan.get('user_dup_decision', 'skip'),
|
||||||
|
'user_confirmed': plan.get('user_confirmed', False)}
|
||||||
if plan.get('mode') == 'create_from_receipts':
|
if plan.get('mode') == 'create_from_receipts':
|
||||||
self._gathered_data = data
|
self._gathered_data = data
|
||||||
return data
|
return data
|
||||||
@@ -121,7 +126,8 @@ class ExpensesAgent(BaseAgent):
|
|||||||
if not receipts:
|
if not receipts:
|
||||||
return []
|
return []
|
||||||
|
|
||||||
user_dup_decision = self._gathered_data.get('user_dup_decision', 'none')
|
user_dup_decision = self._gathered_data.get('user_dup_decision', 'skip')
|
||||||
|
user_confirmed = self._gathered_data.get('user_confirmed', False)
|
||||||
|
|
||||||
user_id = (self._directive.context.peer_data.get('requesting_user_id')
|
user_id = (self._directive.context.peer_data.get('requesting_user_id')
|
||||||
if self._directive else None)
|
if self._directive else None)
|
||||||
@@ -131,12 +137,11 @@ class ExpensesAgent(BaseAgent):
|
|||||||
'No employee record found for the current user; cannot create expense report.')
|
'No employee record found for the current user; cannot create expense report.')
|
||||||
return []
|
return []
|
||||||
|
|
||||||
# Fetch all expensable products once for category selection
|
|
||||||
expense_products = await self._et.get_expense_products()
|
expense_products = await self._et.get_expense_products()
|
||||||
default_product_id = expense_products[0]['id'] if expense_products else None
|
default_product_id = expense_products[0]['id'] if expense_products else None
|
||||||
product_map = {p['id']: p['name'] for p in expense_products}
|
product_map = {p['id']: p['name'] for p in expense_products}
|
||||||
|
|
||||||
# Pass 1: byte-exact dedup (same file uploaded twice)
|
# Pass 1: byte-exact dedup
|
||||||
seen_hashes: set = set()
|
seen_hashes: set = set()
|
||||||
unique_receipts = []
|
unique_receipts = []
|
||||||
for r in receipts:
|
for r in receipts:
|
||||||
@@ -148,7 +153,7 @@ class ExpensesAgent(BaseAgent):
|
|||||||
seen_hashes.add(h)
|
seen_hashes.add(h)
|
||||||
unique_receipts.append(r)
|
unique_receipts.append(r)
|
||||||
|
|
||||||
# Parse all receipts concurrently (bounded by Ollama semaphore)
|
# Parse all receipts concurrently
|
||||||
parse_tasks = [
|
parse_tasks = [
|
||||||
self._parse_receipt_text(
|
self._parse_receipt_text(
|
||||||
r.get('text', ''), r.get('filename', 'receipt'),
|
r.get('text', ''), r.get('filename', 'receipt'),
|
||||||
@@ -159,7 +164,6 @@ class ExpensesAgent(BaseAgent):
|
|||||||
]
|
]
|
||||||
raw_parsed = await asyncio.gather(*parse_tasks, return_exceptions=True)
|
raw_parsed = await asyncio.gather(*parse_tasks, return_exceptions=True)
|
||||||
|
|
||||||
# Normalise exceptions to fallback dicts
|
|
||||||
paired: list[tuple[dict, dict]] = []
|
paired: list[tuple[dict, dict]] = []
|
||||||
for receipt, parsed in zip(unique_receipts, raw_parsed):
|
for receipt, parsed in zip(unique_receipts, raw_parsed):
|
||||||
if isinstance(parsed, Exception):
|
if isinstance(parsed, Exception):
|
||||||
@@ -170,33 +174,32 @@ class ExpensesAgent(BaseAgent):
|
|||||||
'time': None, 'product_name': ''}
|
'time': None, 'product_name': ''}
|
||||||
paired.append((receipt, parsed))
|
paired.append((receipt, parsed))
|
||||||
|
|
||||||
# Pass 2: semantic dedup — detect multiple photos of the same receipt
|
# Pass 2: semantic dedup
|
||||||
deduped: list[tuple[dict, dict]] = []
|
deduped: list[tuple[dict, dict]] = []
|
||||||
dup_pairs: list[tuple[int, dict, dict]] = [] # (kept_idx, dup_receipt, dup_parsed)
|
dup_indices: set[int] = set() # indices into `paired` that are duplicates
|
||||||
for receipt, parsed in paired:
|
for i, (receipt, parsed) in enumerate(paired):
|
||||||
dup_idx = self._find_semantic_duplicate(parsed, deduped)
|
dup_idx = self._find_semantic_duplicate(parsed, deduped)
|
||||||
if dup_idx is not None:
|
if dup_idx is not None:
|
||||||
dup_pairs.append((dup_idx, receipt, parsed))
|
dup_indices.add(i)
|
||||||
# Tentatively keep whichever photo had more OCR text
|
|
||||||
if len(receipt.get('text', '')) > len(deduped[dup_idx][0].get('text', '')):
|
if len(receipt.get('text', '')) > len(deduped[dup_idx][0].get('text', '')):
|
||||||
deduped[dup_idx] = (receipt, parsed)
|
deduped[dup_idx] = (receipt, parsed)
|
||||||
else:
|
else:
|
||||||
deduped.append((receipt, parsed))
|
deduped.append((receipt, parsed))
|
||||||
|
|
||||||
# If duplicates were found and user hasn't decided yet, pause and ask
|
# Always show confirmation summary before creating — lets user verify
|
||||||
if dup_pairs and user_dup_decision == 'none':
|
# parsed amounts and review flagged duplicates in one step.
|
||||||
self._gathered_data['mode'] = 'awaiting_dup_approval'
|
if not user_confirmed:
|
||||||
self._pending_dup_pairs = dup_pairs
|
self._gathered_data['mode'] = 'awaiting_confirmation'
|
||||||
|
self._confirmation_items = [
|
||||||
|
(receipt, parsed, i in dup_indices)
|
||||||
|
for i, (receipt, parsed) in enumerate(paired)
|
||||||
|
]
|
||||||
self._deduped = deduped
|
self._deduped = deduped
|
||||||
return []
|
return []
|
||||||
|
|
||||||
# Apply user's decision
|
# User confirmed — apply dup decision
|
||||||
if user_dup_decision == 'keep_all':
|
final_list = paired if user_dup_decision == 'keep_all' else deduped
|
||||||
final_list = paired
|
|
||||||
else:
|
|
||||||
final_list = deduped # default: skip semantic duplicates
|
|
||||||
|
|
||||||
# Create the sheet now that we know what to include
|
|
||||||
sheet_name = f'Expense Report - {_date.today().isoformat()}'
|
sheet_name = f'Expense Report - {_date.today().isoformat()}'
|
||||||
sheet_result = await self._et.create_expense_sheet(sheet_name, employee_id)
|
sheet_result = await self._et.create_expense_sheet(sheet_name, employee_id)
|
||||||
if not sheet_result.success:
|
if not sheet_result.success:
|
||||||
@@ -207,8 +210,6 @@ class ExpensesAgent(BaseAgent):
|
|||||||
actions = [f'Created expense sheet "{sheet_name}" (ID {sheet_id})']
|
actions = [f'Created expense sheet "{sheet_name}" (ID {sheet_id})']
|
||||||
|
|
||||||
for receipt, parsed in final_list:
|
for receipt, parsed in final_list:
|
||||||
|
|
||||||
# Pick product by name match returned from LLM, fall back to default
|
|
||||||
product_id = default_product_id
|
product_id = default_product_id
|
||||||
chosen_name = parsed.get('product_name', '')
|
chosen_name = parsed.get('product_name', '')
|
||||||
if chosen_name:
|
if chosen_name:
|
||||||
@@ -383,28 +384,32 @@ class ExpensesAgent(BaseAgent):
|
|||||||
data = self._gathered_data
|
data = self._gathered_data
|
||||||
directive_id = self._directive.directive_id if self._directive else ''
|
directive_id = self._directive.directive_id if self._directive else ''
|
||||||
|
|
||||||
if data.get('mode') == 'awaiting_dup_approval':
|
if data.get('mode') == 'awaiting_confirmation':
|
||||||
dup_pairs = getattr(self, '_pending_dup_pairs', [])
|
items = getattr(self, '_confirmation_items', [])
|
||||||
deduped = getattr(self, '_deduped', [])
|
n_dups = sum(1 for _, _, is_dup in items if is_dup)
|
||||||
lines = [f'I found {len(dup_pairs)} suspected duplicate receipt photo(s). '
|
lines = [f'I parsed {len(items)} receipt(s). Please review before I create the expense report:\n']
|
||||||
f'Please review before I create the expense report:\n']
|
lines.append(f' {"#":>3} {"Vendor":<30} {"Amount":>8} {"Date":<12}')
|
||||||
for kept_idx, dup_receipt, dup_parsed in dup_pairs:
|
lines.append(f' {"---":>3} {"-"*30} {"-"*8} {"-"*12}')
|
||||||
kept_receipt, kept_parsed = deduped[kept_idx]
|
for i, (receipt, parsed, is_dup) in enumerate(items, 1):
|
||||||
vendor = (dup_parsed.get('vendor') or kept_parsed.get('vendor', 'Unknown'))
|
vendor = str(parsed.get('vendor') or receipt.get('filename', '?'))[:30]
|
||||||
amount = float(dup_parsed.get('amount', 0))
|
amt = float(parsed.get('amount') or 0)
|
||||||
dt = dup_parsed.get('date', '')
|
dt = str(parsed.get('date') or '')
|
||||||
time_a = kept_parsed.get('time') or ''
|
flag = ' !! duplicate' if is_dup else ''
|
||||||
time_b = dup_parsed.get('time') or ''
|
lines.append(f' {i:>3}. {vendor:<30} ${amt:>7.2f} {dt}{flag}')
|
||||||
line = f'• {vendor} ${amount:.2f} on {dt}'
|
lines.append('')
|
||||||
if time_a or time_b:
|
if n_dups:
|
||||||
line += f' (Photo A at {time_a or "?"}, Photo B at {time_b or "?"})'
|
lines.append(
|
||||||
line += (f'\n Photo A: {kept_receipt.get("filename", "?")}'
|
f'{n_dups} item(s) marked "!! duplicate" appear to be the same receipt '
|
||||||
f'\n Photo B: {dup_receipt.get("filename", "?")}')
|
f'as another entry (possibly an OCR amount mismatch).'
|
||||||
lines.append(line)
|
)
|
||||||
lines.append(
|
lines.append(
|
||||||
'\nReply "skip duplicates" to keep the clearest photo of each, '
|
'Reply "confirm" to create the report and exclude duplicates (recommended).'
|
||||||
'or "keep all" to include every photo as a separate expense.'
|
)
|
||||||
)
|
lines.append(
|
||||||
|
'Reply "confirm, keep all" to include every item even if duplicated.'
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
lines.append('Reply "confirm" to create the expense report.')
|
||||||
return AgentReport(
|
return AgentReport(
|
||||||
directive_id=directive_id, agent=self.name, status='complete',
|
directive_id=directive_id, agent=self.name, status='complete',
|
||||||
summary='\n'.join(lines), data=data,
|
summary='\n'.join(lines), data=data,
|
||||||
|
|||||||
Reference in New Issue
Block a user