expenses_agent: batch LLM calls + skip RAG to fix timeout on large uploads
- auto_rag=False: skip the PeerBus odoo_doc_agent call on every execute(); eliminates 30s of Ollama semaphore contention before parsing even starts
- _batch_parse_receipts(): Phase 1 is regex (instant per receipt: amount, date, bank-statement skip); Phase 2 is a single batched LLM call for all vendor + product_name values instead of N individual calls; vision mode falls back to per-receipt calls (images can't be batched); LLM fallback on bad JSON or a wrong item count
- _act() updated to use _batch_parse_receipts()
- 7 new tests covering the batch happy path, regex-only amounts, private-key cleanup, bank-statement skip, malformed-JSON fallback, wrong-count fallback, and the no-products short-circuit (99 tests total, all passing)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -856,6 +856,190 @@ async def test_non_image_mimetype_uses_text_path_in_vision_mode():
|
||||
)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# _batch_parse_receipts — batched LLM call for vendor + product_name
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _make_receipt(filename='receipt.jpg', text='Acme\nTotal: $10.00',
|
||||
b64='', mimetype='image/jpeg', date_from_name=None):
|
||||
"""Build a minimal receipt dict as produced by parse_upload."""
|
||||
return {'filename': filename, 'text': text, 'b64': b64,
|
||||
'mimetype': mimetype, 'date_from_name': date_from_name,
|
||||
'sha256': 'abc'}
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_batch_parse_single_llm_call_for_multiple_receipts():
    """Three text receipts are resolved with exactly one (batched) LLM call."""
    agent = _make_agent()

    # One canned reply holding a JSON array with one entry per receipt.
    batch_reply = MagicMock(content=(
        '[{"vendor":"Shell","product_name":"Fuel"},'
        '{"vendor":"Marriott","product_name":"Travel"},'
        '{"vendor":"Chipotle","product_name":"Meals"}]'
    ))
    agent._llm.submit = AsyncMock(return_value=batch_reply)

    catalog = [{'id': 1, 'name': 'Meals'}, {'id': 2, 'name': 'Travel'}, {'id': 3, 'name': 'Fuel'}]
    uploads = [
        _make_receipt('a.txt', 'Shell Gas\nTotal: $45.00'),
        _make_receipt('b.txt', 'Marriott Hotel\nAmount Due: $180.00'),
        _make_receipt('c.txt', 'Chipotle\nTotal: $12.75'),
    ]

    # Force the text (non-vision) path for the duration of the call.
    vision_mode = patch('agent_service.agents.expenses_agent._get_vision_mode', return_value='text')
    with vision_mode:
        parsed = await agent._batch_parse_receipts(uploads, catalog)

    agent._llm.submit.assert_called_once()
    assert len(parsed) == 3
    # Vendors must come back in the same order as the input receipts.
    for position, expected_vendor in enumerate(('Shell', 'Marriott', 'Chipotle')):
        assert parsed[position]['vendor'] == expected_vendor
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_batch_parse_amounts_from_regex_not_llm():
    """The amount is taken from the receipt text; the LLM reply carries none."""
    agent = _make_agent()

    # The canned LLM reply deliberately has no amount field.
    agent._llm.submit = AsyncMock(
        return_value=MagicMock(content='[{"vendor":"Acme","product_name":"Supplies"}]')
    )

    uploads = [_make_receipt('r.txt', 'Acme Store\nTotal: $99.99')]
    catalog = [{'id': 1, 'name': 'Supplies'}]

    with patch('agent_service.agents.expenses_agent._get_vision_mode', return_value='text'):
        parsed = await agent._batch_parse_receipts(uploads, catalog)

    first_entry = parsed[0]
    assert first_entry['amount'] == 99.99
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_batch_parse_no_private_keys_in_results():
    """No result dict may expose a key that starts with an underscore."""
    agent = _make_agent()
    agent._llm.submit = AsyncMock(
        return_value=MagicMock(content='[{"vendor":"Acme","product_name":"Meals"}]')
    )

    uploads = [_make_receipt('r.txt', 'Acme\nTotal: $10.00')]
    catalog = [{'id': 1, 'name': 'Meals'}]

    with patch('agent_service.agents.expenses_agent._get_vision_mode', return_value='text'):
        parsed = await agent._batch_parse_receipts(uploads, catalog)

    for item in parsed:
        # Collect any leftover internal bookkeeping keys for the failure message.
        private = list(filter(lambda key: key.startswith('_'), item))
        assert not private, f'Private keys not cleaned up: {private}'
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_batch_parse_bank_statement_skipped_no_llm():
    """A bank statement in the batch is marked skipped; the real receipt is parsed."""
    agent = _make_agent()
    agent._llm.submit = AsyncMock(
        return_value=MagicMock(content='[{"vendor":"Shell","product_name":"Fuel"}]')
    )

    # Twelve transaction-style lines, meant to trip the bank-statement check.
    statement_lines = [f'05/{i+1:02d} MERCHANT {i} ${10 + i}.99' for i in range(12)]
    statement_text = '\n'.join(statement_lines)

    uploads = [
        _make_receipt('stmt.pdf', statement_text),
        _make_receipt('real.txt', 'Shell Gas\nTotal: $45.00'),
    ]
    catalog = [{'id': 1, 'name': 'Fuel'}]

    with patch('agent_service.agents.expenses_agent._get_vision_mode', return_value='text'):
        parsed = await agent._batch_parse_receipts(uploads, catalog)

    # A single LLM call: only the genuine receipt needs vendor/product lookup.
    agent._llm.submit.assert_called_once()

    statement_entry, receipt_entry = parsed[0], parsed[1]
    # The statement is flagged as skipped and carries a zero amount.
    assert statement_entry.get('skip') is True
    assert statement_entry['amount'] == 0.0
    # The genuine receipt went through normal parsing.
    assert receipt_entry['vendor'] == 'Shell'
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_batch_parse_falls_back_on_malformed_json():
    """An unparseable batch reply triggers one individual LLM call per receipt."""
    agent = _make_agent()

    per_receipt_reply = MagicMock(content='{"vendor":"Shell","product_name":"Travel"}')
    submit_calls = 0

    async def _fake_submit(messages, caller=None):
        # First call is the batch attempt and gets garbage; later calls succeed.
        nonlocal submit_calls
        submit_calls += 1
        if submit_calls > 1:
            return per_receipt_reply
        return MagicMock(content='not valid json at all')

    agent._llm.submit = _fake_submit

    uploads = [
        _make_receipt('a.txt', 'Shell\nTotal: $45.00'),
        _make_receipt('b.txt', 'Marriott\nTotal: $180.00'),
    ]
    catalog = [{'id': 1, 'name': 'Travel'}]

    with patch('agent_service.agents.expenses_agent._get_vision_mode', return_value='text'):
        parsed = await agent._batch_parse_receipts(uploads, catalog)

    # 1 batch attempt + 2 individual fallback calls = 3
    assert submit_calls == 3
    assert len(parsed) == 2
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_batch_parse_falls_back_on_wrong_item_count():
    """A batch reply whose array length mismatches the receipts triggers fallback."""
    agent = _make_agent()

    per_receipt_reply = MagicMock(content='{"vendor":"Shell","product_name":"Travel"}')
    submit_calls = 0

    async def _fake_submit(messages, caller=None):
        nonlocal submit_calls
        submit_calls += 1
        if submit_calls > 1:
            return per_receipt_reply
        # Batch attempt: valid JSON, but only 1 item where 2 are expected.
        return MagicMock(content='[{"vendor":"Shell","product_name":"Travel"}]')

    agent._llm.submit = _fake_submit

    uploads = [
        _make_receipt('a.txt', 'Shell\nTotal: $45.00'),
        _make_receipt('b.txt', 'Marriott\nTotal: $180.00'),
    ]
    catalog = [{'id': 1, 'name': 'Travel'}]

    with patch('agent_service.agents.expenses_agent._get_vision_mode', return_value='text'):
        parsed = await agent._batch_parse_receipts(uploads, catalog)

    # 1 batch attempt + 2 individual fallback calls = 3
    assert submit_calls == 3
    assert len(parsed) == 2
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_batch_parse_no_products_skips_llm():
    """With an empty product list the LLM is never invoked, yet a result is returned."""
    agent = _make_agent()
    llm_submit = AsyncMock()
    agent._llm.submit = llm_submit

    uploads = [_make_receipt('r.txt', 'Acme\nTotal: $10.00')]
    no_products = []

    with patch('agent_service.agents.expenses_agent._get_vision_mode', return_value='text'):
        parsed = await agent._batch_parse_receipts(uploads, no_products)

    # Check via the agent attribute, in case the agent rebinds it.
    agent._llm.submit.assert_not_called()
    assert len(parsed) == 1
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# parse_upload — receipt_parser.py
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
Reference in New Issue
Block a user