feat(infra): add sweep coordinator, structured logging, test suite, and README

Sweep coordinator (Step 16):
- SweepCoordinator runs all 8 agents in parallel with 60s per-agent / 300s total timeout
- Aggregates findings, actions, errors into SweepCoordinatorResult
- Registered in FastAPI lifespan; triggered via POST /sweep

Structured logging (Step 18):
- logging_utils/structured.py: JSONFormatter emitting ts/level/logger/msg + custom fields
- log_directive_event() for structured directive lifecycle logging
- push_to_loki() async Loki push (graceful no-op if LOKI_URL unset)
- configure_logging() replaces root handler at startup

Tests (Steps 17+19):
- conftest.py: mock_odoo, mock_pool, mock_llm fixtures
- test_tool_validator.py: 9 tests covering validation, coercion, hallucination stripping
- test_llm_router.py: 6 tests covering local/cloud/hybrid modes and HIPAA enforcement
- test_peer_bus.py: 6 tests covering registration, timeout, depth, circular detection
- test_finance_agent.py: 10 tests covering all 6 steps + sweep + peer request
- test_memory_manager.py: 3 tests covering context build + hard cap enforcement
- test_dispatch_router.py: 3 tests covering dispatch, rate limit, health endpoint
- test_odoo_client.py: 4 tests covering search_read, write result, unlink warning
- test_e2e_dispatch.py: 2 E2E tests - full dispatch cycle + peer bus communication

README (Step 20): architecture diagram, privacy modes, quick start, env vars, structure

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
ActiveBlue Build
2026-04-12 18:08:11 -04:00
parent fe47f950e4
commit 7487fc73f9
15 changed files with 1042 additions and 0 deletions

View File

@@ -0,0 +1,100 @@
from __future__ import annotations
import asyncio
import logging
from dataclasses import dataclass, field
from typing import Optional
logger = logging.getLogger(__name__)
ALL_AGENT_NAMES = [
'finance_agent', 'accounting_agent', 'crm_agent', 'sales_agent',
'project_agent', 'elearning_agent', 'expenses_agent', 'employees_agent',
]
@dataclass
class SweepCoordinatorResult:
total_agents: int = 0
completed: int = 0
failed: int = 0
total_findings: int = 0
total_actions: int = 0
agent_results: list = field(default_factory=list)
errors: dict = field(default_factory=dict)
class SweepCoordinator:
def __init__(self, peer_bus=None):
self._peer_bus = peer_bus
async def run_sweep(self, agents: Optional[list[str]] = None) -> dict:
names = agents or ALL_AGENT_NAMES
result = SweepCoordinatorResult(total_agents=len(names))
logger.info('SweepCoordinator: starting sweep for %d agents', len(names))
tasks = {}
for name in names:
agent = self._get_agent(name)
if agent is None:
logger.debug('Sweep: agent %s not available, skipping', name)
result.failed += 1
result.errors[name] = 'agent not available'
continue
tasks[name] = asyncio.create_task(self._run_agent_sweep(name, agent))
if not tasks:
logger.warning('SweepCoordinator: no agents available for sweep')
return result.__dict__
done, _ = await asyncio.wait(list(tasks.values()), timeout=300)
for name, task in tasks.items():
if task not in done:
logger.warning('Sweep timeout for agent %s', name)
result.failed += 1
result.errors[name] = 'timeout'
task.cancel()
continue
try:
sweep_report = task.result()
result.completed += 1
result.total_findings += len(sweep_report.findings)
result.total_actions += len(sweep_report.actions)
result.agent_results.append({
'agent': name,
'findings': sweep_report.findings,
'actions': sweep_report.actions,
'summary': sweep_report.summary,
'error': sweep_report.error,
})
logger.info(
'Sweep %s: %d findings, %d actions',
name, len(sweep_report.findings), len(sweep_report.actions),
)
except Exception as exc:
logger.error('Sweep result error for %s: %s', name, exc)
result.failed += 1
result.errors[name] = str(exc)
logger.info(
'SweepCoordinator done: %d completed, %d failed, %d findings, %d actions',
result.completed, result.failed, result.total_findings, result.total_actions,
)
return result.__dict__
async def _run_agent_sweep(self, name: str, agent):
try:
return await asyncio.wait_for(agent.sweep(), timeout=60)
except asyncio.TimeoutError:
from .base_agent import SweepReport
logger.warning('Agent sweep timed out: %s', name)
return SweepReport(agent=name, findings=[], actions=[], error='timeout')
except Exception as exc:
from .base_agent import SweepReport
logger.error('Agent sweep error %s: %s', name, exc)
return SweepReport(agent=name, findings=[], actions=[], error=str(exc))
def _get_agent(self, name: str):
if self._peer_bus is None:
return None
return self._peer_bus.get_agent(name)

View File

@@ -0,0 +1,3 @@
from .structured import configure_logging, get_logger, log_directive_event, push_to_loki
__all__ = ['configure_logging', 'get_logger', 'log_directive_event', 'push_to_loki']

View File

@@ -0,0 +1,93 @@
from __future__ import annotations
import json
import logging
import sys
import time
from typing import Any, Optional
_loki_url: Optional[str] = None
_service_name = 'activeblue-ai'
class JSONFormatter(logging.Formatter):
"""Emit log records as single-line JSON for Loki/structured log ingestion."""
def format(self, record: logging.LogRecord) -> str:
log_entry: dict[str, Any] = {
'ts': time.strftime('%Y-%m-%dT%H:%M:%SZ', time.gmtime(record.created)),
'level': record.levelname,
'logger': record.name,
'msg': record.getMessage(),
'service': _service_name,
}
if record.exc_info:
log_entry['exception'] = self.formatException(record.exc_info)
for key in ('directive_id', 'agent', 'user_id', 'action', 'duration_ms'):
val = getattr(record, key, None)
if val is not None:
log_entry[key] = val
return json.dumps(log_entry, default=str)
def configure_logging(level: str = 'INFO', fmt: str = 'json', loki_url: str = '') -> None:
global _loki_url
_loki_url = loki_url or None
numeric_level = getattr(logging, level.upper(), logging.INFO)
root = logging.getLogger()
root.setLevel(numeric_level)
root.handlers.clear()
handler = logging.StreamHandler(sys.stdout)
if fmt == 'json':
handler.setFormatter(JSONFormatter())
else:
handler.setFormatter(logging.Formatter(
'%(asctime)s %(name)-20s %(levelname)-8s %(message)s',
))
root.addHandler(handler)
logging.getLogger('uvicorn.access').setLevel(logging.WARNING)
logging.getLogger('asyncpg').setLevel(logging.WARNING)
def get_logger(name: str) -> logging.Logger:
return logging.getLogger(name)
class _DirectiveAdapter(logging.LoggerAdapter):
def process(self, msg, kwargs):
kwargs.setdefault('extra', {})
kwargs['extra'].update(self.extra)
return msg, kwargs
def log_directive_event(logger: logging.Logger, level: str, msg: str,
directive_id: str = '', agent: str = '',
user_id: str = '', action: str = '',
duration_ms: int = 0) -> None:
extra = {}
if directive_id:
extra['directive_id'] = directive_id
if agent:
extra['agent'] = agent
if user_id:
extra['user_id'] = user_id
if action:
extra['action'] = action
if duration_ms:
extra['duration_ms'] = duration_ms
getattr(logger, level.lower(), logger.info)(msg, extra=extra)
async def push_to_loki(labels: dict, entries: list[dict]) -> None:
if not _loki_url:
return
try:
import httpx
streams = [{'stream': labels, 'values': [[str(int(e['ts'] * 1e9)), e['line']] for e in entries]}]
payload = {'streams': streams}
async with httpx.AsyncClient(timeout=5) as client:
await client.post(f'{_loki_url}/loki/api/v1/push', json=payload)
except Exception as exc:
logging.getLogger(__name__).debug('Loki push failed: %s', exc)