fix: resolve all 5 startup constructor errors + add DB retry + MCP NotificationOptions fix

Fixes all errors reported by `docker compose logs agent-service`:

1. config.py: add ollama_max_concurrent, claude_timeout, claude_max_concurrent
   fields so LLMRouter(config=settings) can read them without AttributeError.

2. main.py - LLM router: drop manual OllamaBackend/ClaudeBackend construction;
   call LLMRouter(config=settings, pg_pool=pool) to match class signature.
   Fixes: OllamaBackend.__init__() unexpected kwarg 'base_url'.

3. main.py - DB: add 5-attempt retry with a fixed 2s delay between attempts,
   and redacted DSN logging.
   Fixes: connection refused race on startup before Postgres accepts connections.

4. main.py - AgentRegistry: call AgentRegistry() with no args (class takes none),
   then, when an Odoo client is available, await
   agent_registry.load_from_odoo(odoo) to populate active agents.
   Fixes: AgentRegistry.__init__() unexpected kwarg 'odoo'.

5. main.py - PeerBus: pass registry=agent_registry at construction; register
   specialist agents on agent_registry (not peer_bus, which has no register()).
   peer_bus.py: make directive_id optional (default None) — bus is a singleton
   at startup; directive_id is only needed per-request.
   Fixes: PeerBus.__init__() missing positional args 'registry' and 'directive_id'.

6. main.py - MasterAgent: drop unexpected peer_bus= kwarg from constructor call.
   Fixes: MasterAgent.__init__() unexpected kwarg 'peer_bus'.

7. mcp_router.py: pass NotificationOptions() instance instead of None.
   Fixes: AttributeError 'NoneType' has no attribute 'tools_changed'. This fix
   had been applied in the running container but was never committed; it is
   committed here.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Carlos Garcia
2026-04-24 16:48:23 -04:00
parent 66b114cdcf
commit c769fca79f
4 changed files with 41 additions and 49 deletions

View File

@@ -20,7 +20,7 @@ class PeerCircularRequestError(Exception): pass
class PeerBus: class PeerBus:
def __init__(self, registry, directive_id): def __init__(self, registry, directive_id=None):
self._registry = registry self._registry = registry
self._directive_id = directive_id self._directive_id = directive_id
self._call_log: list[dict] = [] self._call_log: list[dict] = []

View File

@@ -14,10 +14,13 @@ class Settings(BaseSettings):
ollama_url: str = 'http://localhost:11434' ollama_url: str = 'http://localhost:11434'
ollama_model: str = 'llama3' ollama_model: str = 'llama3'
ollama_timeout: int = 120 ollama_timeout: int = 120
ollama_max_concurrent: int = 2
# Anthropic / Claude # Anthropic / Claude
anthropic_api_key: str = '' anthropic_api_key: str = ''
claude_model: str = 'claude-sonnet-4-6' claude_model: str = 'claude-sonnet-4-6'
claude_timeout: int = 120
claude_max_concurrent: int = 2
# Privacy # Privacy
llm_privacy_mode: str = 'local' # local | hybrid | cloud llm_privacy_mode: str = 'local' # local | hybrid | cloud

View File

@@ -16,18 +16,32 @@ logger = logging.getLogger(__name__)
async def _init_db(settings) -> asyncpg.Pool: async def _init_db(settings) -> asyncpg.Pool:
pool = await asyncpg.create_pool( redacted_dsn = (
host=settings.postgres_host, f'postgresql://{settings.postgres_user}:***'
port=settings.postgres_port, f'@{settings.postgres_host}:{settings.postgres_port}/{settings.postgres_db}'
database=settings.postgres_db,
user=settings.postgres_user,
password=settings.postgres_password,
min_size=settings.postgres_min_connections,
max_size=settings.postgres_max_connections,
max_inactive_connection_lifetime=300,
) )
logger.info('DB pool created (min=%d max=%d)', settings.postgres_min_connections, settings.postgres_max_connections) last_exc: Exception | None = None
return pool for attempt in range(1, 6):
try:
pool = await asyncpg.create_pool(
host=settings.postgres_host,
port=settings.postgres_port,
database=settings.postgres_db,
user=settings.postgres_user,
password=settings.postgres_password,
min_size=settings.postgres_min_connections,
max_size=settings.postgres_max_connections,
max_inactive_connection_lifetime=300,
)
logger.info('DB pool created (min=%d max=%d dsn=%s)',
settings.postgres_min_connections, settings.postgres_max_connections, redacted_dsn)
return pool
except Exception as exc:
last_exc = exc
logger.warning('DB connect attempt %d/5 failed (dsn=%s): %s', attempt, redacted_dsn, exc)
if attempt < 5:
await asyncio.sleep(2)
raise last_exc # type: ignore[misc]
async def _db_health_loop(pool: asyncpg.Pool) -> None: async def _db_health_loop(pool: asyncpg.Pool) -> None:
@@ -56,35 +70,8 @@ async def lifespan(app: FastAPI):
# 2. LLM Router # 2. LLM Router
try: try:
from .llm.ollama_backend import OllamaBackend
from .llm.llm_config_store import LLMConfigStore
from .llm.llm_router import LLMRouter from .llm.llm_router import LLMRouter
llm_router = LLMRouter(config=settings, pg_pool=pool)
ollama = OllamaBackend(
base_url=settings.ollama_url,
model=settings.ollama_model,
timeout=settings.ollama_timeout,
)
config_store = LLMConfigStore(pool) if pool else None
claude = None
if settings.llm_privacy_mode != 'local' and settings.anthropic_api_key:
from .llm.claude_backend import ClaudeBackend
claude = ClaudeBackend(api_key=settings.anthropic_api_key, model=settings.claude_model)
llm_router = LLMRouter(
ollama=ollama,
claude=claude,
config_store=config_store,
privacy_mode=settings.llm_privacy_mode,
env_overrides={
name: settings.agent_backend_override(name)
for name in [
'finance_agent', 'accounting_agent', 'crm_agent', 'sales_agent',
'project_agent', 'elearning_agent', 'expenses_agent', 'employees_agent',
]
if settings.agent_backend_override(name)
},
)
app_state.set_llm_router(llm_router) app_state.set_llm_router(llm_router)
logger.info('LLM router ready (mode=%s)', settings.llm_privacy_mode) logger.info('LLM router ready (mode=%s)', settings.llm_privacy_mode)
except Exception as exc: except Exception as exc:
@@ -107,7 +94,9 @@ async def lifespan(app: FastAPI):
# 4. Agent registry # 4. Agent registry
try: try:
from .agents.registry import AgentRegistry from .agents.registry import AgentRegistry
agent_registry = AgentRegistry(odoo=odoo, pool=pool) agent_registry = AgentRegistry()
if odoo:
await agent_registry.load_from_odoo(odoo)
app_state.set_agent_registry(agent_registry) app_state.set_agent_registry(agent_registry)
except Exception as exc: except Exception as exc:
logger.error('Failed to init agent registry: %s', exc) logger.error('Failed to init agent registry: %s', exc)
@@ -124,8 +113,9 @@ async def lifespan(app: FastAPI):
# 6. Peer bus + specialist agents # 6. Peer bus + specialist agents
try: try:
from .agents.peer_bus import PeerBus from .agents.peer_bus import PeerBus
peer_bus = PeerBus() peer_bus = PeerBus(registry=agent_registry)
_register_specialist_agents(peer_bus, odoo, llm_router) if agent_registry:
_register_specialist_agents(agent_registry, peer_bus, odoo, llm_router)
except Exception as exc: except Exception as exc:
logger.error('Failed to init peer bus / specialist agents: %s', exc) logger.error('Failed to init peer bus / specialist agents: %s', exc)
peer_bus = None peer_bus = None
@@ -137,7 +127,6 @@ async def lifespan(app: FastAPI):
odoo=odoo, odoo=odoo,
llm=llm_router, llm=llm_router,
memory=memory_mgr, memory=memory_mgr,
peer_bus=peer_bus,
registry=agent_registry, registry=agent_registry,
) )
app_state.set_master_agent(master) app_state.set_master_agent(master)
@@ -164,10 +153,10 @@ async def lifespan(app: FastAPI):
logger.info('Agent service shut down') logger.info('Agent service shut down')
def _register_specialist_agents(peer_bus, odoo, llm_router) -> None: def _register_specialist_agents(agent_registry, peer_bus, odoo, llm_router) -> None:
try: try:
from .agents.finance_agent import FinanceAgent from .agents.finance_agent import FinanceAgent
peer_bus.register('finance_agent', FinanceAgent(odoo=odoo, llm=llm_router, peer_bus=peer_bus)) agent_registry.register('finance_agent', FinanceAgent(odoo=odoo, llm=llm_router, peer_bus=peer_bus))
except Exception as exc: except Exception as exc:
logger.warning('Could not register finance_agent: %s', exc) logger.warning('Could not register finance_agent: %s', exc)
@@ -181,12 +170,11 @@ def _register_specialist_agents(peer_bus, odoo, llm_router) -> None:
'employees_agent': 'EmployeesAgent', 'employees_agent': 'EmployeesAgent',
} }
for agent_name, class_name in specialist_map.items(): for agent_name, class_name in specialist_map.items():
module_name = agent_name.replace('_agent', '_agent')
try: try:
import importlib import importlib
mod = importlib.import_module(f'.agents.{agent_name}', package='agent_service') mod = importlib.import_module(f'.agents.{agent_name}', package='agent_service')
cls = getattr(mod, class_name) cls = getattr(mod, class_name)
peer_bus.register(agent_name, cls(odoo=odoo, llm=llm_router, peer_bus=peer_bus)) agent_registry.register(agent_name, cls(odoo=odoo, llm=llm_router, peer_bus=peer_bus))
except ImportError: except ImportError:
logger.debug('%s module not yet implemented, skipping', agent_name) logger.debug('%s module not yet implemented, skipping', agent_name)
except Exception as exc: except Exception as exc:

View File

@@ -10,6 +10,7 @@ from __future__ import annotations
import logging import logging
from mcp.server.models import InitializationOptions from mcp.server.models import InitializationOptions
from mcp.server.lowlevel import NotificationOptions
from starlette.requests import Request from starlette.requests import Request
from starlette.responses import Response from starlette.responses import Response
from starlette.routing import Route from starlette.routing import Route
@@ -26,7 +27,7 @@ _INIT_OPTIONS = InitializationOptions(
server_name='activeblue-ai', server_name='activeblue-ai',
server_version='0.1.0', server_version='0.1.0',
capabilities=_mcp_server.get_capabilities( capabilities=_mcp_server.get_capabilities(
notification_options=None, notification_options=NotificationOptions(),
experimental_capabilities={}, experimental_capabilities={},
), ),
) )