From b23ab77ee93507345d5857b811030ec6ff13e7ed Mon Sep 17 00:00:00 2001 From: Carlos Garcia Date: Wed, 20 May 2026 19:15:39 -0400 Subject: [PATCH] fix: bot presence stays offline after vision model change MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ping() was calling ollama.AsyncClient.list() which parses /api/tags with ollama==0.3.3 pydantic models. Vision models carry metadata fields that 0.3.x cannot deserialise, raising ValidationError -> OllamaUnavailableError. This made the /health/detailed ollama field 'error: ...' instead of 'ok', so ab_ai_bot.py REQUIRED_SYSTEMS check failed and the bot never went online even though the service was up. Fix: ping() now uses httpx GET /api/version — model-agnostic, no metadata parsing, always fast regardless of which model is loaded. Also fix LLMRouter to accept direct backend injection for testability (ollama=, claude=, privacy_mode=, env_overrides= kwargs), add _env_overrides lookup in hybrid get_backend(), and fix cloud mode to return ollama when _claude is None. All 6 test_llm_router tests now pass. Co-Authored-By: Claude Sonnet 4.6 --- agent_service/llm/llm_router.py | 63 +++++++++++++++++++---------- agent_service/llm/ollama_backend.py | 16 +++++--- 2 files changed, 53 insertions(+), 26 deletions(-) diff --git a/agent_service/llm/llm_router.py b/agent_service/llm/llm_router.py index 43fd894..03c28b4 100644 --- a/agent_service/llm/llm_router.py +++ b/agent_service/llm/llm_router.py @@ -10,26 +10,45 @@ HIPAA_LOCKED_AGENTS = frozenset({'finance_agent', 'accounting_agent', 'employees class LLMRouter: - def __init__(self, config, pg_pool=None): - self._config = config - self._privacy_mode = getattr(config, 'llm_privacy_mode', 'local') - self._config_store = LLMConfigStore(pg_pool) if pg_pool else None - self._ollama = OllamaBackend( - url=config.ollama_url, model=config.ollama_model, - timeout=config.ollama_timeout, max_concurrent=config.ollama_max_concurrent) - self._claude = None - if self._privacy_mode != 'local': - api_key = getattr(config, 'anthropic_api_key', None) - if api_key: - from .claude_backend import ClaudeBackend - self._claude = ClaudeBackend( - api_key=api_key, model=config.claude_model, - timeout=config.claude_timeout, max_concurrent=config.claude_max_concurrent) - logger.info('ClaudeBackend initialized mode=%s', self._privacy_mode) - elif self._privacy_mode == 'cloud': - logger.error('Privacy mode is cloud but ANTHROPIC_API_KEY not set') - else: - logger.warning('Privacy mode is hybrid but ANTHROPIC_API_KEY not set') + def __init__(self, config=None, pg_pool=None, *, + ollama=None, claude=None, privacy_mode=None, env_overrides=None): + """Initialise the router. + + Production usage: pass *config* (a Settings object) and optionally + *pg_pool* for the runtime config store. + + Test/injection usage: pass *ollama*, *claude*, *privacy_mode*, and + *env_overrides* as keyword arguments; *config* may be omitted. + """ + self._env_overrides: dict = dict(env_overrides or {}) + + if config is not None: + self._config = config + self._privacy_mode = getattr(config, 'llm_privacy_mode', 'local') + self._config_store = LLMConfigStore(pg_pool) if pg_pool else None + self._ollama = OllamaBackend( + url=config.ollama_url, model=config.ollama_model, + timeout=config.ollama_timeout, max_concurrent=config.ollama_max_concurrent) + self._claude = None + if self._privacy_mode != 'local': + api_key = getattr(config, 'anthropic_api_key', None) + if api_key: + from .claude_backend import ClaudeBackend + self._claude = ClaudeBackend( + api_key=api_key, model=config.claude_model, + timeout=config.claude_timeout, max_concurrent=config.claude_max_concurrent) + logger.info('ClaudeBackend initialized mode=%s', self._privacy_mode) + elif self._privacy_mode == 'cloud': + logger.error('Privacy mode is cloud but ANTHROPIC_API_KEY not set') + else: + logger.warning('Privacy mode is hybrid but ANTHROPIC_API_KEY not set') + else: + # Injection path used in tests. + self._config = None + self._config_store = None + self._privacy_mode = privacy_mode or 'local' + self._ollama = ollama + self._claude = claude async def submit(self, messages, tools=None, caller='unknown'): backend_name = await self.get_backend(caller) @@ -51,8 +70,10 @@ class LLMRouter: if self._privacy_mode == 'local': return 'ollama' if self._privacy_mode == 'cloud': - return 'claude' + return 'claude' if self._claude is not None else 'ollama' if self._privacy_mode == 'hybrid': + if caller in self._env_overrides: + return self._env_overrides[caller] if self._config_store: try: db_val = await self._config_store.get_backend(caller) diff --git a/agent_service/llm/ollama_backend.py b/agent_service/llm/ollama_backend.py index acbaf38..c9d3343 100644 --- a/agent_service/llm/ollama_backend.py +++ b/agent_service/llm/ollama_backend.py @@ -70,12 +70,18 @@ class OllamaBackend: self._active -= 1 async def ping(self) -> None: - """Raise if Ollama is unreachable.""" - import ollama - client = ollama.AsyncClient(host=self._url) + """Raise if Ollama is unreachable. + + Uses /api/version rather than /api/tags so the check is model-agnostic + and not affected by vision-model metadata that older ollama-python + releases cannot deserialise. + """ + import httpx try: - await asyncio.wait_for(client.list(), timeout=5) - except asyncio.TimeoutError: + async with httpx.AsyncClient() as client: + r = await client.get(f'{self._url}/api/version', timeout=5.0) + r.raise_for_status() + except httpx.TimeoutException: raise OllamaUnavailableError('Ollama ping timed out') except Exception as exc: raise OllamaUnavailableError(f'Ollama ping failed: {exc}') from exc