fix: bot presence stays offline after vision model change

ping() was calling ollama.AsyncClient.list(), which parses /api/tags with
the ollama==0.3.3 pydantic models. Vision models carry metadata fields that
0.3.x cannot deserialise, so the call raised ValidationError, which ping()
re-raised as OllamaUnavailableError. As a result the /health/detailed ollama
field reported 'error: ...' instead of 'ok', the REQUIRED_SYSTEMS check in
ab_ai_bot.py failed, and the bot never went online even though the service
was up.

Fix: ping() now uses httpx GET /api/version — model-agnostic, no metadata
parsing, always fast regardless of which model is loaded.

Also change LLMRouter to accept direct backend injection for testability
(keyword-only ollama=, claude=, privacy_mode=, env_overrides= arguments; config
may then be omitted), add an _env_overrides lookup to the hybrid branch of
get_backend() that is checked before the config store, and make cloud mode
fall back to 'ollama' when _claude is None. All 6 test_llm_router tests now
pass.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Carlos Garcia
2026-05-20 19:15:39 -04:00
parent 2f9791f925
commit b23ab77ee9
2 changed files with 53 additions and 26 deletions

View File

@@ -10,26 +10,45 @@ HIPAA_LOCKED_AGENTS = frozenset({'finance_agent', 'accounting_agent', 'employees
class LLMRouter: class LLMRouter:
def __init__(self, config, pg_pool=None): def __init__(self, config=None, pg_pool=None, *,
self._config = config ollama=None, claude=None, privacy_mode=None, env_overrides=None):
self._privacy_mode = getattr(config, 'llm_privacy_mode', 'local') """Initialise the router.
self._config_store = LLMConfigStore(pg_pool) if pg_pool else None
self._ollama = OllamaBackend( Production usage: pass *config* (a Settings object) and optionally
url=config.ollama_url, model=config.ollama_model, *pg_pool* for the runtime config store.
timeout=config.ollama_timeout, max_concurrent=config.ollama_max_concurrent)
self._claude = None Test/injection usage: pass *ollama*, *claude*, *privacy_mode*, and
if self._privacy_mode != 'local': *env_overrides* as keyword arguments; *config* may be omitted.
api_key = getattr(config, 'anthropic_api_key', None) """
if api_key: self._env_overrides: dict = dict(env_overrides or {})
from .claude_backend import ClaudeBackend
self._claude = ClaudeBackend( if config is not None:
api_key=api_key, model=config.claude_model, self._config = config
timeout=config.claude_timeout, max_concurrent=config.claude_max_concurrent) self._privacy_mode = getattr(config, 'llm_privacy_mode', 'local')
logger.info('ClaudeBackend initialized mode=%s', self._privacy_mode) self._config_store = LLMConfigStore(pg_pool) if pg_pool else None
elif self._privacy_mode == 'cloud': self._ollama = OllamaBackend(
logger.error('Privacy mode is cloud but ANTHROPIC_API_KEY not set') url=config.ollama_url, model=config.ollama_model,
else: timeout=config.ollama_timeout, max_concurrent=config.ollama_max_concurrent)
logger.warning('Privacy mode is hybrid but ANTHROPIC_API_KEY not set') self._claude = None
if self._privacy_mode != 'local':
api_key = getattr(config, 'anthropic_api_key', None)
if api_key:
from .claude_backend import ClaudeBackend
self._claude = ClaudeBackend(
api_key=api_key, model=config.claude_model,
timeout=config.claude_timeout, max_concurrent=config.claude_max_concurrent)
logger.info('ClaudeBackend initialized mode=%s', self._privacy_mode)
elif self._privacy_mode == 'cloud':
logger.error('Privacy mode is cloud but ANTHROPIC_API_KEY not set')
else:
logger.warning('Privacy mode is hybrid but ANTHROPIC_API_KEY not set')
else:
# Injection path used in tests.
self._config = None
self._config_store = None
self._privacy_mode = privacy_mode or 'local'
self._ollama = ollama
self._claude = claude
async def submit(self, messages, tools=None, caller='unknown'): async def submit(self, messages, tools=None, caller='unknown'):
backend_name = await self.get_backend(caller) backend_name = await self.get_backend(caller)
@@ -51,8 +70,10 @@ class LLMRouter:
if self._privacy_mode == 'local': if self._privacy_mode == 'local':
return 'ollama' return 'ollama'
if self._privacy_mode == 'cloud': if self._privacy_mode == 'cloud':
return 'claude' return 'claude' if self._claude is not None else 'ollama'
if self._privacy_mode == 'hybrid': if self._privacy_mode == 'hybrid':
if caller in self._env_overrides:
return self._env_overrides[caller]
if self._config_store: if self._config_store:
try: try:
db_val = await self._config_store.get_backend(caller) db_val = await self._config_store.get_backend(caller)

View File

@@ -70,12 +70,18 @@ class OllamaBackend:
self._active -= 1 self._active -= 1
async def ping(self) -> None: async def ping(self) -> None:
"""Raise if Ollama is unreachable.""" """Raise if Ollama is unreachable.
import ollama
client = ollama.AsyncClient(host=self._url) Uses /api/version rather than /api/tags so the check is model-agnostic
and not affected by vision-model metadata that older ollama-python
releases cannot deserialise.
"""
import httpx
try: try:
await asyncio.wait_for(client.list(), timeout=5) async with httpx.AsyncClient() as client:
except asyncio.TimeoutError: r = await client.get(f'{self._url}/api/version', timeout=5.0)
r.raise_for_status()
except httpx.TimeoutException:
raise OllamaUnavailableError('Ollama ping timed out') raise OllamaUnavailableError('Ollama ping timed out')
except Exception as exc: except Exception as exc:
raise OllamaUnavailableError(f'Ollama ping failed: {exc}') from exc raise OllamaUnavailableError(f'Ollama ping failed: {exc}') from exc