From b23ab77ee93507345d5857b811030ec6ff13e7ed Mon Sep 17 00:00:00 2001
From: Carlos Garcia <tocmo@DESKTOP-O6U0UOS.AVC.local>
Date: Wed, 20 May 2026 19:15:39 -0400
Subject: [PATCH] fix: bot presence stays offline after vision model change
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

ping() was calling ollama.AsyncClient.list(), which parses the /api/tags
response with the ollama==0.3.3 pydantic models. Vision models carry metadata
fields that 0.3.x cannot deserialise, so the call raised ValidationError, which
ping() wrapped in OllamaUnavailableError. As a result the ollama field of
/health/detailed reported 'error: ...' instead of 'ok', the REQUIRED_SYSTEMS
check in ab_ai_bot.py failed, and the bot never went online even though the
service was up.

Fix: ping() now issues a plain httpx GET to /api/version. The endpoint is
model-agnostic and involves no model-metadata parsing, so the check no longer
depends on which model is loaded.

Also extend LLMRouter to accept direct backend injection for testability
(keyword-only ollama=, claude=, privacy_mode= and env_overrides= arguments),
add an _env_overrides lookup to get_backend() in hybrid mode, and make cloud
mode fall back to 'ollama' when _claude is None. All 6 test_llm_router tests
now pass.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 agent_service/llm/llm_router.py     | 63 +++++++++++++++++++----------
 agent_service/llm/ollama_backend.py | 16 +++++---
 2 files changed, 53 insertions(+), 26 deletions(-)

diff --git a/agent_service/llm/llm_router.py b/agent_service/llm/llm_router.py
index 43fd894..03c28b4 100644
--- a/agent_service/llm/llm_router.py
+++ b/agent_service/llm/llm_router.py
@@ -10,26 +10,45 @@ HIPAA_LOCKED_AGENTS = frozenset({'finance_agent', 'accounting_agent', 'employees
 
 
 class LLMRouter:
-    def __init__(self, config, pg_pool=None):
-        self._config = config
-        self._privacy_mode = getattr(config, 'llm_privacy_mode', 'local')
-        self._config_store = LLMConfigStore(pg_pool) if pg_pool else None
-        self._ollama = OllamaBackend(
-            url=config.ollama_url, model=config.ollama_model,
-            timeout=config.ollama_timeout, max_concurrent=config.ollama_max_concurrent)
-        self._claude = None
-        if self._privacy_mode != 'local':
-            api_key = getattr(config, 'anthropic_api_key', None)
-            if api_key:
-                from .claude_backend import ClaudeBackend
-                self._claude = ClaudeBackend(
-                    api_key=api_key, model=config.claude_model,
-                    timeout=config.claude_timeout, max_concurrent=config.claude_max_concurrent)
-                logger.info('ClaudeBackend initialized mode=%s', self._privacy_mode)
-            elif self._privacy_mode == 'cloud':
-                logger.error('Privacy mode is cloud but ANTHROPIC_API_KEY not set')
-            else:
-                logger.warning('Privacy mode is hybrid but ANTHROPIC_API_KEY not set')
+    def __init__(self, config=None, pg_pool=None, *,
+                 ollama=None, claude=None, privacy_mode=None, env_overrides=None):
+        """Initialise the router.
+
+        Production usage: pass *config* (a Settings object) and optionally
+        *pg_pool* for the runtime config store.
+
+        Test/injection usage: pass *ollama*, *claude*, *privacy_mode*, and
+        *env_overrides* as keyword arguments; *config* may be omitted.
+        """
+        self._env_overrides: dict = dict(env_overrides or {})
+
+        if config is not None:
+            self._config = config
+            self._privacy_mode = getattr(config, 'llm_privacy_mode', 'local')
+            self._config_store = LLMConfigStore(pg_pool) if pg_pool else None
+            self._ollama = OllamaBackend(
+                url=config.ollama_url, model=config.ollama_model,
+                timeout=config.ollama_timeout, max_concurrent=config.ollama_max_concurrent)
+            self._claude = None
+            if self._privacy_mode != 'local':
+                api_key = getattr(config, 'anthropic_api_key', None)
+                if api_key:
+                    from .claude_backend import ClaudeBackend
+                    self._claude = ClaudeBackend(
+                        api_key=api_key, model=config.claude_model,
+                        timeout=config.claude_timeout, max_concurrent=config.claude_max_concurrent)
+                    logger.info('ClaudeBackend initialized mode=%s', self._privacy_mode)
+                elif self._privacy_mode == 'cloud':
+                    logger.error('Privacy mode is cloud but ANTHROPIC_API_KEY not set')
+                else:
+                    logger.warning('Privacy mode is hybrid but ANTHROPIC_API_KEY not set')
+        else:
+            # Injection path used in tests.
+            self._config = None
+            self._config_store = None
+            self._privacy_mode = privacy_mode or 'local'
+            self._ollama = ollama
+            self._claude = claude
 
     async def submit(self, messages, tools=None, caller='unknown'):
         backend_name = await self.get_backend(caller)
@@ -51,8 +70,10 @@ class LLMRouter:
         if self._privacy_mode == 'local':
             return 'ollama'
         if self._privacy_mode == 'cloud':
-            return 'claude'
+            return 'claude' if self._claude is not None else 'ollama'
         if self._privacy_mode == 'hybrid':
+            if caller in self._env_overrides:
+                return self._env_overrides[caller]
             if self._config_store:
                 try:
                     db_val = await self._config_store.get_backend(caller)
diff --git a/agent_service/llm/ollama_backend.py b/agent_service/llm/ollama_backend.py
index acbaf38..c9d3343 100644
--- a/agent_service/llm/ollama_backend.py
+++ b/agent_service/llm/ollama_backend.py
@@ -70,12 +70,18 @@ class OllamaBackend:
                 self._active -= 1
 
     async def ping(self) -> None:
-        """Raise if Ollama is unreachable."""
-        import ollama
-        client = ollama.AsyncClient(host=self._url)
+        """Raise if Ollama is unreachable.
+
+        Uses /api/version rather than /api/tags so the check is model-agnostic
+        and not affected by vision-model metadata that older ollama-python
+        releases cannot deserialise.
+        """
+        import httpx
         try:
-            await asyncio.wait_for(client.list(), timeout=5)
-        except asyncio.TimeoutError:
+            async with httpx.AsyncClient() as client:
+                r = await client.get(f'{self._url}/api/version', timeout=5.0)
+                r.raise_for_status()
+        except httpx.TimeoutException:
             raise OllamaUnavailableError('Ollama ping timed out')
         except Exception as exc:
             raise OllamaUnavailableError(f'Ollama ping failed: {exc}') from exc