ping() was calling ollama.AsyncClient.list(), which parses /api/tags with the ollama==0.3.3 pydantic models. Vision models carry metadata fields that 0.3.x cannot deserialise, raising ValidationError -> OllamaUnavailableError. This caused the /health/detailed ollama field to report 'error: ...' instead of 'ok', so the ab_ai_bot.py REQUIRED_SYSTEMS check failed and the bot never went online even though the service was up. Fix: ping() now uses an httpx GET to /api/version — model-agnostic, no metadata parsing, always fast regardless of which model is loaded. Also fix LLMRouter to accept direct backend injection for testability (ollama=, claude=, privacy_mode=, env_overrides= kwargs), add an _env_overrides lookup in hybrid get_backend(), and fix cloud mode to return ollama when _claude is None. All 6 test_llm_router tests now pass. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
134 lines
6.2 KiB
Python
134 lines
6.2 KiB
Python
from __future__ import annotations
|
|
import logging, os
|
|
from .llm_types import LLMResponse, OllamaUnavailableError, ClaudeTimeoutError, ClaudeRateLimitError
|
|
from .ollama_backend import OllamaBackend
|
|
from .llm_config_store import LLMConfigStore
|
|
|
|
# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)
|
|
|
|
# Callers that are pinned to the local Ollama backend: LLMRouter.get_backend()
# returns 'ollama' for them before looking at the privacy mode, and
# LLMRouter.set_backend() raises ValueError instead of storing an override.
HIPAA_LOCKED_AGENTS = frozenset((
    'finance_agent',
    'accounting_agent',
    'employees_agent',
    'expenses_agent',
))
|
|
|
|
|
|
class LLMRouter:
    """Route LLM requests to either the Ollama backend or the Claude backend.

    The backend for a request is chosen per caller by :meth:`get_backend`
    from the current privacy mode ('local', 'hybrid' or 'cloud'). Callers
    listed in ``HIPAA_LOCKED_AGENTS`` are always routed to Ollama.
    """

    # Backend names that get_backend() may return and set_backend() accepts.
    _VALID_BACKENDS = ('ollama', 'claude')
    # Privacy modes accepted by set_privacy_mode().
    _VALID_PRIVACY_MODES = ('local', 'hybrid', 'cloud')

    def __init__(self, config=None, pg_pool=None, *,
                 ollama=None, claude=None, privacy_mode=None, env_overrides=None):
        """Initialise the router.

        Production usage: pass *config* (a Settings object) and optionally
        *pg_pool* for the runtime config store.

        Test/injection usage: pass *ollama*, *claude*, *privacy_mode*, and
        *env_overrides* as keyword arguments; *config* may be omitted.

        When *config* is given, the backends and the privacy mode are built
        from it and the injected *ollama*, *claude* and *privacy_mode*
        arguments are not used. *env_overrides* (caller -> backend name) is
        copied and honoured in both cases.
        """
        self._env_overrides: dict = dict(env_overrides or {})

        if config is None:
            # Injection path used in tests.
            self._config = None
            self._config_store = None
            self._privacy_mode = privacy_mode or 'local'
            self._ollama = ollama
            self._claude = claude
            return

        self._config = config
        self._privacy_mode = getattr(config, 'llm_privacy_mode', 'local')
        self._config_store = LLMConfigStore(pg_pool) if pg_pool else None
        self._ollama = OllamaBackend(
            url=config.ollama_url,
            model=config.ollama_model,
            timeout=config.ollama_timeout,
            max_concurrent=config.ollama_max_concurrent,
        )
        # Claude is never constructed in local mode.
        self._claude = None
        if self._privacy_mode != 'local':
            self._claude = self._build_claude_backend()

    def _build_claude_backend(self):
        """Build a ClaudeBackend from the stored config, or return None.

        Returns None (after logging why) when the config carries no
        ``anthropic_api_key``; this also covers the injection path where
        there is no config at all.
        """
        api_key = getattr(self._config, 'anthropic_api_key', None)
        if not api_key:
            if self._privacy_mode == 'cloud':
                logger.error('Privacy mode is cloud but ANTHROPIC_API_KEY not set')
            else:
                logger.warning('Privacy mode is hybrid but ANTHROPIC_API_KEY not set')
            return None

        # Imported lazily so the Claude module is only loaded when it is used.
        from .claude_backend import ClaudeBackend

        backend = ClaudeBackend(
            api_key=api_key,
            model=self._config.claude_model,
            timeout=self._config.claude_timeout,
            max_concurrent=self._config.claude_max_concurrent,
        )
        logger.info('ClaudeBackend initialized mode=%s', self._privacy_mode)
        return backend

    async def submit(self, messages, tools=None, caller='unknown'):
        """Send *messages* to the backend selected for *caller*.

        Falls back to Ollama when Claude is selected but not available, or
        when the Claude call raises ClaudeTimeoutError / ClaudeRateLimitError.
        Any other exception from either backend propagates to the caller.
        """
        backend_name = await self.get_backend(caller)
        if backend_name == 'claude':
            if self._claude is None:
                logger.warning('Claude requested but unavailable, fallback to Ollama caller=%s', caller)
            else:
                try:
                    return await self._claude.submit(messages, tools, caller)
                except (ClaudeTimeoutError, ClaudeRateLimitError) as exc:
                    logger.warning('Claude failed caller=%s (%s), falling back to Ollama', caller, exc)
        return await self._ollama.submit(messages, tools, caller)

    async def get_backend(self, caller: str) -> str:
        """Return the backend name ('ollama' or 'claude') to use for *caller*.

        Resolution order:

        1. HIPAA-locked callers and 'local' mode -> 'ollama'.
        2. 'cloud' mode -> 'claude' if a Claude backend exists, else 'ollama'.
        3. 'hybrid' mode -> the first valid value among the injected
           ``env_overrides``, the runtime config store, and the
           ``AGENT_BACKEND_<CALLER>`` environment variable; else 'ollama'.
        4. Any other mode is logged as an error and treated as 'ollama'.

        Values from every source are checked against the known backend
        names, so a malformed override or a non-backend row in the config
        store (set_privacy_mode() stores the mode string under
        '__system__') is never returned as a backend.
        """
        if caller in HIPAA_LOCKED_AGENTS or self._privacy_mode == 'local':
            return 'ollama'
        if self._privacy_mode == 'cloud':
            return 'claude' if self._claude is not None else 'ollama'
        if self._privacy_mode != 'hybrid':
            logger.error('Unknown privacy mode %s, defaulting to ollama', self._privacy_mode)
            return 'ollama'

        if caller in self._env_overrides:
            override = self._env_overrides[caller]
            if override in self._VALID_BACKENDS:
                return override
            logger.warning('Ignoring invalid backend override %r for caller=%s', override, caller)

        if self._config_store:
            try:
                db_val = await self._config_store.get_backend(caller)
            except Exception as exc:
                # Best effort: a failing store must not block routing.
                logger.warning('LLMConfigStore lookup failed: %s', exc)
            else:
                if db_val in self._VALID_BACKENDS:
                    return db_val
                if db_val:
                    logger.warning('Ignoring invalid stored backend %r for caller=%s', db_val, caller)

        env_val = os.environ.get(f'AGENT_BACKEND_{caller.upper()}')
        if env_val in self._VALID_BACKENDS:
            return env_val
        return 'ollama'

    async def set_backend(self, caller, backend, set_by, note=None):
        """Persist a backend override for *caller* in the runtime config store.

        Raises:
            ValueError: *caller* is HIPAA-locked, or *backend* is not
                'ollama' or 'claude'.
            RuntimeError: the router has no config store (no Postgres pool).
        """
        if caller in HIPAA_LOCKED_AGENTS:
            raise ValueError(f'Cannot override backend for HIPAA-locked agent: {caller}')
        if backend not in self._VALID_BACKENDS:
            raise ValueError(f'Invalid backend: {backend}')
        if not self._config_store:
            raise RuntimeError('No Postgres pool for runtime config store')
        await self._config_store.set_backend(caller, backend, set_by, note)

    async def set_privacy_mode(self, mode, set_by):
        """Switch the privacy mode at runtime.

        The new mode is stored under the '__system__' key when a config
        store is configured. Switching to 'local' drops the Claude backend;
        switching to 'hybrid' or 'cloud' builds one if none exists and an
        API key is available (a missing key is logged, not raised).

        Raises:
            ValueError: *mode* is not 'local', 'hybrid' or 'cloud'.
        """
        if mode not in self._VALID_PRIVACY_MODES:
            raise ValueError(f'Invalid privacy mode: {mode}')

        self._privacy_mode = mode
        if self._config_store:
            await self._config_store.set_backend(
                '__system__', mode, set_by, f'Privacy mode changed to {mode}')

        if mode == 'local':
            self._claude = None
            logger.info('Privacy mode set to local - ClaudeBackend disabled')
        elif self._claude is None:
            self._claude = self._build_claude_backend()
        logger.info('Privacy mode set to %s by user_id=%s', mode, set_by)

    async def get_status(self) -> dict:
        """Return the privacy mode and the active-request count per backend."""
        status = {
            'privacy_mode': self._privacy_mode,
            'ollama': {'active': self._ollama.active_count},
        }
        if self._claude:
            status['claude'] = {'active': self._claude.active_count}
        else:
            status['claude'] = {'available': False, 'reason': 'local mode or no API key'}
        return status

    @property
    def ollama_queue_depth(self):
        """The Ollama backend's ``active_count``."""
        return self._ollama.active_count

    @property
    def claude_active_count(self):
        """The Claude backend's ``active_count``, or 0 when there is none."""
        return self._claude.active_count if self._claude else 0
|