from __future__ import annotations import logging, os from .llm_types import LLMResponse, OllamaUnavailableError, ClaudeTimeoutError, ClaudeRateLimitError from .ollama_backend import OllamaBackend from .llm_config_store import LLMConfigStore logger = logging.getLogger(__name__) HIPAA_LOCKED_AGENTS = frozenset({'finance_agent', 'accounting_agent', 'employees_agent', 'expenses_agent'}) class LLMRouter: def __init__(self, config, pg_pool=None): self._config = config self._privacy_mode = getattr(config, 'llm_privacy_mode', 'local') self._config_store = LLMConfigStore(pg_pool) if pg_pool else None self._ollama = OllamaBackend( url=config.ollama_url, model=config.ollama_model, timeout=config.ollama_timeout, max_concurrent=config.ollama_max_concurrent) self._claude = None if self._privacy_mode != 'local': api_key = getattr(config, 'anthropic_api_key', None) if api_key: from .claude_backend import ClaudeBackend self._claude = ClaudeBackend( api_key=api_key, model=config.claude_model, timeout=config.claude_timeout, max_concurrent=config.claude_max_concurrent) logger.info('ClaudeBackend initialized mode=%s', self._privacy_mode) elif self._privacy_mode == 'cloud': logger.error('Privacy mode is cloud but ANTHROPIC_API_KEY not set') else: logger.warning('Privacy mode is hybrid but ANTHROPIC_API_KEY not set') async def submit(self, messages, tools=None, caller='unknown'): backend_name = await self.get_backend(caller) if backend_name == 'claude': if self._claude is None: logger.warning('Claude requested but unavailable, fallback to Ollama caller=%s', caller) backend_name = 'ollama' else: try: return await self._claude.submit(messages, tools, caller) except (ClaudeTimeoutError, ClaudeRateLimitError) as exc: logger.warning('Claude failed caller=%s (%s), falling back to Ollama', caller, exc) return await self._ollama.submit(messages, tools, caller) return await self._ollama.submit(messages, tools, caller) async def get_backend(self, caller): if caller in HIPAA_LOCKED_AGENTS: return 'ollama' if self._privacy_mode == 'local': return 'ollama' if self._privacy_mode == 'cloud': return 'claude' if self._privacy_mode == 'hybrid': if self._config_store: try: db_val = await self._config_store.get_backend(caller) if db_val: return db_val except Exception as exc: logger.warning('LLMConfigStore lookup failed: %s', exc) env_key = f'AGENT_BACKEND_{caller.upper()}' env_val = os.environ.get(env_key) if env_val in ('ollama', 'claude'): return env_val return 'ollama' logger.error('Unknown privacy mode %s, defaulting to ollama', self._privacy_mode) return 'ollama' async def set_backend(self, caller, backend, set_by, note=None): if caller in HIPAA_LOCKED_AGENTS: raise ValueError(f'Cannot override backend for HIPAA-locked agent: {caller}') if backend not in ('ollama', 'claude'): raise ValueError(f'Invalid backend: {backend}') if not self._config_store: raise RuntimeError('No Postgres pool for runtime config store') await self._config_store.set_backend(caller, backend, set_by, note) async def set_privacy_mode(self, mode, set_by): if mode not in ('local', 'hybrid', 'cloud'): raise ValueError(f'Invalid privacy mode: {mode}') self._privacy_mode = mode if self._config_store: await self._config_store.set_backend('__system__', mode, set_by, f'Privacy mode changed to {mode}') if mode == 'local': self._claude = None logger.info('Privacy mode set to local - ClaudeBackend disabled') elif mode in ('hybrid', 'cloud') and self._claude is None: api_key = getattr(self._config, 'anthropic_api_key', None) if api_key: from .claude_backend import ClaudeBackend self._claude = ClaudeBackend( api_key=api_key, model=self._config.claude_model, timeout=self._config.claude_timeout, max_concurrent=self._config.claude_max_concurrent) logger.info('Privacy mode set to %s by user_id=%s', mode, set_by) async def get_status(self): s = {'privacy_mode': self._privacy_mode, 'ollama': {'active': self._ollama.active_count}} if self._claude: s['claude'] = {'active': self._claude.active_count} else: s['claude'] = {'available': False, 'reason': 'local mode or no API key'} return s @property def ollama_queue_depth(self): return self._ollama.active_count @property def claude_active_count(self): return self._claude.active_count if self._claude else 0