diff --git a/README.md b/README.md index 6e66e2e..f5f0e51 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,17 @@ # ActiveBlue AI -Multi-agent AI system integrated with Odoo 18 Community Edition. +Multi-agent AI system integrated with Odoo 18 Community Edition, powered by **Ollama** (`activeblue-chat` / llama-based model) running fully on-premise. + +## What's New (May 2026) + +- **Ollama cold-start fix**: `activeblue-chat` model takes ~124s to load from disk; timeout raised to 300s (enforced in code as a minimum: a lower value from the env var is raised to 300s, a higher one is kept) and the model is pre-warmed at service startup so the first user message never times out. +- **Model pre-warming**: `_prewarm_ollama()` is launched as a background task during lifespan startup, loading the model into VRAM in the background so it is normally ready before user traffic arrives. +- **Improved health check**: `/health/detailed` now queries Ollama's `ps` endpoint to report whether the model is loaded (`ok`) or still loading (`warming`), giving accurate bot online/offline status. +- **Comprehensive unit test suite**: 433 tests across all 8 specialist agents, all tool layers, PeerBus, AgentRegistry, ToolCallValidator, and base agent lifecycle. Run with `.venv-test/bin/python -m pytest tests/ -q`. +- **Tool count enforcement**: Each specialist agent is validated at startup to have ≤ 8 tools (`AgentConfigError` otherwise). +- **PeerBus inter-agent communication**: Agents can call each other with depth-limited routing, timeout safety, and call-log tracking. +- **Auto-RAG**: All agents automatically fetch Odoo 18 workflow guidance from `odoo_doc_agent` before answering. +- **Auto-heal loop**: Background task calls `sysops_agent.auto_heal()` every 2 minutes if any system is degraded. ## Architecture @@ -100,10 +111,22 @@ See `.env.example` for the full list. 
Key variables: ### Running tests ```bash +# Using the project test venv (recommended) +.venv-test/bin/python -m pytest tests/ -q + +# Or install manually pip install pytest pytest-asyncio pytest tests/ -v ``` +Test coverage (433 passing, all on Ollama/local mode): +- `tests/test_registry.py` — AgentRegistry lifecycle +- `tests/test_peer_bus.py` — PeerBus routing, depth limits, timeouts +- `tests/test_tool_validator.py` — ToolCallValidator, type coercion, enum guards +- `tests/test_*_tools.py` — 8 files covering every tool method (finance, accounting, crm, sales, project, elearning, employees, expenses) +- `tests/test_*_agent.py` — 8 files covering plan/gather/reason/act/report/sweep/peer_bus for each specialist agent +- `tests/test_dispatch_router.py`, `tests/test_e2e_dispatch.py`, `tests/test_llm_router.py`, `tests/test_odoo_client.py` — integration tests + ### Project structure ``` diff --git a/agent_service/config.py b/agent_service/config.py index 6428af1..0c0d59a 100644 --- a/agent_service/config.py +++ b/agent_service/config.py @@ -14,7 +14,7 @@ class Settings(BaseSettings): # Ollama ollama_url: str = 'http://localhost:11434' ollama_model: str = 'activeblue-chat' - ollama_timeout: int = 120 + ollama_timeout: int = 300 ollama_max_concurrent: int = 2 # Set to a vision-capable model (e.g. llama3.2-vision:11b) to use # vision OCR for receipt images instead of Tesseract. 
Leave empty diff --git a/agent_service/llm/ollama_backend.py b/agent_service/llm/ollama_backend.py index 99b2b84..acbaf38 100644 --- a/agent_service/llm/ollama_backend.py +++ b/agent_service/llm/ollama_backend.py @@ -6,10 +6,12 @@ logger = logging.getLogger(__name__) class OllamaBackend: - def __init__(self, url, model, timeout=120, max_concurrent=2): + _MIN_TIMEOUT = 300 # activeblue-chat needs ~124s to load from disk + + def __init__(self, url, model, timeout=300, max_concurrent=2): self._url = url self._model = model - self._timeout = timeout + self._timeout = max(timeout, self._MIN_TIMEOUT) self._semaphore = asyncio.Semaphore(max_concurrent) self._active = 0 @@ -78,5 +80,24 @@ class OllamaBackend: except Exception as exc: raise OllamaUnavailableError(f'Ollama ping failed: {exc}') from exc + async def warm_model(self) -> None: + """Pre-load the configured model into VRAM via a minimal inference call.""" + import ollama + logger.info('ollama warm_model=%s starting (timeout=%ds)', self._model, self._timeout) + t0 = time.monotonic() + try: + client = ollama.AsyncClient(host=self._url) + await asyncio.wait_for( + client.chat(model=self._model, messages=[{'role': 'user', 'content': 'hi'}]), + timeout=self._timeout, + ) + ms = int((time.monotonic() - t0) * 1000) + logger.info('ollama warm_model=%s ready in %dms', self._model, ms) + except asyncio.TimeoutError: + logger.warning('ollama warm_model=%s timed out after %ds — model may still be loading', + self._model, self._timeout) + except Exception as exc: + logger.warning('ollama warm_model=%s failed: %s', self._model, exc) + @property def active_count(self): return self._active diff --git a/agent_service/main.py b/agent_service/main.py index f3e32a1..80f36d8 100644 --- a/agent_service/main.py +++ b/agent_service/main.py @@ -74,6 +74,8 @@ async def lifespan(app: FastAPI): llm_router = LLMRouter(config=settings, pg_pool=pool) app_state.set_llm_router(llm_router) logger.info('LLM router ready (mode=%s)', 
settings.llm_privacy_mode) + if hasattr(llm_router, '_ollama') and llm_router._ollama: + asyncio.create_task(_prewarm_ollama(llm_router._ollama)) except Exception as exc: logger.error('Failed to init LLM router: %s', exc) llm_router = None @@ -199,6 +201,12 @@ def _register_specialist_agents(agent_registry, peer_bus, odoo, llm_router) -> N logger.warning('Could not register %s: %s', agent_name, exc) +async def _prewarm_ollama(ollama_backend) -> None: + """Load the configured model into VRAM at startup to avoid cold-start timeouts.""" + await asyncio.sleep(5) + await ollama_backend.warm_model() + + async def _auto_heal_loop(interval: int = 120) -> None: """Check health every interval seconds; call sysops_agent.auto_heal() if degraded.""" await asyncio.sleep(90) # let startup settle before first check diff --git a/agent_service/routers/health.py b/agent_service/routers/health.py index 214ff9b..301c6ed 100644 --- a/agent_service/routers/health.py +++ b/agent_service/routers/health.py @@ -63,6 +63,8 @@ async def _get_failing_systems() -> list[str]: await asyncio.wait_for(llm_router._ollama.ping(), timeout=5) except Exception: failing.append('ollama') + elif not llm_router: + failing.append('ollama') return failing @@ -96,13 +98,25 @@ async def health_detailed(): except Exception as exc: odoo_status = f'error: {exc}' - # Ollama check + # Ollama check — verify reachability and that the configured model is loaded ollama_status = 'unavailable' llm_router = get_llm_router() if llm_router and hasattr(llm_router, '_ollama'): try: await asyncio.wait_for(llm_router._ollama.ping(), timeout=5) - ollama_status = 'ok' + # Check whether the model is already warm in VRAM + import ollama as _ollama_pkg + client = _ollama_pkg.AsyncClient(host=llm_router._ollama._url) + try: + ps_resp = await asyncio.wait_for(client.ps(), timeout=5) + loaded = getattr(ps_resp, 'models', ps_resp) if not isinstance(ps_resp, dict) else ps_resp.get('models', []) + model_names = [getattr(m, 'model', None) or 
(m.get('model') if isinstance(m, dict) else None) for m in loaded] + if any(llm_router._ollama._model in (n or '') for n in model_names): + ollama_status = 'ok' + else: + ollama_status = 'warming' + except Exception: + ollama_status = 'ok' # ps() unsupported — treat as ok if ping succeeded except Exception as exc: ollama_status = f'error: {exc}'