diff --git a/README.md b/README.md
index 6e66e2e..f5f0e51 100644
--- a/README.md
+++ b/README.md
@@ -1,6 +1,17 @@
 # ActiveBlue AI
 
-Multi-agent AI system integrated with Odoo 18 Community Edition.
+Multi-agent AI system integrated with Odoo 18 Community Edition, powered by **Ollama** (serving the llama-based `activeblue-chat` model) and running fully on-premise.
+
+## What's New (May 2026)
+
+- **Ollama cold-start fix**: the `activeblue-chat` model takes ~124s to load from disk, so the default timeout is raised to 300s (the backend enforces 300s as a minimum, so a lower value from the env var is ignored) and the model is pre-warmed at service startup so that the first user message should not hit a cold model and time out.
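+- **Model pre-warming**: `_prewarm_ollama()` is launched as a background task during lifespan startup; after a 5-second delay it calls `OllamaBackend.warm_model()`, which sends a minimal chat request so the model is loaded into VRAM, ideally before any user traffic arrives.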
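+- **Improved health check**: `/health/detailed` now queries Ollama's `ps` endpoint to report whether the configured model is loaded (`ok`) or not yet loaded (`warming`), giving a more accurate bot online/offline status; if the `ps` call itself fails but the ping succeeded, the status falls back to `ok`.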
+- **Comprehensive unit test suite**: 433 tests across all 8 specialist agents, all tool layers, PeerBus, AgentRegistry, ToolCallValidator, and base agent lifecycle. Run with `.venv-test/bin/python -m pytest tests/ -q`.
+- **Tool count enforcement**: Each specialist agent is validated at startup to have ≤ 8 tools (`AgentConfigError` otherwise).
+- **PeerBus inter-agent communication**: Agents can call each other with depth-limited routing, timeout safety, and call-log tracking.
+- **Auto-RAG**: All agents automatically fetch Odoo 18 workflow guidance from `odoo_doc_agent` before answering.
+- **Auto-heal loop**: A background task checks system health every 2 minutes (first check 90 seconds after startup) and calls `sysops_agent.auto_heal()` if any system is degraded.
 
 ## Architecture
 
@@ -100,10 +111,22 @@ See `.env.example` for the full list. Key variables:
 ### Running tests
 
 ```bash
+# Using the project test venv (recommended)
+.venv-test/bin/python -m pytest tests/ -q
+
+# Or install manually
 pip install pytest pytest-asyncio
 pytest tests/ -v
 ```
 
+Test suite layout (433 tests passing, all run in Ollama/local mode):
+- `tests/test_registry.py` — AgentRegistry lifecycle
+- `tests/test_peer_bus.py` — PeerBus routing, depth limits, timeouts
+- `tests/test_tool_validator.py` — ToolCallValidator, type coercion, enum guards
+- `tests/test_*_tools.py` — 8 files covering every tool method (finance, accounting, crm, sales, project, elearning, employees, expenses)
+- `tests/test_*_agent.py` — 8 files covering plan/gather/reason/act/report/sweep/peer_bus for each specialist agent
+- `tests/test_dispatch_router.py`, `tests/test_e2e_dispatch.py`, `tests/test_llm_router.py`, `tests/test_odoo_client.py` — integration tests
+
 ### Project structure
 
 ```
diff --git a/agent_service/config.py b/agent_service/config.py
index 6428af1..0c0d59a 100644
--- a/agent_service/config.py
+++ b/agent_service/config.py
@@ -14,7 +14,7 @@ class Settings(BaseSettings):
     # Ollama
     ollama_url: str = 'http://localhost:11434'
     ollama_model: str = 'activeblue-chat'
-    ollama_timeout: int = 120
+    ollama_timeout: int = 300
     ollama_max_concurrent: int = 2
     # Set to a vision-capable model (e.g. llama3.2-vision:11b) to use
     # vision OCR for receipt images instead of Tesseract.  Leave empty
diff --git a/agent_service/llm/ollama_backend.py b/agent_service/llm/ollama_backend.py
index 99b2b84..acbaf38 100644
--- a/agent_service/llm/ollama_backend.py
+++ b/agent_service/llm/ollama_backend.py
@@ -6,10 +6,12 @@ logger = logging.getLogger(__name__)
 
 
 class OllamaBackend:
-    def __init__(self, url, model, timeout=120, max_concurrent=2):
+    _MIN_TIMEOUT = 300  # activeblue-chat needs ~124s to load from disk
+
+    def __init__(self, url, model, timeout=300, max_concurrent=2):
         self._url = url
         self._model = model
-        self._timeout = timeout
+        self._timeout = max(timeout, self._MIN_TIMEOUT)
         self._semaphore = asyncio.Semaphore(max_concurrent)
         self._active = 0
 
@@ -78,5 +80,24 @@ class OllamaBackend:
         except Exception as exc:
             raise OllamaUnavailableError(f'Ollama ping failed: {exc}') from exc
 
+    async def warm_model(self) -> None:
+        """Pre-load the configured model into VRAM via a minimal inference call."""
+        import ollama  # imported inside the method rather than at module level
+        logger.info('ollama warm_model=%s starting (timeout=%ds)', self._model, self._timeout)
+        t0 = time.monotonic()  # start mark for the elapsed-ms figure logged on success
+        try:
+            client = ollama.AsyncClient(host=self._url)
+            await asyncio.wait_for(  # the chat reply is discarded; only completion matters
+                client.chat(model=self._model, messages=[{'role': 'user', 'content': 'hi'}]),
+                timeout=self._timeout,  # same timeout the backend was constructed with
+            )
+            ms = int((time.monotonic() - t0) * 1000)
+            logger.info('ollama warm_model=%s ready in %dms', self._model, ms)
+        except asyncio.TimeoutError:  # best-effort: log a warning and return instead of raising
+            logger.warning('ollama warm_model=%s timed out after %ds — model may still be loading',
+                           self._model, self._timeout)
+        except Exception as exc:  # any other Exception is likewise logged, not propagated
+            logger.warning('ollama warm_model=%s failed: %s', self._model, exc)
+
     @property
     def active_count(self): return self._active
diff --git a/agent_service/main.py b/agent_service/main.py
index f3e32a1..80f36d8 100644
--- a/agent_service/main.py
+++ b/agent_service/main.py
@@ -74,6 +74,8 @@ async def lifespan(app: FastAPI):
         llm_router = LLMRouter(config=settings, pg_pool=pool)
         app_state.set_llm_router(llm_router)
         logger.info('LLM router ready (mode=%s)', settings.llm_privacy_mode)
+        if hasattr(llm_router, '_ollama') and llm_router._ollama:
+            asyncio.create_task(_prewarm_ollama(llm_router._ollama))
     except Exception as exc:
         logger.error('Failed to init LLM router: %s', exc)
         llm_router = None
@@ -199,6 +201,12 @@ def _register_specialist_agents(agent_registry, peer_bus, odoo, llm_router) -> N
             logger.warning('Could not register %s: %s', agent_name, exc)
 
 
+async def _prewarm_ollama(ollama_backend) -> None:
+    """Load the configured model into VRAM at startup to avoid cold-start timeouts."""
+    await asyncio.sleep(5)  # fixed 5 s delay before warming; presumably lets startup settle — TODO confirm
+    await ollama_backend.warm_model()  # NOTE(review): relies on the backend's warm_model() to handle its own errors
+
+
 async def _auto_heal_loop(interval: int = 120) -> None:
     """Check health every interval seconds; call sysops_agent.auto_heal() if degraded."""
     await asyncio.sleep(90)  # let startup settle before first check
diff --git a/agent_service/routers/health.py b/agent_service/routers/health.py
index 214ff9b..301c6ed 100644
--- a/agent_service/routers/health.py
+++ b/agent_service/routers/health.py
@@ -63,6 +63,8 @@ async def _get_failing_systems() -> list[str]:
             await asyncio.wait_for(llm_router._ollama.ping(), timeout=5)
         except Exception:
             failing.append('ollama')
+    elif not llm_router:
+        failing.append('ollama')
 
     return failing
 
@@ -96,13 +98,25 @@ async def health_detailed():
         except Exception as exc:
             odoo_status = f'error: {exc}'
 
-    # Ollama check
+    # Ollama check — verify reachability and that the configured model is loaded
     ollama_status = 'unavailable'
     llm_router = get_llm_router()
     if llm_router and hasattr(llm_router, '_ollama'):
         try:
             await asyncio.wait_for(llm_router._ollama.ping(), timeout=5)
-            ollama_status = 'ok'
+            # Check whether the model is already warm in VRAM
+            import ollama as _ollama_pkg
+            client = _ollama_pkg.AsyncClient(host=llm_router._ollama._url)
+            try:
+                ps_resp = await asyncio.wait_for(client.ps(), timeout=5)
+                loaded = getattr(ps_resp, 'models', ps_resp) if not isinstance(ps_resp, dict) else ps_resp.get('models', [])
+                model_names = [getattr(m, 'model', None) or (m.get('model') if isinstance(m, dict) else None) for m in loaded]
+                if any(llm_router._ollama._model in (n or '') for n in model_names):
+                    ollama_status = 'ok'
+                else:
+                    ollama_status = 'warming'
+            except Exception:
+                ollama_status = 'ok'  # ps() unsupported — treat as ok if ping succeeded
         except Exception as exc:
             ollama_status = f'error: {exc}'