fix: explicit per-system health checks gate online status

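action_ping now checks db, odoo, ollama, and master_agent individually.
All four must report 'ok' for the bot to go online; Ollama is now required
regardless of privacy mode (previously it was only checked in local mode).
Presence is updated inside action_ping itself rather than as a separate
step at the end of cron_ping_all, so every ping — whether from the cron or
a manual button press — checks all systems and sets the matching
online/offline/error status and presence in the same call. last_ping is
now also recorded when the service returns a non-200 response or is
unreachable.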

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Carlos Garcia
2026-05-19 16:07:59 -04:00
parent 99cc19195a
commit eeea45b37f

View File

@@ -56,38 +56,38 @@ class AbAiBot(models.Model):
headers['X-ActiveBlue-Signature'] = self.webhook_secret
return headers
# Systems that must all report 'ok' for the bot to be considered online.
REQUIRED_SYSTEMS = ['db', 'odoo', 'ollama', 'master_agent']
def action_ping(self):
self.ensure_one()
url = self._get_service_url() + '/health/detailed'
try:
resp = requests.get(url, timeout=5, headers=self._build_headers())
if resp.status_code != 200:
self.write({'status': 'error'})
self.write({'status': 'error', 'last_ping': fields.Datetime.now()})
self._sync_bot_user_presence(online=False)
return {'type': 'ir.actions.client', 'tag': 'display_notification',
'params': {'message': _('AI service returned %s') % resp.status_code, 'type': 'warning'}}
data = resp.json() if resp.content else {}
# Bot is only "online" when every backend the LLM router needs is ok.
# Local privacy mode requires Ollama; cloud requires Claude. We treat
# any backend whose status is not 'ok' as a hard failure for the
# privacy mode in use, plus DB and master agent are always required.
db_ok = data.get('db') == 'ok'
master_ok = data.get('master_agent') == 'ok'
mode = data.get('privacy_mode') or self.privacy_mode
ollama_ok = data.get('ollama') == 'ok'
llm_ok = ollama_ok if mode == 'local' else True
if db_ok and master_ok and llm_ok:
# Check every required system individually.
checks = {s: data.get(s) == 'ok' for s in self.REQUIRED_SYSTEMS}
failing = [s for s, ok in checks.items() if not ok]
if not failing:
self.write({'status': 'online', 'last_ping': fields.Datetime.now()})
self._sync_bot_user_presence(online=True)
return {'type': 'ir.actions.client', 'tag': 'display_notification',
'params': {'message': _('AI service is online'), 'type': 'success'}}
'params': {'message': _('AI service is online — all systems operational'), 'type': 'success'}}
self.write({'status': 'error', 'last_ping': fields.Datetime.now()})
reason = ', '.join(
f'{k}={data.get(k)}' for k in ('db', 'master_agent', 'ollama')
if data.get(k) and data.get(k) != 'ok'
) or 'degraded'
self._sync_bot_user_presence(online=False)
return {'type': 'ir.actions.client', 'tag': 'display_notification',
'params': {'message': _('AI service degraded: %s') % reason, 'type': 'warning'}}
'params': {'message': _('AI service degraded — failing: %s') % ', '.join(failing), 'type': 'warning'}}
except Exception as exc:
self.write({'status': 'offline'})
self.write({'status': 'offline', 'last_ping': fields.Datetime.now()})
self._sync_bot_user_presence(online=False)
return {'type': 'ir.actions.client', 'tag': 'display_notification',
'params': {'message': _('AI service unreachable: %s') % exc, 'type': 'danger'}}
@@ -151,17 +151,11 @@ class AbAiBot(models.Model):
@api.model
def cron_ping_all(self):
any_online = False
for bot in self.search([('active', '=', True)]):
try:
bot.action_ping()
if bot.status == 'online':
any_online = True
except Exception as exc:
_logger.warning('Ping failed for bot %s: %s', bot.id, exc)
# Mirror agent-service health to the bot user's Discuss presence so it
# shows a green dot when the agent is reachable.
self._sync_bot_user_presence(online=any_online)
@api.model
def _sync_bot_user_presence(self, online):