fix: explicit per-system health checks gate online status
action_ping now checks db, odoo, ollama, and master_agent individually. All four must report 'ok' for the bot to go online.

Presence is updated immediately inside action_ping (not as a separate cron step), so every ping — whether from the cron or a manual button press — atomically checks all systems and sets the correct online/offline/error state.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -56,38 +56,38 @@ class AbAiBot(models.Model):
|
|||||||
headers['X-ActiveBlue-Signature'] = self.webhook_secret
|
headers['X-ActiveBlue-Signature'] = self.webhook_secret
|
||||||
return headers
|
return headers
|
||||||
|
|
||||||
|
# Systems that must all report 'ok' for the bot to be considered online.
|
||||||
|
REQUIRED_SYSTEMS = ['db', 'odoo', 'ollama', 'master_agent']
|
||||||
|
|
||||||
def action_ping(self):
|
def action_ping(self):
|
||||||
self.ensure_one()
|
self.ensure_one()
|
||||||
url = self._get_service_url() + '/health/detailed'
|
url = self._get_service_url() + '/health/detailed'
|
||||||
try:
|
try:
|
||||||
resp = requests.get(url, timeout=5, headers=self._build_headers())
|
resp = requests.get(url, timeout=5, headers=self._build_headers())
|
||||||
if resp.status_code != 200:
|
if resp.status_code != 200:
|
||||||
self.write({'status': 'error'})
|
self.write({'status': 'error', 'last_ping': fields.Datetime.now()})
|
||||||
|
self._sync_bot_user_presence(online=False)
|
||||||
return {'type': 'ir.actions.client', 'tag': 'display_notification',
|
return {'type': 'ir.actions.client', 'tag': 'display_notification',
|
||||||
'params': {'message': _('AI service returned %s') % resp.status_code, 'type': 'warning'}}
|
'params': {'message': _('AI service returned %s') % resp.status_code, 'type': 'warning'}}
|
||||||
data = resp.json() if resp.content else {}
|
data = resp.json() if resp.content else {}
|
||||||
# Bot is only "online" when every backend the LLM router needs is ok.
|
|
||||||
# Local privacy mode requires Ollama; cloud requires Claude. We treat
|
# Check every required system individually.
|
||||||
# any backend whose status is not 'ok' as a hard failure for the
|
checks = {s: data.get(s) == 'ok' for s in self.REQUIRED_SYSTEMS}
|
||||||
# privacy mode in use, plus DB and master agent are always required.
|
failing = [s for s, ok in checks.items() if not ok]
|
||||||
db_ok = data.get('db') == 'ok'
|
|
||||||
master_ok = data.get('master_agent') == 'ok'
|
if not failing:
|
||||||
mode = data.get('privacy_mode') or self.privacy_mode
|
|
||||||
ollama_ok = data.get('ollama') == 'ok'
|
|
||||||
llm_ok = ollama_ok if mode == 'local' else True
|
|
||||||
if db_ok and master_ok and llm_ok:
|
|
||||||
self.write({'status': 'online', 'last_ping': fields.Datetime.now()})
|
self.write({'status': 'online', 'last_ping': fields.Datetime.now()})
|
||||||
|
self._sync_bot_user_presence(online=True)
|
||||||
return {'type': 'ir.actions.client', 'tag': 'display_notification',
|
return {'type': 'ir.actions.client', 'tag': 'display_notification',
|
||||||
'params': {'message': _('AI service is online'), 'type': 'success'}}
|
'params': {'message': _('AI service is online — all systems operational'), 'type': 'success'}}
|
||||||
|
|
||||||
self.write({'status': 'error', 'last_ping': fields.Datetime.now()})
|
self.write({'status': 'error', 'last_ping': fields.Datetime.now()})
|
||||||
reason = ', '.join(
|
self._sync_bot_user_presence(online=False)
|
||||||
f'{k}={data.get(k)}' for k in ('db', 'master_agent', 'ollama')
|
|
||||||
if data.get(k) and data.get(k) != 'ok'
|
|
||||||
) or 'degraded'
|
|
||||||
return {'type': 'ir.actions.client', 'tag': 'display_notification',
|
return {'type': 'ir.actions.client', 'tag': 'display_notification',
|
||||||
'params': {'message': _('AI service degraded: %s') % reason, 'type': 'warning'}}
|
'params': {'message': _('AI service degraded — failing: %s') % ', '.join(failing), 'type': 'warning'}}
|
||||||
except Exception as exc:
|
except Exception as exc:
|
||||||
self.write({'status': 'offline'})
|
self.write({'status': 'offline', 'last_ping': fields.Datetime.now()})
|
||||||
|
self._sync_bot_user_presence(online=False)
|
||||||
return {'type': 'ir.actions.client', 'tag': 'display_notification',
|
return {'type': 'ir.actions.client', 'tag': 'display_notification',
|
||||||
'params': {'message': _('AI service unreachable: %s') % exc, 'type': 'danger'}}
|
'params': {'message': _('AI service unreachable: %s') % exc, 'type': 'danger'}}
|
||||||
|
|
||||||
@@ -151,17 +151,11 @@ class AbAiBot(models.Model):
|
|||||||
|
|
||||||
@api.model
|
@api.model
|
||||||
def cron_ping_all(self):
|
def cron_ping_all(self):
|
||||||
any_online = False
|
|
||||||
for bot in self.search([('active', '=', True)]):
|
for bot in self.search([('active', '=', True)]):
|
||||||
try:
|
try:
|
||||||
bot.action_ping()
|
bot.action_ping()
|
||||||
if bot.status == 'online':
|
|
||||||
any_online = True
|
|
||||||
except Exception as exc:
|
except Exception as exc:
|
||||||
_logger.warning('Ping failed for bot %s: %s', bot.id, exc)
|
_logger.warning('Ping failed for bot %s: %s', bot.id, exc)
|
||||||
# Mirror agent-service health to the bot user's Discuss presence so it
|
|
||||||
# shows a green dot when the agent is reachable.
|
|
||||||
self._sync_bot_user_presence(online=any_online)
|
|
||||||
|
|
||||||
@api.model
|
@api.model
|
||||||
def _sync_bot_user_presence(self, online):
|
def _sync_bot_user_presence(self, online):
|
||||||
|
|||||||
Reference in New Issue
Block a user