Files
odoo-ai/agent_service/tools/sysops_tools.py
Carlos Garcia 8d1727b498 feat: sysops_agent — Docker/git self-management with auto-heal
Adds a new specialist agent that gives the AI system control over its
own infrastructure:

- sysops_tools.py: docker SDK (ps/logs/restart) + git CLI (pull/status/log)
  + Odoo channel notifier for autonomous action broadcasts
- sysops_agent.py: BaseAgent subclass handling on-demand chat requests,
  auto_heal() triggered by health failures, and sweep() for audits
- Background auto-heal loop (main.py): runs every 2 minutes, calls
  _get_failing_systems() and triggers auto_heal() when degraded
- health.py: extracted _get_failing_systems() helper reused by both
  the /health/detailed endpoint and the auto-heal loop
- docker-compose.yml: mount docker socket + /root/odoo workspace +
  SSH keys for git authentication
- Dockerfile: add git to apt-get
- requirements.txt: add docker==7.1.0 Python SDK

Auto-heal behavior:
  - Detects failing containers, restarts them, notifies all bot DM channels
  - Ollama (192.168.2.9) is flagged as external and skipped
  - On-demand via chat: "restart agent", "check logs", "pull latest code"

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-05-19 17:01:57 -04:00

138 lines
5.0 KiB
Python

from __future__ import annotations
import asyncio
import functools
import logging
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Default git working tree used by the git helpers below.
REPO_PATH = '/workspace/odoo-ai'

# Only these container names may be passed to the logs/restart helpers.
ALLOWED_CONTAINERS = frozenset((
    'activeblue-agent',
    'activeblue-agent-db',
    'odoo-web-1',
    'odoo-db-1',
))

# Health-check system name -> container responsible for that system.
# 'ollama' has no entry on purpose: it is external (192.168.2.9) and
# cannot be managed from here.
SYSTEM_CONTAINER_MAP = {
    'db': 'activeblue-agent-db',
    'odoo': 'odoo-web-1',
    'master_agent': 'activeblue-agent',
}
class SysopsTools:
    """Docker, git and Odoo-notification helpers for self-management.

    Docker operations use the blocking ``docker`` SDK and are therefore run
    in the event loop's default executor.  Git operations run as asyncio
    subprocesses.  Per-container operations are restricted to the names in
    ``ALLOWED_CONTAINERS``.
    """

    def __init__(self, odoo=None):
        """Store the optional Odoo client; the Docker client is created lazily.

        Args:
            odoo: Object exposing awaitable ``search_read`` and ``call``
                methods.  When falsy, ``notify_all_bot_channels`` does
                nothing and returns 0.
        """
        self._odoo = odoo
        self._docker_client = None

    def _get_docker(self):
        """Return the cached Docker client, creating it on first use."""
        if self._docker_client is None:
            # Imported lazily so the module loads without the docker SDK.
            import docker
            self._docker_client = docker.from_env()
        return self._docker_client

    async def _docker(self, fn, *args, **kwargs):
        """Run ``fn(client, *args, **kwargs)`` in the default executor.

        Args:
            fn: Blocking callable whose first argument is the Docker client.

        Returns:
            Whatever ``fn`` returns.
        """
        # We are inside a coroutine, so a running loop is guaranteed.
        loop = asyncio.get_running_loop()
        client = self._get_docker()
        return await loop.run_in_executor(
            None, functools.partial(fn, client, *args, **kwargs)
        )

    @staticmethod
    def _require_allowed(container: str) -> None:
        """Raise ``ValueError`` unless *container* is in ``ALLOWED_CONTAINERS``."""
        if container not in ALLOWED_CONTAINERS:
            raise ValueError(f'Container {container!r} not in allowed list')

    async def docker_ps(self) -> list[dict]:
        """List all containers (``all=True``) as plain dicts.

        Returns:
            One dict per container with keys ``name``, ``status``, ``id``
            (short id) and ``image`` (first tag, or the first 12 characters
            of the image id when the image has no tags).
        """
        def _ps(client):
            return [
                {
                    'name': c.name,
                    'status': c.status,
                    'id': c.short_id,
                    'image': c.image.tags[0] if c.image.tags else str(c.image.id)[:12],
                }
                for c in client.containers.list(all=True)
            ]

        return await self._docker(_ps)

    async def docker_logs(self, container: str, lines: int = 50) -> str:
        """Return the last *lines* log lines of an allowed container.

        Raises:
            ValueError: If *container* is not in ``ALLOWED_CONTAINERS``.
        """
        self._require_allowed(container)

        def _logs(client):
            c = client.containers.get(container)
            # Undecodable bytes are replaced rather than raising.
            return c.logs(tail=lines, stream=False).decode(errors='replace')

        return await self._docker(_logs)

    async def docker_restart(self, container: str) -> str:
        """Restart an allowed container and return a confirmation string.

        Raises:
            ValueError: If *container* is not in ``ALLOWED_CONTAINERS``.
        """
        self._require_allowed(container)

        def _restart(client):
            c = client.containers.get(container)
            c.restart(timeout=30)

        await self._docker(_restart)
        logger.info('sysops: restarted %s', container)
        return f'{container} restarted'

    @staticmethod
    async def _kill_and_reap(proc) -> None:
        """Kill a still-running subprocess and wait briefly for it to exit."""
        try:
            proc.kill()
        except OSError as exc:
            # Typically ProcessLookupError: the process exited on its own
            # between the timeout firing and the kill.
            logger.debug('sysops: kill of git process failed: %s', exc)
        try:
            # Reap the child so it does not linger as a zombie.  Bounded,
            # because a grandchild holding the output pipe open could
            # otherwise keep this wait from returning.
            await asyncio.wait_for(proc.wait(), timeout=5)
        except asyncio.TimeoutError:
            logger.warning('sysops: git process %s did not exit after kill', proc.pid)

    async def _run_git(self, *args: str, cwd: str = REPO_PATH) -> str:
        """Run ``git <args>`` in *cwd* and return its combined output.

        stderr is merged into stdout.  A non-zero exit status does not raise;
        it is logged as a warning and the output is still returned.

        Args:
            *args: Arguments passed to ``git`` (no shell is involved).
            cwd: Working directory for the command.

        Returns:
            The decoded, stripped output of the command.

        Raises:
            TimeoutError: If the command does not finish within 60 seconds.
        """
        proc = await asyncio.create_subprocess_exec(
            'git', *args,
            stdout=asyncio.subprocess.PIPE,
            stderr=asyncio.subprocess.STDOUT,
            cwd=cwd,
        )
        try:
            out, _ = await asyncio.wait_for(proc.communicate(), timeout=60)
        except asyncio.TimeoutError as exc:
            await self._kill_and_reap(proc)
            raise TimeoutError('git command timed out') from exc
        if proc.returncode:
            logger.warning(
                'sysops: git %s exited with code %s', ' '.join(args), proc.returncode
            )
        return out.decode(errors='replace').strip()

    async def git_pull(self, repo_path: str = REPO_PATH) -> str:
        """Run ``git pull origin main`` in *repo_path* and return its output.

        Returns ``'Already up to date.'`` when git prints nothing.
        """
        out = await self._run_git('pull', 'origin', 'main', cwd=repo_path)
        logger.info('sysops git pull: %s', out)
        return out or 'Already up to date.'

    async def git_status(self, repo_path: str = REPO_PATH) -> str:
        """Return ``git status --short`` output, or ``'Clean working tree'`` if empty."""
        out = await self._run_git('status', '--short', cwd=repo_path)
        return out or 'Clean working tree'

    async def git_log(self, repo_path: str = REPO_PATH, n: int = 5) -> str:
        """Return up to *n* commits of *repo_path* in one-line form, without color."""
        return await self._run_git(
            'log', f'--max-count={n}', '--oneline', '--no-color', cwd=repo_path
        )

    async def notify_all_bot_channels(self, message: str) -> int:
        """Post *message* to the DM channels the AI bot is a member of.

        At most 50 channels of type ``chat`` are notified.  Newlines in
        *message* are converted to ``<br>``; the text is otherwise sent as-is.
        This is best-effort: failures are logged as warnings, never raised.

        Returns:
            The number of channels the message was posted to.  0 when no
            Odoo client is configured, the bot user is not found, or the
            lookup itself fails.
        """
        if not self._odoo:
            return 0
        try:
            users = await self._odoo.search_read(
                'res.users',
                [('login', 'in', ('activeblue_ai_bot', 'activeblue_ai_bot@local'))],
                ['id', 'partner_id'], limit=1,
            )
            if not users:
                return 0
            # Assumes partner_id comes back as an (id, display_name) pair.
            bot_partner_id = users[0]['partner_id'][0]
            channels = await self._odoo.search_read(
                'discuss.channel',
                [('channel_member_ids.partner_id', '=', bot_partner_id),
                 ('channel_type', '=', 'chat')],
                ['id'], limit=50,
            )
            count = 0
            html = message.replace('\n', '<br>')
            for ch in channels:
                # One failing channel must not stop the remaining posts.
                try:
                    await self._odoo.call('discuss.channel', 'message_post', [[ch['id']]], {
                        'body': html,
                        'author_id': bot_partner_id,
                        'message_type': 'comment',
                        'subtype_xmlid': 'mail.mt_comment',
                    })
                    count += 1
                except Exception as exc:
                    logger.warning('sysops notify ch=%s failed: %s', ch['id'], exc)
            return count
        except Exception as exc:
            logger.warning('notify_all_bot_channels failed: %s', exc)
            return 0