Adds a new specialist agent that gives the AI system control over its own infrastructure:

- sysops_tools.py: docker SDK (ps/logs/restart) + git CLI (pull/status/log) + Odoo channel notifier for autonomous action broadcasts
- sysops_agent.py: BaseAgent subclass handling on-demand chat requests, auto_heal() triggered by health failures, and sweep() for audits
- Background auto-heal loop (main.py): runs every 2 minutes, calls _get_failing_systems() and triggers auto_heal() when degraded
- health.py: extracted _get_failing_systems() helper reused by both the /health/detailed endpoint and the auto-heal loop
- docker-compose.yml: mount docker socket + /root/odoo workspace + SSH keys for git authentication
- Dockerfile: add git to apt-get
- requirements.txt: add docker==7.1.0 Python SDK

Auto-heal behavior:

- Detects failing containers, restarts them, notifies all bot DM channels
- Ollama (192.168.2.9) is flagged as external and skipped
- On-demand via chat: "restart agent", "check logs", "pull latest code"

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
138 lines
5.0 KiB
Python
138 lines
5.0 KiB
Python
from __future__ import annotations
|
|
import asyncio
|
|
import functools
|
|
import logging
|
|
|
|
# Module-wide logger used by every sysops helper in this file.
logger = logging.getLogger(__name__)

# Default working copy for the git helpers (used as their `cwd`).
REPO_PATH = '/workspace/odoo-ai'

# Allow-list consulted by docker_logs / docker_restart before touching a
# container; any other name is rejected with ValueError.
ALLOWED_CONTAINERS = frozenset((
    'activeblue-agent',
    'activeblue-agent-db',
    'odoo-web-1',
    'odoo-db-1',
))

# Health-check system name -> container responsible for that system.
# 'ollama' has no entry on purpose: it is external (192.168.2.9) and
# cannot be managed from here.
SYSTEM_CONTAINER_MAP = dict(
    db='activeblue-agent-db',
    odoo='odoo-web-1',
    master_agent='activeblue-agent',
)
|
|
|
|
|
|
class SysopsTools:
    """Async helpers for Docker control, git operations and Odoo notifications.

    Docker SDK calls are blocking, so they are pushed to the event loop's
    default executor. git is run as a subprocess with a timeout. Notifications
    are posted through the optional Odoo client given to the constructor.
    """

    # Upper bound, in seconds, for one git invocation.
    _GIT_TIMEOUT = 60
    # How long, in seconds, to wait for a killed git process to exit.
    _KILL_GRACE = 5

    def __init__(self, odoo=None):
        """Store the Odoo client; the Docker client is created on first use.

        Args:
            odoo: Object exposing awaitable ``search_read`` and ``call``
                methods. When falsy, ``notify_all_bot_channels`` is a no-op.
        """
        self._odoo = odoo
        self._docker_client = None

    def _get_docker(self):
        """Return the cached Docker client, building it from the environment once."""
        if self._docker_client is None:
            # Imported lazily so this module can be loaded without the SDK.
            import docker
            self._docker_client = docker.from_env()
        return self._docker_client

    @staticmethod
    def _require_allowed(container: str) -> None:
        """Raise ValueError unless *container* is in ALLOWED_CONTAINERS."""
        if container not in ALLOWED_CONTAINERS:
            raise ValueError(f'Container {container!r} not in allowed list')

    async def _docker(self, fn, *args, **kwargs):
        """Run ``fn(client, *args, **kwargs)`` in the default executor.

        Args:
            fn: Blocking callable whose first positional argument is the
                Docker client.

        Returns:
            Whatever *fn* returns. Exceptions raised by *fn* (or by client
            creation) propagate to the awaiting caller.
        """
        # get_running_loop() is the supported way to obtain the loop from
        # inside a coroutine; get_event_loop() is deprecated for this use.
        loop = asyncio.get_running_loop()

        def _call():
            # The client is obtained inside the worker thread as well:
            # constructing it may contact the daemon (NOTE(review): confirm for
            # the pinned SDK version), which must not block the event loop.
            # Two concurrent first calls may each build a client; the extra one
            # is simply dropped.
            return fn(self._get_docker(), *args, **kwargs)

        return await loop.run_in_executor(None, _call)

    async def docker_ps(self) -> list[dict]:
        """List all containers (running or not) as name/status/id/image dicts."""
        def _describe(c):
            # Read `.image` once: the SDK appears to resolve it with a daemon
            # lookup on every access (NOTE(review): confirm against SDK docs).
            image = c.image
            return {
                'name': c.name,
                'status': c.status,
                'id': c.short_id,
                'image': image.tags[0] if image.tags else str(image.id)[:12],
            }

        def _ps(client):
            return [_describe(c) for c in client.containers.list(all=True)]

        return await self._docker(_ps)

    async def docker_logs(self, container: str, lines: int = 50) -> str:
        """Return the last *lines* log lines of an allow-listed container.

        Raises:
            ValueError: If *container* is not in ALLOWED_CONTAINERS.
        """
        self._require_allowed(container)

        def _logs(client):
            c = client.containers.get(container)
            # Undecodable bytes are replaced rather than raising.
            return c.logs(tail=lines, stream=False).decode(errors='replace')

        return await self._docker(_logs)

    async def docker_restart(self, container: str) -> str:
        """Restart an allow-listed container and return a confirmation string.

        Raises:
            ValueError: If *container* is not in ALLOWED_CONTAINERS.
        """
        self._require_allowed(container)

        def _restart(client):
            c = client.containers.get(container)
            c.restart(timeout=30)

        await self._docker(_restart)
        logger.info('sysops: restarted %s', container)
        return f'{container} restarted'

    async def _run_git(self, *args: str, cwd: str = REPO_PATH) -> str:
        """Run ``git <args>`` in *cwd* and return its combined stdout/stderr.

        The output is decoded with replacement of invalid bytes and stripped.
        A non-zero exit status is logged but the output is still returned.

        Raises:
            TimeoutError: If git does not finish within the time limit.
        """
        proc = await asyncio.create_subprocess_exec(
            'git', *args,
            stdout=asyncio.subprocess.PIPE,
            stderr=asyncio.subprocess.STDOUT,  # merge stderr into stdout
            cwd=cwd,
        )
        try:
            out, _ = await asyncio.wait_for(
                proc.communicate(), timeout=self._GIT_TIMEOUT,
            )
        except asyncio.TimeoutError as exc:
            try:
                proc.kill()
            except ProcessLookupError:
                # The process exited on its own between the timeout and the kill.
                pass
            # Wait for the killed process so it does not linger unreaped, but
            # bound the wait so a stuck child cannot hang the caller.
            try:
                await asyncio.wait_for(proc.wait(), timeout=self._KILL_GRACE)
            except asyncio.TimeoutError:
                logger.warning('sysops: killed git process did not exit in time')
            raise TimeoutError('git command timed out') from exc

        if proc.returncode:
            logger.warning(
                'sysops: git %s exited with code %s',
                ' '.join(args), proc.returncode,
            )
        return out.decode(errors='replace').strip()

    async def git_pull(self, repo_path: str = REPO_PATH) -> str:
        """Run ``git pull origin main`` in *repo_path* and return its output.

        Empty output is reported as ``'Already up to date.'``.
        """
        out = await self._run_git('pull', 'origin', 'main', cwd=repo_path)
        logger.info('sysops git pull: %s', out)
        return out or 'Already up to date.'

    async def git_status(self, repo_path: str = REPO_PATH) -> str:
        """Return ``git status --short`` output, or ``'Clean working tree'`` if empty."""
        out = await self._run_git('status', '--short', cwd=repo_path)
        return out or 'Clean working tree'

    async def git_log(self, repo_path: str = REPO_PATH, n: int = 5) -> str:
        """Return the last *n* commits of *repo_path* in one-line format."""
        return await self._run_git(
            'log', f'--max-count={n}', '--oneline', '--no-color', cwd=repo_path,
        )

    async def notify_all_bot_channels(self, message: str) -> int:
        """Post a message to every DM channel where the AI bot is a member.

        Best-effort: any failure is logged and never raised.

        Returns:
            The number of channels the message was posted to; 0 when no Odoo
            client is configured, the bot user is not found, or the lookup
            fails.
        """
        if not self._odoo:
            return 0
        try:
            users = await self._odoo.search_read(
                'res.users',
                [('login', 'in', ('activeblue_ai_bot', 'activeblue_ai_bot@local'))],
                ['id', 'partner_id'], limit=1,
            )
            if not users:
                return 0
            # partner_id is read as a sequence whose first item is the id.
            bot_partner_id = users[0]['partner_id'][0]
            channels = await self._odoo.search_read(
                'discuss.channel',
                [('channel_member_ids.partner_id', '=', bot_partner_id),
                 ('channel_type', '=', 'chat')],
                ['id'], limit=50,
            )
            # NOTE(review): the message is inserted without HTML escaping;
            # confirm callers never pass untrusted markup.
            html = message.replace('\n', '<br>')
            count = 0
            for ch in channels:
                # One failing channel must not stop delivery to the others.
                try:
                    await self._odoo.call('discuss.channel', 'message_post', [[ch['id']]], {
                        'body': html,
                        'author_id': bot_partner_id,
                        'message_type': 'comment',
                        'subtype_xmlid': 'mail.mt_comment',
                    })
                    count += 1
                except Exception as exc:
                    logger.warning('sysops notify ch=%s failed: %s', ch['id'], exc)
            return count
        except Exception as exc:
            logger.warning('notify_all_bot_channels failed: %s', exc)
            return 0
|