feat: sysops_agent — Docker/git self-management with auto-heal
Adds a new specialist agent that gives the AI system control over its own infrastructure:
- sysops_tools.py: docker SDK (ps/logs/restart) + git CLI (pull/status/log) + Odoo channel notifier for autonomous action broadcasts
- sysops_agent.py: BaseAgent subclass handling on-demand chat requests, auto_heal() triggered by health failures, and sweep() for audits
- Background auto-heal loop (main.py): runs every 2 minutes, calls _get_failing_systems() and triggers auto_heal() when degraded
- health.py: extracted _get_failing_systems() helper reused by both the /health/detailed endpoint and the auto-heal loop
- docker-compose.yml: mount docker socket + /root/odoo workspace + SSH keys for git authentication
- Dockerfile: add git to apt-get
- requirements.txt: add docker==7.1.0 Python SDK

Auto-heal behavior:
- Detects failing containers, restarts them, notifies all bot DM channels
- Ollama (192.168.2.9) is flagged as external and skipped
- On-demand via chat: "restart agent", "check logs", "pull latest code"

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
137
agent_service/tools/sysops_tools.py
Normal file
137
agent_service/tools/sysops_tools.py
Normal file
@@ -0,0 +1,137 @@
|
||||
from __future__ import annotations
|
||||
import asyncio
|
||||
import functools
|
||||
import logging
|
||||
|
||||
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Default working directory for the git commands issued by SysopsTools.
REPO_PATH = '/workspace/odoo-ai'

# Allow-list of container names that docker_logs() and docker_restart()
# accept; any other name is rejected with ValueError.
ALLOWED_CONTAINERS = frozenset((
    'activeblue-agent',
    'activeblue-agent-db',
    'odoo-web-1',
    'odoo-db-1',
))

# Maps health-check system names to the container responsible for them.
# 'ollama' is external (192.168.2.9) and cannot be managed from here, so it
# deliberately has no entry.
SYSTEM_CONTAINER_MAP = dict(
    db='activeblue-agent-db',
    odoo='odoo-web-1',
    master_agent='activeblue-agent',
)
|
||||
|
||||
|
||||
class SysopsTools:
    """Docker, git and Odoo-notification helpers used for self-management.

    * Docker SDK calls are blocking, so they are dispatched to the event
      loop's default executor via :meth:`_docker`.
    * Per-container operations (logs, restart) only accept names listed in
      ``ALLOWED_CONTAINERS``.
    * Git commands run as subprocesses in ``REPO_PATH`` (by default) and are
      capped at 60 seconds.
    * Notifications are posted through the optional ``odoo`` client.
    """

    def __init__(self, odoo=None):
        """Remember the optional Odoo client; the Docker client is lazy.

        Args:
            odoo: Object exposing awaitable ``search_read`` and ``call``
                methods, or ``None`` to turn notifications into a no-op.
        """
        self._odoo = odoo
        self._docker_client = None

    def _get_docker(self):
        """Return the cached Docker client, creating it on first use.

        ``docker`` is imported here rather than at module level, so importing
        this module does not require the SDK to be installed.
        """
        if self._docker_client is None:
            import docker
            self._docker_client = docker.from_env()
        return self._docker_client

    async def _docker(self, fn, *args, **kwargs):
        """Run ``fn(client, *args, **kwargs)`` in the default executor.

        Args:
            fn: Blocking callable whose first positional argument is the
                Docker client.
            *args: Extra positional arguments forwarded to ``fn``.
            **kwargs: Extra keyword arguments forwarded to ``fn``.

        Returns:
            Whatever ``fn`` returns. Exceptions raised by ``fn`` or by client
            creation propagate to the awaiting caller.
        """
        loop = asyncio.get_running_loop()

        def _call():
            # The client is obtained inside the worker thread as well:
            # building it may talk to the Docker daemon, which would
            # otherwise block the event loop on first use.
            # NOTE(review): two concurrent first calls may each build a
            # client; the last one assigned wins. Presumed harmless.
            return fn(self._get_docker(), *args, **kwargs)

        return await loop.run_in_executor(None, _call)

    @staticmethod
    def _require_allowed(container: str) -> None:
        """Raise ``ValueError`` unless ``container`` is in the allow-list."""
        if container not in ALLOWED_CONTAINERS:
            raise ValueError(f'Container {container!r} not in allowed list')

    async def docker_ps(self) -> list[dict]:
        """List all containers, running or not.

        Returns:
            One dict per container with the keys ``name``, ``status``, ``id``
            (short id) and ``image`` (first image tag, or the first 12
            characters of the image id when the image has no tags).
        """
        def _ps(client):
            return [
                {
                    'name': c.name,
                    'status': c.status,
                    'id': c.short_id,
                    'image': c.image.tags[0] if c.image.tags else str(c.image.id)[:12],
                }
                for c in client.containers.list(all=True)
            ]

        return await self._docker(_ps)

    async def docker_logs(self, container: str, lines: int = 50) -> str:
        """Return the last ``lines`` log lines of an allowed container.

        Args:
            container: Container name; must be in ``ALLOWED_CONTAINERS``.
            lines: Value passed as the SDK's ``tail`` argument.

        Returns:
            The log output decoded to text; undecodable bytes are replaced.

        Raises:
            ValueError: If ``container`` is not in the allow-list.
        """
        self._require_allowed(container)

        def _logs(client):
            c = client.containers.get(container)
            return c.logs(tail=lines, stream=False).decode(errors='replace')

        return await self._docker(_logs)

    async def docker_restart(self, container: str) -> str:
        """Restart an allowed container and return a confirmation string.

        Args:
            container: Container name; must be in ``ALLOWED_CONTAINERS``.

        Returns:
            ``'<container> restarted'``.

        Raises:
            ValueError: If ``container`` is not in the allow-list.
        """
        self._require_allowed(container)

        def _restart(client):
            c = client.containers.get(container)
            c.restart(timeout=30)

        await self._docker(_restart)
        logger.info('sysops: restarted %s', container)
        return f'{container} restarted'

    @staticmethod
    async def _kill_and_reap(proc) -> None:
        """Kill ``proc`` and wait briefly for it to exit so it is reaped."""
        try:
            proc.kill()
        except ProcessLookupError:
            # The process already exited between the timeout and the kill.
            return
        try:
            # Bounded wait: never let cleanup itself hang the caller.
            await asyncio.wait_for(proc.wait(), timeout=5)
        except asyncio.TimeoutError:
            logger.warning('sysops: git process did not exit after kill')

    async def _run_git(self, *args: str, cwd: str = REPO_PATH) -> str:
        """Run ``git <args>`` in ``cwd`` and return its combined output.

        stderr is merged into stdout. The exit status is not inspected, so a
        failing git command returns its error text rather than raising.

        Args:
            *args: Arguments passed to ``git`` (no shell is involved).
            cwd: Working directory for the command.

        Returns:
            Decoded, stripped output of the command.

        Raises:
            TimeoutError: If the command has not finished within 60 seconds;
                the child process is killed first.
        """
        proc = await asyncio.create_subprocess_exec(
            'git', *args,
            # Do not let git inherit (and potentially read from) the
            # service's own stdin.
            stdin=asyncio.subprocess.DEVNULL,
            stdout=asyncio.subprocess.PIPE,
            stderr=asyncio.subprocess.STDOUT,
            cwd=cwd,
        )
        try:
            out, _ = await asyncio.wait_for(proc.communicate(), timeout=60)
        except asyncio.TimeoutError as exc:
            await self._kill_and_reap(proc)
            raise TimeoutError('git command timed out') from exc
        return out.decode(errors='replace').strip()

    async def git_pull(self, repo_path: str = REPO_PATH) -> str:
        """Run ``git pull origin main`` in ``repo_path`` and log the output.

        Returns:
            The command output, or ``'Already up to date.'`` when git printed
            nothing.
        """
        out = await self._run_git('pull', 'origin', 'main', cwd=repo_path)
        logger.info('sysops git pull: %s', out)
        return out or 'Already up to date.'

    async def git_status(self, repo_path: str = REPO_PATH) -> str:
        """Return ``git status --short`` output, or ``'Clean working tree'``
        when the output is empty."""
        out = await self._run_git('status', '--short', cwd=repo_path)
        return out or 'Clean working tree'

    async def git_log(self, repo_path: str = REPO_PATH, n: int = 5) -> str:
        """Return the last ``n`` commits in one-line, colourless form."""
        return await self._run_git(
            'log', f'--max-count={n}', '--oneline', '--no-color', cwd=repo_path,
        )

    async def notify_all_bot_channels(self, message: str) -> int:
        """Post ``message`` to the DM channels the AI bot is a member of.

        At most 50 channels of type ``chat`` are addressed. Newlines in
        ``message`` are converted to ``<br>``. This method never raises:
        per-channel and overall failures are logged as warnings.

        Args:
            message: Text to post.

        Returns:
            Number of channels the message was posted to; ``0`` when no Odoo
            client is configured, the bot user is not found, or the lookup
            fails.
        """
        if not self._odoo:
            return 0
        try:
            users = await self._odoo.search_read(
                'res.users',
                [('login', 'in', ('activeblue_ai_bot', 'activeblue_ai_bot@local'))],
                ['id', 'partner_id'], limit=1,
            )
            if not users:
                return 0
            # Assumes partner_id comes back as an [id, name] pair; any other
            # shape raises here and is handled by the outer except below.
            bot_partner_id = users[0]['partner_id'][0]
            channels = await self._odoo.search_read(
                'discuss.channel',
                [('channel_member_ids.partner_id', '=', bot_partner_id),
                 ('channel_type', '=', 'chat')],
                ['id'], limit=50,
            )
            count = 0
            # NOTE(review): message is not HTML-escaped before being used as
            # the body - confirm how the Odoo side treats this string.
            html = message.replace('\n', '<br>')
            for ch in channels:
                # One failing channel must not prevent the others from being
                # notified, hence the per-channel try/except.
                try:
                    await self._odoo.call('discuss.channel', 'message_post', [[ch['id']]], {
                        'body': html,
                        'author_id': bot_partner_id,
                        'message_type': 'comment',
                        'subtype_xmlid': 'mail.mt_comment',
                    })
                    count += 1
                except Exception as exc:
                    logger.warning('sysops notify ch=%s failed: %s', ch['id'], exc)
            return count
        except Exception as exc:
            # Best-effort broadcast: report and carry on rather than letting
            # a notification problem break the calling action.
            logger.warning('notify_all_bot_channels failed: %s', exc)
            return 0
|
||||
Reference in New Issue
Block a user