"""SysOps agent: container status, logs, restarts and git operations.

The agent serves three entry points:

* the BaseAgent lifecycle (``_plan`` -> ``_gather`` -> ``_reason`` ->
  ``_act`` -> ``_report``) for on-demand chat requests,
* ``auto_heal`` for the background health-check loop,
* ``sweep`` for the scheduled read-only audit.
"""
from __future__ import annotations

import logging

from .base_agent import BaseAgent, AgentReport, SweepReport
from ..tools.sysops_tools import (
    SysopsTools,
    ALLOWED_CONTAINERS,
    SYSTEM_CONTAINER_MAP,
    REPO_PATH,
)

logger = logging.getLogger(__name__)

# Tool descriptors exposed through ``SysopsAgent.tools``.
SYSOPS_TOOLS = [
    {
        'name': 'docker_ps',
        'description': 'List all Docker containers with their current status',
        'parameters': {},
    },
    {
        'name': 'docker_logs',
        'description': 'Retrieve recent log lines from a container',
        'parameters': {
            'container': {'type': 'string'},
            'lines': {'type': 'integer', 'optional': True},
        },
    },
    {
        'name': 'docker_restart',
        'description': 'Restart a Docker container by name',
        'parameters': {'container': {'type': 'string'}},
    },
    {
        'name': 'git_pull',
        'description': 'Pull latest code from git remote (main branch)',
        'parameters': {'repo_path': {'type': 'string', 'optional': True}},
    },
    {
        'name': 'git_status',
        'description': 'Check git working tree status',
        'parameters': {'repo_path': {'type': 'string', 'optional': True}},
    },
    {
        'name': 'git_log',
        'description': 'Show recent git commits',
        'parameters': {
            'repo_path': {'type': 'string', 'optional': True},
            'n': {'type': 'integer', 'optional': True},
        },
    },
]

# Maps task keywords to container names for on-demand restart requests.
# Several keywords are substrings of others ('agent' is inside 'agent-db'),
# so lookups must test the longest keyword first; see _resolve_container().
_CONTAINER_KEYWORDS = {
    'agent': 'activeblue-agent',
    'activeblue-agent': 'activeblue-agent',
    'agent-db': 'activeblue-agent-db',
    'activeblue-agent-db': 'activeblue-agent-db',
    'odoo': 'odoo-web-1',
    'odoo-web': 'odoo-web-1',
    'odoo-web-1': 'odoo-web-1',
}

# Number of log lines fetched for an on-demand log request; the same number
# is quoted in the report header.
_LOG_TAIL_LINES = 80

# Maximum number of trailing log characters included in a report.
_LOG_REPORT_MAX_CHARS = 2000


class SysopsAgent(BaseAgent):
    """Infrastructure agent for Docker containers and the git checkout.

    All container and git operations are delegated to a ``SysopsTools``
    instance; this class decides which operations to run and formats the
    results.
    """

    # Class-level configuration; how these are consumed is defined by
    # BaseAgent, which is not visible from this module.
    name = 'sysops_agent'
    domain = 'infrastructure'
    required_odoo_module = 'base'
    system_prompt_file = 'sysops_system.txt'
    tools = SYSOPS_TOOLS
    auto_rag = False

    def __init__(self, odoo, llm, peer_bus=None):
        """Initialise the agent and its ``SysopsTools`` helper.

        Args:
            odoo: Odoo client, forwarded to BaseAgent and to SysopsTools.
            llm: LLM client, forwarded to BaseAgent.
            peer_bus: Optional peer bus, forwarded to BaseAgent.
        """
        super().__init__(odoo, llm, peer_bus)
        self._st = SysopsTools(odoo=odoo)
        # Per-request state shared between the lifecycle steps.
        self._current_plan: dict = {}
        self._actions_taken: list[str] = []
        self._gathered_data: dict = {}

    # --- Tool bridge methods (available to _loop() if used) ---

    async def _tool_docker_ps(self) -> list:
        """Return the container listing from ``SysopsTools.docker_ps``."""
        return await self._st.docker_ps()

    async def _tool_docker_logs(self, container: str, lines: int = 50) -> str:
        """Return the last ``lines`` log lines of ``container``."""
        return await self._st.docker_logs(container, lines)

    async def _tool_docker_restart(self, container: str) -> str:
        """Restart ``container`` and return the tool's result string."""
        return await self._st.docker_restart(container)

    async def _tool_git_pull(self, repo_path: str | None = None) -> str:
        """Run git pull in ``repo_path`` (``REPO_PATH`` when omitted)."""
        return await self._st.git_pull(repo_path or REPO_PATH)

    async def _tool_git_status(self, repo_path: str | None = None) -> str:
        """Return git status for ``repo_path`` (``REPO_PATH`` when omitted)."""
        return await self._st.git_status(repo_path or REPO_PATH)

    async def _tool_git_log(self, repo_path: str | None = None, n: int = 5) -> str:
        """Return the last ``n`` commits of ``repo_path`` (default ``REPO_PATH``)."""
        return await self._st.git_log(repo_path or REPO_PATH, n)

    # --- BaseAgent lifecycle (on-demand chat requests) ---

    async def _plan(self) -> dict:
        """Derive intent flags from the directive's task text.

        The task is lower-cased and scanned for keyword substrings.

        Returns:
            A dict with the lower-cased ``task`` and the boolean flags
            ``wants_restart``, ``wants_logs``, ``wants_git`` and
            ``wants_status`` (always True). It is also stored on
            ``self._current_plan``.
        """
        # A missing directive or a None task is treated as an empty request.
        raw_task = (self._directive.task or '') if self._directive else ''
        task = raw_task.lower()
        plan = {
            'task': task,
            'wants_restart': any(k in task for k in ('restart', 'reboot')),
            # NOTE(review): 'log' also matches "git log" requests, which
            # then fetch container logs as well -- confirm this is intended.
            'wants_logs': any(
                k in task
                for k in ('log', 'crash', 'error', 'fail', 'why', 'slow')
            ),
            # NOTE(review): any mention of 'git' sets this flag, and _act()
            # runs a git pull whenever it is set -- confirm that read-only
            # requests ("git status") are meant to pull.
            'wants_git': any(
                k in task
                for k in ('pull', 'git', 'update code', 'deploy', 'latest code')
            ),
            'wants_status': True,
        }
        self._current_plan = plan
        return plan

    async def _gather(self, plan: dict) -> dict:
        """Collect the data the plan asks for.

        Always lists containers. Adds git status/log when ``wants_git`` is
        set, and container logs when ``wants_logs`` is set.

        Args:
            plan: The dict produced by ``_plan``.

        Returns:
            The gathered data, also stored on ``self._gathered_data``.
        """
        data: dict = {}
        data['containers'] = await self._st.docker_ps()

        if plan.get('wants_git'):
            data['git_status'] = await self._st.git_status()
            data['git_log'] = await self._st.git_log()

        if plan.get('wants_logs'):
            # Fall back to the agent container when the task names none.
            target = self._resolve_container(plan['task']) or 'activeblue-agent'
            data['logs_container'] = target
            data['logs'] = await self._st.docker_logs(target, lines=_LOG_TAIL_LINES)

        self._gathered_data = data
        return data

    async def _reason(self) -> dict:
        """Identify managed containers that are exited or unhealthy.

        Returns:
            ``{'unhealthy': [...]}`` with the matching container entries
            from the gathered data.
        """
        containers = self._gathered_data.get('containers', [])
        unhealthy = [
            c for c in containers
            if c.get('name') in ALLOWED_CONTAINERS
            and (
                'exited' in c.get('status', '').lower()
                or 'unhealthy' in c.get('status', '').lower()
            )
        ]
        return {'unhealthy': unhealthy}

    async def _act(self, reasoning: dict) -> list:
        """Execute the side-effecting parts of the current plan.

        Restarts the container named in the task when a restart was
        requested and a container could be resolved, and runs git pull
        when ``wants_git`` is set. Bot channels are notified after a
        restart and after a pull that actually brought in changes.

        Args:
            reasoning: Output of ``_reason``; not used by this method.

        Returns:
            The list of action descriptions (also on ``self._actions_taken``).
        """
        self._actions_taken = []
        plan = self._current_plan
        task = plan.get('task', '')

        if plan.get('wants_restart'):
            target = self._resolve_container(task)
            if target:
                result = await self._st.docker_restart(target)
                self._actions_taken.append(result)
                await self._st.notify_all_bot_channels(
                    f'[SysOps] {target} restarted on user request.')

        if plan.get('wants_git'):
            result = await self._st.git_pull()
            self._actions_taken.append(f'git pull: {result}')
            # Only announce pulls that changed something.
            if 'Already up to date' not in result:
                await self._st.notify_all_bot_channels(
                    f'[SysOps] Pulled latest code:\n{result}')

        return self._actions_taken

    async def _report(self) -> AgentReport:
        """Build the final report from gathered data and actions taken.

        The summary lists managed container statuses, actions taken, the
        tail of any fetched logs (capped at ``_LOG_REPORT_MAX_CHARS``
        characters) and recent commits, in that order.

        Returns:
            An ``AgentReport`` with status ``'complete'``.
        """
        directive_id = self._directive.directive_id if self._directive else ''
        containers = self._gathered_data.get('containers', [])
        lines = []

        managed = [c for c in containers if c.get('name') in ALLOWED_CONTAINERS]
        if managed:
            lines.append('Container status:')
            for c in managed:
                # .get() so a container entry without a status cannot
                # abort the whole report with a KeyError.
                lines.append(f'  {c["name"]}: {c.get("status", "unknown")}')

        if self._actions_taken:
            lines.append('')
            lines.append('Actions taken:')
            for a in self._actions_taken:
                lines.append(f'  • {a}')

        if 'logs' in self._gathered_data:
            lines.append('')
            lines.append(
                f'Recent logs ({self._gathered_data.get("logs_container", "")}, '
                f'last {_LOG_TAIL_LINES} lines):')
            tail = self._gathered_data['logs']
            # A negative slice already returns the whole string when it is
            # shorter than the cap.
            lines.append(tail[-_LOG_REPORT_MAX_CHARS:])

        if 'git_log' in self._gathered_data:
            lines.append('')
            lines.append('Recent commits:')
            lines.append(self._gathered_data['git_log'])

        summary = '\n'.join(lines) if lines else 'System check complete — all containers running.'
        return AgentReport(
            directive_id=directive_id,
            agent=self.name,
            status='complete',
            summary=summary,
            actions_taken=self._actions_taken,
        )

    # --- Auto-heal: called by background health-check loop ---

    async def auto_heal(self, failing_systems: list[str]) -> None:
        """Try to recover failing systems by restarting their containers.

        Each system is mapped to a container through
        ``SYSTEM_CONTAINER_MAP``; unmapped systems are skipped and
        ``'ollama'`` is reported as not restartable from here. Bot channels
        are notified before and after. The method never raises: errors are
        logged and reported to the bot channels on a best-effort basis.

        Args:
            failing_systems: Names of the systems reported as failing.
        """
        actions: list[str] = []
        try:
            await self._st.notify_all_bot_channels(
                f'[SysOps] Health degraded — failing: {", ".join(failing_systems)}\n'
                f'Starting auto-recovery...'
            )

            containers = await self._st.docker_ps()
            # Entries without a name cannot be matched to a target, so skip
            # them instead of aborting the whole recovery with a KeyError.
            container_map = {c['name']: c for c in containers if c.get('name')}

            for system in failing_systems:
                if system == 'ollama':
                    actions.append('ollama: external host (192.168.2.9) — cannot restart from here')
                    continue

                target = SYSTEM_CONTAINER_MAP.get(system)
                if not target:
                    continue

                # Status before the restart, for the recovery summary only.
                status = container_map.get(target, {}).get('status', 'unknown')

                # One failed restart must not stop the remaining ones.
                try:
                    await self._st.docker_restart(target)
                    actions.append(f'{target}: restarted (was: {status})')
                except Exception as exc:
                    actions.append(f'{target}: restart failed — {exc}')

            summary = '\n'.join(f'  • {a}' for a in actions) or '  No actions taken.'
            await self._st.notify_all_bot_channels(
                f'[SysOps] Auto-recovery complete:\n{summary}\n'
                f'Status will update on next health check.'
            )
            logger.info('sysops auto_heal complete: %s', actions)

        except Exception as exc:
            logger.exception('sysops auto_heal error: %s', exc)
            try:
                await self._st.notify_all_bot_channels(f'[SysOps] Auto-recovery error: {exc}')
            except Exception as notify_exc:
                # Best effort: the notifier itself may be what is failing,
                # so record it and carry on rather than raising.
                logger.warning(
                    'sysops auto_heal: error notification failed: %s', notify_exc)

    # --- Sweep: scheduled read-only audit ---

    async def sweep(self) -> SweepReport:
        """Audit managed containers and the git working tree.

        Produces a ``container_exited`` (high) or ``container_unhealthy``
        (medium) finding per affected managed container, and a
        ``git_uncommitted`` (low) finding when git status is anything other
        than ``'Clean working tree'``.

        Returns:
            A ``SweepReport`` with the findings. If any step raises, the
            error is logged and a report with no findings is returned.
        """
        findings: list[dict] = []
        try:
            containers = await self._st.docker_ps()
            for c in containers:
                if c.get('name') not in ALLOWED_CONTAINERS:
                    continue
                status = c.get('status', '')
                lowered = status.lower()
                if 'exited' in lowered:
                    findings.append({
                        'type': 'container_exited',
                        'container': c['name'],
                        'status': status,
                        'severity': 'high',
                    })
                elif 'unhealthy' in lowered:
                    findings.append({
                        'type': 'container_unhealthy',
                        'container': c['name'],
                        'status': status,
                        'severity': 'medium',
                    })

            git_st = await self._st.git_status()
            if git_st != 'Clean working tree':
                findings.append({
                    'type': 'git_uncommitted',
                    'details': git_st,
                    'severity': 'low',
                })
        except Exception as exc:
            logger.exception('sysops sweep error: %s', exc)
            return SweepReport(agent=self.name, findings=[], recommendations=[])

        return SweepReport(agent=self.name, findings=findings, recommendations=[])

    # --- Helpers ---

    def _resolve_container(self, task: str) -> str | None:
        """Return the container name referred to in ``task``, if any.

        Keywords are matched as substrings, longest first, so that a
        specific keyword such as ``'agent-db'`` wins over the shorter
        ``'agent'`` it contains. Matching in plain dict order would send
        every ``agent-db`` request to ``activeblue-agent``.

        Args:
            task: Lower-cased task text.

        Returns:
            The mapped container name, or ``None`` when no keyword occurs
            in ``task``.
        """
        # sorted() is stable, so equally long keywords keep their dict order.
        for kw in sorted(_CONTAINER_KEYWORDS, key=len, reverse=True):
            if kw in task:
                return _CONTAINER_KEYWORDS[kw]
        return None