commit 90a6ee6fbfc685d1f2b6ad85a7cfe8665be2b810 Author: tocmo0nlord Date: Sat Mar 21 17:13:32 2026 -0400 Initial scaffold: LLM Trainer Dashboard Full-stack app with FastAPI backend (SSH/paramiko, pipeline streaming, GPU stats, xterm.js terminal, Ollama model manager) and React + Tailwind frontend (8 panels: Connection, Documents, Pipeline, QA Pairs, Training, Terminal, Models, Config). Docker Compose included. Co-Authored-By: Claude Sonnet 4.6 diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..c7331e8 --- /dev/null +++ b/.gitignore @@ -0,0 +1,33 @@ +# Python +__pycache__/ +*.py[cod] +*.pyo +.env +.venv/ +venv/ +*.egg-info/ +dist/ +build/ + +# Node +node_modules/ +frontend/dist/ +frontend/.vite/ + +# OS +.DS_Store +Thumbs.db +desktop.ini + +# IDE +.vscode/ +.idea/ +*.swp + +# Secrets +*.pem +*.key +id_rsa +id_ed25519 +.env.local +.env.production diff --git a/backend/Dockerfile b/backend/Dockerfile new file mode 100644 index 0000000..a18d900 --- /dev/null +++ b/backend/Dockerfile @@ -0,0 +1,12 @@ +FROM python:3.11-slim + +WORKDIR /app + +COPY requirements.txt . +RUN pip install --no-cache-dir -r requirements.txt + +COPY . . 
def get_gpu_stats() -> dict:
    """Query nvidia-smi on the remote host and return parsed GPU info.

    Returns:
        ``{"gpus": [...], "error": None}`` on success, where every entry has
        the keys ``name``, ``utilization``, ``memory_used``, ``memory_total``,
        ``temperature`` and ``power_draw``; or ``{"gpus": [], "error": msg}``
        when the SSH session is down, nvidia-smi exits non-zero, or any
        exception is raised while querying.
    """
    try:
        if not ssh_manager.is_connected():
            return {"gpus": [], "error": "Not connected"}

        out, err, code = ssh_manager.execute(
            "nvidia-smi --query-gpu=name,utilization.gpu,memory.used,memory.total,"
            "temperature.gpu,power.draw --format=csv,noheader,nounits",
            use_conda=False,
        )
        if code != 0:
            return {"gpus": [], "error": err.strip() or "nvidia-smi failed"}

        gpus = []
        for line in out.splitlines():
            parts = [p.strip() for p in line.split(",")]
            if len(parts) < 5:
                # Blank or truncated row.
                continue
            try:
                gpu = {
                    "name": parts[0],
                    "utilization": int(parts[1]),
                    "memory_used": int(parts[2]),
                    "memory_total": int(parts[3]),
                    "temperature": int(parts[4]),
                }
            except ValueError:
                # A required metric is not numeric; skip the row.
                continue

            # power_draw is optional in the response, so a non-numeric
            # placeholder (e.g. "[N/A]") must not discard the whole GPU.
            power = None
            if len(parts) > 5:
                try:
                    power = float(parts[5])
                except ValueError:
                    power = None
            gpu["power_draw"] = power
            gpus.append(gpu)

        return {"gpus": gpus, "error": None}
    except Exception as e:
        # Best-effort endpoint: report the failure instead of raising.
        return {"gpus": [], "error": str(e)}
# ──────────────────────────────────────────────────────────────────────────────
# Application setup
# ──────────────────────────────────────────────────────────────────────────────
app = FastAPI(title="LLM Trainer API", version="1.0.0")

# NOTE(review): every origin is allowed and credentials are enabled; tighten
# this before exposing the API beyond a trusted network.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

OLLAMA_URL = os.getenv("OLLAMA_URL", "http://192.168.2.47:11434")


# ──────────────────────────────────────────────────────────────────────────────
# Pydantic models
# ──────────────────────────────────────────────────────────────────────────────

class ConnectRequest(BaseModel):
    """Body of POST /api/connect."""

    host: str = "192.168.2.47"
    username: str = "tocmo0nlord"
    password: Optional[str] = None
    key_path: Optional[str] = None
    port: int = 22


class TrainRequest(BaseModel):
    """Training parameters (mirrors the query parameters of /api/train)."""

    model_name: str = "llama3.1:8b"
    dataset_path: str
    output_dir: str = "/opt/synthetic/output"
    num_epochs: int = 3
    batch_size: int = 2
    learning_rate: float = 2e-4


# ──────────────────────────────────────────────────────────────────────────────
# Helpers
# ──────────────────────────────────────────────────────────────────────────────

def _require_ssh():
    """Raise HTTP 503 unless the shared SSH session is alive."""
    if not ssh_manager.is_connected():
        raise HTTPException(status_code=503, detail="Not connected to SSH server")


def _is_plain_name(name) -> bool:
    """Return True when *name* is a bare file name without directory parts."""
    return (
        isinstance(name, str)
        and name not in ("", ".", "..")
        and "/" not in name
        and "\x00" not in name
    )


def _is_simple_token(value) -> bool:
    """Return True when *value* is a non-empty run of letters, digits, ``_.-``."""
    return (
        isinstance(value, str)
        and value != ""
        and all(ch.isalnum() or ch in "_.-" for ch in value)
    )


def _remote_path(stage_dir: str, filename: str) -> str:
    """Return the shell-quoted path of *filename* inside *stage_dir*.

    Raises:
        HTTPException: 400 when *filename* is not a bare file name, so a
            request cannot escape the stage directory.
    """
    import shlex

    if not _is_plain_name(filename):
        raise HTTPException(status_code=400, detail=f"Invalid filename: {filename!r}")
    # shlex.quote keeps names containing quotes or spaces from being
    # interpreted by the remote shell.
    return shlex.quote(f"{stage_dir}/{filename}")


def _ws_arg_problem(names=(), quoted=(), tokens=()):
    """Return an error message for the first unsafe argument, else None.

    Args:
        names: values that must be bare file names.
        quoted: values the command builders wrap in single quotes.
        tokens: option values the command builders insert unquoted.
    """
    for name in names:
        # The pipeline command builders wrap file names in single quotes,
        # so a quote inside the name would terminate the quoting.
        if not _is_plain_name(name) or "'" in name:
            return f"Invalid filename: {name!r}"
    for value in quoted:
        if not isinstance(value, str) or "'" in value:
            return f"Invalid value: {value!r}"
    for token in tokens:
        if not _is_simple_token(token):
            return f"Invalid option: {token!r}"
    return None


async def _ws_ready(websocket: WebSocket, names=(), quoted=(), tokens=()) -> bool:
    """Check connectivity and arguments before streaming a command.

    When something is wrong the socket is accepted, one
    ``{"type": "error", ...}`` message is sent and False is returned;
    otherwise the socket is left untouched and True is returned.
    """
    problem = None if ssh_manager.is_connected() else "Not connected"
    if problem is None:
        problem = _ws_arg_problem(names=names, quoted=quoted, tokens=tokens)
    if problem is None:
        return True
    await websocket.accept()
    await websocket.send_json({"type": "error", "data": problem})
    return False


async def _stream_ws(websocket: WebSocket, command: str, use_conda: bool = True):
    """Run a remote command and stream output lines over WebSocket.

    Each output line is sent as ``{"type": "log", "data": line}``; the stream
    ends with a single ``"done"`` or ``"error"`` message.
    """
    await websocket.accept()
    # get_running_loop() is the supported way to obtain the loop in a coroutine.
    loop = asyncio.get_running_loop()
    queue: asyncio.Queue = asyncio.Queue()

    def _post(kind: str, data: str) -> None:
        # Called from the worker thread; hand the message to the event loop.
        asyncio.run_coroutine_threadsafe(queue.put({"type": kind, "data": data}), loop)

    def _worker():
        try:
            for line in ssh_manager.execute_stream(command, use_conda=use_conda):
                _post("log", line)
            _post("done", "Command completed.")
        except Exception as exc:
            _post("error", str(exc))

    # The SSH read loop blocks, so it runs on its own thread.
    threading.Thread(target=_worker, daemon=True).start()

    try:
        while True:
            msg = await queue.get()
            await websocket.send_json(msg)
            if msg["type"] in ("done", "error"):
                break
    except WebSocketDisconnect:
        pass


# ──────────────────────────────────────────────────────────────────────────────
# Connection
# ──────────────────────────────────────────────────────────────────────────────

@app.post("/api/connect")
async def connect(req: ConnectRequest):
    """Open the shared SSH session; responds 500 with the error text on failure."""
    try:
        ssh_manager.connect(
            host=req.host,
            username=req.username,
            password=req.password,
            key_path=req.key_path,
            port=req.port,
        )
        return {"status": "connected", "host": req.host, "username": req.username}
    except Exception as exc:
        raise HTTPException(status_code=500, detail=str(exc))


@app.post("/api/disconnect")
async def disconnect():
    """Close the shared SSH session."""
    ssh_manager.disconnect()
    return {"status": "disconnected"}


@app.get("/api/status")
async def status():
    """Report connection state plus GPU stats when connected."""
    connected = ssh_manager.is_connected()
    gpu = get_gpu_stats() if connected else {"gpus": [], "error": "Not connected"}
    return {
        "connected": connected,
        "host": ssh_manager.host if connected else None,
        "username": ssh_manager.username if connected else None,
        "gpu": gpu,
    }


# ──────────────────────────────────────────────────────────────────────────────
# GPU
# ──────────────────────────────────────────────────────────────────────────────

@app.get("/api/gpu")
async def gpu():
    """Return GPU stats; responds 503 when not connected."""
    _require_ssh()
    return get_gpu_stats()


# ──────────────────────────────────────────────────────────────────────────────
# File management
# ──────────────────────────────────────────────────────────────────────────────

@app.get("/api/files/{stage}")
async def list_files(stage: str):
    """List the regular files in a pipeline stage directory."""
    if stage not in STAGE_DIRS:
        raise HTTPException(status_code=400, detail=f"Unknown stage: {stage}")
    _require_ssh()

    out, _, _ = ssh_manager.execute(
        f"ls -la '{STAGE_DIRS[stage]}' 2>/dev/null | tail -n +2", use_conda=False
    )

    files = []
    for line in out.splitlines():
        if not line.strip() or line.startswith("total"):
            continue
        parts = line.split()
        # Need the nine `ls -l` columns; skip directories and any row whose
        # size column is not a plain number instead of failing the request.
        if len(parts) < 9 or parts[0].startswith("d") or not parts[4].isdigit():
            continue
        files.append({
            "name": " ".join(parts[8:]),
            "size": int(parts[4]),
            "modified": f"{parts[5]} {parts[6]} {parts[7]}",
        })

    return {"stage": stage, "directory": STAGE_DIRS[stage], "files": files}


@app.delete("/api/files/{stage}/{filename}")
async def delete_file(stage: str, filename: str):
    """Delete one file from a stage directory."""
    if stage not in STAGE_DIRS:
        raise HTTPException(status_code=400, detail=f"Unknown stage: {stage}")
    _require_ssh()

    path = _remote_path(STAGE_DIRS[stage], filename)
    _, err, code = ssh_manager.execute(f"rm -f {path}", use_conda=False)
    if code != 0:
        raise HTTPException(status_code=500, detail=err)
    return {"deleted": filename}


@app.get("/api/files/{stage}/{filename}/preview")
async def preview_file(stage: str, filename: str, lines: int = 120):
    """Return the first *lines* lines of a file in a stage directory."""
    if stage not in STAGE_DIRS:
        raise HTTPException(status_code=400, detail=f"Unknown stage: {stage}")
    _require_ssh()

    path = _remote_path(STAGE_DIRS[stage], filename)
    out, err, code = ssh_manager.execute(f"head -n {lines} {path}", use_conda=False)
    if code != 0:
        raise HTTPException(status_code=500, detail=err)
    return {"filename": filename, "content": out}


@app.post("/api/upload")
async def upload_file(file: UploadFile = File(...)):
    """Upload a document into the remote input directory."""
    _require_ssh()

    # Keep only the final path component of the client-supplied name so the
    # upload cannot land outside the input directory.
    name = Path(file.filename or "").name
    if not _is_plain_name(name):
        raise HTTPException(status_code=400, detail=f"Invalid filename: {file.filename!r}")

    tmp_path = None
    try:
        with tempfile.NamedTemporaryFile(delete=False, suffix=Path(name).suffix) as tmp:
            tmp_path = tmp.name
            tmp.write(await file.read())

        remote_path = f"{STAGE_DIRS['input']}/{name}"
        ssh_manager.upload_file(tmp_path, remote_path)
        return {"uploaded": name, "remote_path": remote_path}
    finally:
        # Remove the spooled copy even when reading or uploading failed.
        if tmp_path is not None:
            os.unlink(tmp_path)


# ──────────────────────────────────────────────────────────────────────────────
# Pipeline (WebSocket streaming)
# ──────────────────────────────────────────────────────────────────────────────

@app.websocket("/api/pipeline/ingest")
async def ws_ingest(websocket: WebSocket, filename: str):
    """Stream the ingest stage for one input file."""
    if not await _ws_ready(websocket, names=(filename,)):
        return
    cmd = ingest_cmd(f"{STAGE_DIRS['input']}/{filename}")
    await _stream_ws(websocket, cmd)


@app.websocket("/api/pipeline/create")
async def ws_create(websocket: WebSocket, filename: str,
                    num_pairs: int = 50, pair_type: str = "qa"):
    """Stream the create stage for one parsed file."""
    if not await _ws_ready(websocket, names=(filename,), tokens=(pair_type,)):
        return
    cmd = create_cmd(f"{STAGE_DIRS['parsed']}/{filename}", num_pairs, pair_type)
    await _stream_ws(websocket, cmd)


@app.websocket("/api/pipeline/curate")
async def ws_curate(websocket: WebSocket, filename: str,
                    output_filename: str, threshold: float = 7.0):
    """Stream the curate stage, writing into the curated directory."""
    if not await _ws_ready(websocket, names=(filename, output_filename)):
        return
    cmd = curate_cmd(
        f"{STAGE_DIRS['generated']}/{filename}",
        f"{STAGE_DIRS['curated']}/{output_filename}",
        threshold,
    )
    await _stream_ws(websocket, cmd)


@app.websocket("/api/pipeline/save")
async def ws_save(websocket: WebSocket, filename: str,
                  output_filename: str, fmt: str = "jsonl"):
    """Stream the save-as stage, writing into the final directory."""
    if not await _ws_ready(websocket, names=(filename, output_filename), tokens=(fmt,)):
        return
    cmd = save_as_cmd(
        f"{STAGE_DIRS['curated']}/{filename}",
        f"{STAGE_DIRS['final']}/{output_filename}",
        fmt,
    )
    await _stream_ws(websocket, cmd)


# ──────────────────────────────────────────────────────────────────────────────
# QA Pairs viewer
# ──────────────────────────────────────────────────────────────────────────────

@app.get("/api/pairs/{filename}")
async def get_pairs(filename: str, stage: str = "generated"):
    """Return the JSON-lines records of a file; unknown stages fall back to "generated"."""
    _require_ssh()
    stage_dir = STAGE_DIRS.get(stage, STAGE_DIRS["generated"])
    path = _remote_path(stage_dir, filename)
    out, err, code = ssh_manager.execute(f"cat {path}", use_conda=False)
    if code != 0:
        raise HTTPException(status_code=404, detail=f"File not found: {filename}")

    pairs = []
    for line in out.splitlines():
        if not line.strip():
            continue
        try:
            pairs.append(json.loads(line))
        except json.JSONDecodeError:
            # Lines that are not valid JSON are skipped.
            pass

    return {"filename": filename, "count": len(pairs), "pairs": pairs}


# ──────────────────────────────────────────────────────────────────────────────
# Config editor
# ──────────────────────────────────────────────────────────────────────────────

@app.get("/api/config")
async def get_config():
    """Return the remote config both parsed and as raw text."""
    _require_ssh()
    try:
        raw = ssh_manager.read_remote_file(CONFIG_PATH)
        return {"config": yaml.safe_load(raw), "raw": raw}
    except Exception as exc:
        raise HTTPException(status_code=500, detail=str(exc))


@app.put("/api/config")
async def update_config(payload: dict):
    """Overwrite the remote config with *payload* serialised as YAML."""
    _require_ssh()
    try:
        ssh_manager.write_remote_file(CONFIG_PATH, yaml.dump(payload, default_flow_style=False))
        return {"status": "updated"}
    except Exception as exc:
        raise HTTPException(status_code=500, detail=str(exc))


# ──────────────────────────────────────────────────────────────────────────────
# Training (WebSocket streaming)
# ──────────────────────────────────────────────────────────────────────────────

@app.websocket("/api/train")
async def ws_train(
    websocket: WebSocket,
    model_name: str = "llama3.1:8b",
    dataset_path: str = "",
    output_dir: str = "/opt/synthetic/output",
    num_epochs: int = 3,
    batch_size: int = 2,
    learning_rate: float = 2e-4,
):
    """Stream the output of the remote training script."""
    # These three values are paths/identifiers (slashes allowed) that the
    # training command wraps in single quotes.
    if not await _ws_ready(websocket, quoted=(model_name, dataset_path, output_dir)):
        return

    cmd = train_cmd(model_name, dataset_path, output_dir, num_epochs, batch_size, learning_rate)
    await _stream_ws(websocket, cmd)
# ──────────────────────────────────────────────────────────────────────────────
# Interactive terminal (xterm.js ↔ SSH shell)
# ──────────────────────────────────────────────────────────────────────────────

@app.websocket("/api/terminal")
async def ws_terminal(websocket: WebSocket):
    """Bridge a WebSocket to an interactive SSH shell channel.

    Two pumps run concurrently (shell → socket, socket → shell). As soon as
    either side finishes, the other pump is cancelled and the channel closed.
    """
    await websocket.accept()

    if not ssh_manager.is_connected():
        await websocket.send_text("\r\nNot connected to SSH server.\r\n")
        return

    channel = None
    tasks = []
    try:
        channel = ssh_manager.open_shell_channel()

        async def ssh_to_ws():
            while True:
                if channel.recv_ready():
                    data = channel.recv(4096)
                    if not data:
                        break
                    await websocket.send_bytes(data)
                elif channel.exit_status_ready():
                    break
                else:
                    # Nothing to read yet; yield to the event loop.
                    await asyncio.sleep(0.02)

        async def ws_to_ssh():
            try:
                while True:
                    data = await websocket.receive_bytes()
                    # sendall() keeps writing until every byte is sent.
                    channel.sendall(data)
            except WebSocketDisconnect:
                pass

        tasks = [asyncio.ensure_future(ssh_to_ws()), asyncio.ensure_future(ws_to_ssh())]
        # Stop at the first finished pump: otherwise the reader would keep
        # polling an idle shell after the browser has gone away.
        done, _ = await asyncio.wait(tasks, return_when=asyncio.FIRST_COMPLETED)
        for finished in done:
            finished.result()  # re-raise a pump failure, if any
    except WebSocketDisconnect:
        pass
    except Exception as exc:
        try:
            await websocket.send_text(f"\r\nError: {exc}\r\n")
        except Exception:
            # The socket is already unusable; nothing more to report.
            pass
    finally:
        for task in tasks:
            task.cancel()
        if tasks:
            await asyncio.gather(*tasks, return_exceptions=True)
        if channel:
            try:
                channel.close()
            except Exception:
                pass


# ──────────────────────────────────────────────────────────────────────────────
# Model manager (Ollama)
# ──────────────────────────────────────────────────────────────────────────────

@app.get("/api/models")
async def list_models():
    """Return the models reported by Ollama's /api/tags endpoint."""
    try:
        async with httpx.AsyncClient(timeout=10) as client:
            resp = await client.get(f"{OLLAMA_URL}/api/tags")
            resp.raise_for_status()
            return {"models": resp.json().get("models", [])}
    except Exception as exc:
        raise HTTPException(status_code=500, detail=str(exc))


@app.websocket("/api/models/pull")
async def ws_pull_model(websocket: WebSocket, model_name: str):
    """Forward Ollama pull progress lines to the client as JSON messages."""
    await websocket.accept()
    try:
        async with httpx.AsyncClient(timeout=600) as client:
            async with client.stream(
                "POST", f"{OLLAMA_URL}/api/pull",
                json={"name": model_name, "stream": True}
            ) as resp:
                async for line in resp.aiter_lines():
                    if line.strip():
                        try:
                            await websocket.send_json(json.loads(line))
                        except json.JSONDecodeError:
                            # Skip progress lines that are not valid JSON.
                            pass
        await websocket.send_json({"status": "success"})
    except WebSocketDisconnect:
        pass
    except Exception as exc:
        try:
            await websocket.send_json({"status": "error", "error": str(exc)})
        except Exception:
            pass


@app.delete("/api/models/{model_name:path}")
async def delete_model(model_name: str):
    """Ask Ollama to delete a model; responds 500 with the error text on failure."""
    try:
        async with httpx.AsyncClient(timeout=30) as client:
            resp = await client.request(
                "DELETE", f"{OLLAMA_URL}/api/delete",
                json={"name": model_name}
            )
            resp.raise_for_status()
            return {"deleted": model_name}
    except Exception as exc:
        raise HTTPException(status_code=500, detail=str(exc))


# ──────────────────────────────────────────────────────────────────────────────

if __name__ == "__main__":
    import uvicorn

    # uvicorn only honours reload=True when the application is given as an
    # import string, so pass "main:app" rather than the app object.
    uvicorn.run("main:app", host="0.0.0.0", port=8080, reload=True)
# ──────────────────────────────────────────────────────────────────────────────
# Pipeline paths & command builders
# These match the remote Ubuntu server layout from LLM_TRAINER_APP_SCOPE.md
# ──────────────────────────────────────────────────────────────────────────────
import shlex

SDK_BIN = (
    "/home/tocmo0nlord/miniconda3/envs/synthetic-data/bin/synthetic-data-kit"
)
CONFIG_PATH = "/opt/synthetic/synthetic-data-kit/config.yaml"
DATA_BASE = "/opt/synthetic/synthetic-data-kit/data"

STAGE_DIRS = {
    "input": f"{DATA_BASE}/input",
    "parsed": f"{DATA_BASE}/parsed",
    "generated": f"{DATA_BASE}/generated",
    "curated": f"{DATA_BASE}/curated",
    "final": f"{DATA_BASE}/final",
}

TRAIN_SCRIPT = "/opt/synthetic/train.py"
OUTPUT_BASE = "/opt/synthetic/output"


def _q(value) -> str:
    """Render *value* as exactly one shell word.

    Hand-written ``'...'`` quoting breaks as soon as the value itself holds a
    single quote; ``shlex.quote`` escapes every shell metacharacter.
    """
    return shlex.quote(str(value))


def _sdk(subcommand: str, *args) -> str:
    """Join *args* (already shell-safe words) onto the SDK command line."""
    return f"{SDK_BIN} --config {CONFIG_PATH} {subcommand} {' '.join(args)}"


def ingest_cmd(input_file: str) -> str:
    """Build the ``ingest`` command that parses *input_file* into the parsed dir."""
    return _sdk("ingest", _q(input_file), "-o", _q(STAGE_DIRS["parsed"]))


def create_cmd(parsed_file: str, num_pairs: int = 50, pair_type: str = "qa") -> str:
    """Build the ``create`` command that writes pairs into the generated dir."""
    return _sdk(
        "create", _q(parsed_file),
        "-o", _q(STAGE_DIRS["generated"]),
        "--type", _q(pair_type),
        "--num-pairs", _q(num_pairs),
    )


def curate_cmd(generated_file: str, output_file: str, threshold: float = 7.0) -> str:
    """Build the ``curate`` command writing to *output_file* at *threshold*."""
    return _sdk(
        "curate", _q(generated_file),
        "-o", _q(output_file),
        "--threshold", _q(threshold),
    )


def save_as_cmd(curated_file: str, output_file: str, fmt: str = "jsonl") -> str:
    """Build the ``save-as`` command converting *curated_file* to *fmt*."""
    return _sdk(
        "save-as", _q(curated_file),
        "-f", _q(fmt),
        "-o", _q(output_file),
    )


def train_cmd(
    model_name: str,
    dataset_path: str,
    output_dir: str = OUTPUT_BASE,
    num_epochs: int = 3,
    batch_size: int = 2,
    learning_rate: float = 2e-4,
) -> str:
    """Build the command line that launches the remote training script."""
    return (
        f"python3 {TRAIN_SCRIPT} "
        f"--model {_q(model_name)} "
        f"--dataset {_q(dataset_path)} "
        f"--output {_q(output_dir)} "
        f"--epochs {_q(num_epochs)} "
        f"--batch-size {_q(batch_size)} "
        f"--lr {_q(learning_rate)}"
    )
class SSHClient:
    """Thin wrapper around one shared paramiko SSH session.

    Provides one-shot and streaming command execution, an interactive shell
    channel, SFTP upload and small remote-file helpers. A background thread
    pings the transport every 30 s to keep the session alive.
    """

    def __init__(self):
        self.client: Optional[paramiko.SSHClient] = None
        self.connected = False
        self.host = ""
        self.username = ""
        self.port = 22
        self._keepalive_thread: Optional[threading.Thread] = None
        self._stop_keepalive = threading.Event()
        self._lock = threading.Lock()

    def connect(self, host: str, username: str, password: Optional[str] = None,
                key_path: Optional[str] = None, port: int = 22) -> bool:
        """Open (or re-open) the SSH session.

        Returns:
            True once connected.

        Raises:
            Exception: whatever paramiko raises on failure; ``connected`` is
                reset to False before re-raising.
        """
        with self._lock:
            # Tell the keepalive thread of any previous session to stop so a
            # reconnect does not leave two pingers running.
            self._stop_keepalive.set()
            try:
                if self.client:
                    self.client.close()

                self.client = paramiko.SSHClient()
                # NOTE(review): AutoAddPolicy accepts unknown host keys without
                # verification; consider a known_hosts-based policy.
                self.client.set_missing_host_key_policy(paramiko.AutoAddPolicy())

                kwargs = {"hostname": host, "port": port, "username": username, "timeout": 10}
                if key_path:
                    kwargs["key_filename"] = key_path
                if password:
                    kwargs["password"] = password

                self.client.connect(**kwargs)
            except Exception:
                self.connected = False
                raise

            self.connected = True
            self.host = host
            self.username = username
            self.port = port

            # Each session gets its own stop event; the previous thread keeps
            # the old (already set) one and exits on its next wake-up.
            stop_event = threading.Event()
            self._stop_keepalive = stop_event
            self._keepalive_thread = threading.Thread(
                target=self._keepalive_loop, args=(stop_event,), daemon=True
            )
            self._keepalive_thread.start()
            return True

    def disconnect(self):
        """Stop the keepalive thread and close the session."""
        self._stop_keepalive.set()
        if self.client:
            self.client.close()
        self.connected = False

    def _keepalive_loop(self, stop_event: Optional[threading.Event] = None):
        """Ping the transport every 30 s until stopped or the link drops."""
        stop_event = self._stop_keepalive if stop_event is None else stop_event
        while not stop_event.wait(30):
            try:
                transport = self.client.get_transport()
                if transport and transport.is_active():
                    transport.send_ignore()
                else:
                    self.connected = False
                    break
            except Exception:
                self.connected = False
                break

    def _wrap_command(self, command: str, use_conda: bool) -> str:
        """Prefix *command* with the conda activation when *use_conda* is set."""
        if not use_conda:
            return command
        return (
            f"source /home/{self.username}/miniconda3/etc/profile.d/conda.sh && "
            f"conda activate synthetic-data && {command}"
        )

    def execute(self, command: str, use_conda: bool = True) -> tuple:
        """Run *command* to completion.

        Returns:
            ``(stdout_text, stderr_text, exit_code)``.

        Raises:
            Exception: when the session is not connected.
        """
        if not self.is_connected():
            raise Exception("Not connected to SSH server")

        _, stdout, stderr = self.client.exec_command(self._wrap_command(command, use_conda))
        # NOTE(review): stdout is drained before stderr; a command producing a
        # very large stderr could stall here — confirm against expected usage.
        out = stdout.read().decode("utf-8", errors="replace")
        err = stderr.read().decode("utf-8", errors="replace")
        exit_code = stdout.channel.recv_exit_status()
        return out, err, exit_code

    def execute_stream(self, command: str, use_conda: bool = True):
        """Generator that yields output lines from a command.

        Complete lines are yielded with their trailing newline; a final
        partial line is yielded as-is. The channel is closed when the
        generator finishes or is closed early.

        Raises:
            Exception: when the session is not connected.
        """
        if not self.is_connected():
            raise Exception("Not connected to SSH server")

        transport = self.client.get_transport()
        channel = transport.open_session()
        try:
            channel.get_pty()
            channel.exec_command(self._wrap_command(command, use_conda))

            pending = b""
            while True:
                if channel.recv_ready():
                    chunk = channel.recv(4096)
                    if not chunk:
                        break
                    pending += chunk
                    *lines, pending = pending.split(b"\n")
                    for raw in lines:
                        yield raw.decode("utf-8", errors="replace") + "\n"
                elif channel.exit_status_ready():
                    # Output may have arrived between the two checks; look
                    # once more before stopping so the tail is not lost.
                    if channel.recv_ready():
                        continue
                    break
                else:
                    time.sleep(0.05)

            if pending:
                yield pending.decode("utf-8", errors="replace")
        finally:
            channel.close()

    def open_shell_channel(self, term: str = "xterm-256color", width: int = 220, height: int = 50):
        """Open an interactive shell channel for the terminal panel.

        Raises:
            Exception: when the session is not connected.
        """
        if not self.is_connected():
            raise Exception("Not connected to SSH server")

        transport = self.client.get_transport()
        channel = transport.open_session()
        channel.get_pty(term=term, width=width, height=height)
        channel.invoke_shell()

        # Auto-activate conda env
        activate = (
            f"source /home/{self.username}/miniconda3/etc/profile.d/conda.sh && "
            f"conda activate synthetic-data\n"
        )
        channel.send(activate)
        return channel

    def upload_file(self, local_path: str, remote_path: str):
        """Copy *local_path* to *remote_path* over SFTP.

        Raises:
            Exception: when the session is not connected.
        """
        if not self.is_connected():
            raise Exception("Not connected to SSH server")
        sftp = self.client.open_sftp()
        try:
            sftp.put(local_path, remote_path)
        finally:
            sftp.close()

    def read_remote_file(self, remote_path: str) -> str:
        """Return the text of *remote_path*.

        Raises:
            Exception: when ``cat`` exits non-zero or the session is down.
        """
        import shlex

        out, err, code = self.execute(f"cat {shlex.quote(remote_path)}", use_conda=False)
        if code != 0:
            raise Exception(f"Failed to read file: {err}")
        return out

    def write_remote_file(self, remote_path: str, content: str):
        """Overwrite *remote_path* with *content*.

        Raises:
            Exception: when the remote command exits non-zero or the session
                is down.
        """
        import shlex

        # Base64 keeps arbitrary content intact on its way through the shell.
        encoded = base64.b64encode(content.encode()).decode()
        cmd = f"echo '{encoded}' | base64 -d > {shlex.quote(remote_path)}"
        out, err, code = self.execute(cmd, use_conda=False)
        if code != 0:
            raise Exception(f"Failed to write file: {err}")

    def is_connected(self) -> bool:
        """Return True when the transport is active; otherwise clear ``connected``."""
        try:
            if self.client:
                transport = self.client.get_transport()
                if transport and transport.is_active():
                    return True
        except Exception:
            pass
        self.connected = False
        return False


# Singleton shared across all routes
ssh_manager = SSHClient()
+ + + diff --git a/frontend/nginx.conf b/frontend/nginx.conf new file mode 100644 index 0000000..378f0da --- /dev/null +++ b/frontend/nginx.conf @@ -0,0 +1,22 @@ +server { + listen 80; + server_name _; + + root /usr/share/nginx/html; + index index.html; + + # Proxy API and WebSocket calls to the backend + location /api/ { + proxy_pass http://backend:8080; + proxy_http_version 1.1; + proxy_set_header Upgrade $http_upgrade; + proxy_set_header Connection "upgrade"; + proxy_set_header Host $host; + proxy_read_timeout 3600; + } + + # SPA fallback + location / { + try_files $uri $uri/ /index.html; + } +} diff --git a/frontend/package.json b/frontend/package.json new file mode 100644 index 0000000..d40087b --- /dev/null +++ b/frontend/package.json @@ -0,0 +1,28 @@ +{ + "name": "llm-trainer-frontend", + "version": "1.0.0", + "type": "module", + "scripts": { + "dev": "vite", + "build": "vite build", + "preview": "vite preview" + }, + "dependencies": { + "react": "^18.2.0", + "react-dom": "^18.2.0", + "axios": "^1.6.7", + "@xterm/xterm": "^5.3.0", + "@xterm/addon-fit": "^0.8.0", + "@xterm/addon-web-links": "^0.9.0", + "recharts": "^2.10.3", + "react-dropzone": "^14.2.3", + "lucide-react": "^0.344.0" + }, + "devDependencies": { + "@vitejs/plugin-react": "^4.2.1", + "autoprefixer": "^10.4.17", + "postcss": "^8.4.35", + "tailwindcss": "^3.4.1", + "vite": "^5.1.4" + } +} diff --git a/frontend/postcss.config.js b/frontend/postcss.config.js new file mode 100644 index 0000000..2e7af2b --- /dev/null +++ b/frontend/postcss.config.js @@ -0,0 +1,6 @@ +export default { + plugins: { + tailwindcss: {}, + autoprefixer: {}, + }, +} diff --git a/frontend/src/App.jsx b/frontend/src/App.jsx new file mode 100644 index 0000000..f5c1dca --- /dev/null +++ b/frontend/src/App.jsx @@ -0,0 +1,141 @@ +import React, { useState, useEffect, useCallback } from 'react' +import axios from 'axios' +import { + Wifi, WifiOff, Server, FileText, GitBranch, + Table2, Activity, TerminalSquare, Box, Settings, 
RefreshCw, +} from 'lucide-react' + +import ConnectionPanel from './components/ConnectionPanel' +import DocumentManager from './components/DocumentManager' +import PipelineRunner from './components/PipelineRunner' +import QAPairViewer from './components/QAPairViewer' +import TrainingMonitor from './components/TrainingMonitor' +import Terminal from './components/Terminal' +import ModelManager from './components/ModelManager' +import ConfigEditor from './components/ConfigEditor' + +const API = '' // vite proxy forwards /api → :8080 + +const NAV = [ + { id: 'connection', label: 'Connection', icon: Server }, + { id: 'documents', label: 'Documents', icon: FileText }, + { id: 'pipeline', label: 'Pipeline', icon: GitBranch }, + { id: 'pairs', label: 'QA Pairs', icon: Table2 }, + { id: 'training', label: 'Training', icon: Activity }, + { id: 'terminal', label: 'Terminal', icon: TerminalSquare }, + { id: 'models', label: 'Models', icon: Box }, + { id: 'config', label: 'Config', icon: Settings }, +] + +export default function App() { + const [active, setActive] = useState('connection') + const [connected, setConnected] = useState(false) + const [gpuInfo, setGpuInfo] = useState(null) + const [statusMsg, setStatusMsg] = useState('') + + const fetchStatus = useCallback(async () => { + try { + const { data } = await axios.get(`${API}/api/status`) + setConnected(data.connected) + if (data.gpu?.gpus?.length) setGpuInfo(data.gpu.gpus[0]) + } catch { + setConnected(false) + } + }, []) + + useEffect(() => { + fetchStatus() + const id = setInterval(fetchStatus, 10000) + return () => clearInterval(id) + }, [fetchStatus]) + + const panels = { + connection: , + documents: , + pipeline: , + pairs: , + training: , + terminal: , + models: , + config: , + } + + return ( +
+ + {/* ── Sidebar ── */} + + + {/* ── Main content ── */} +
+ {/* Top bar */} +
+

+ {NAV.find(n => n.id === active)?.label} +

+ {statusMsg && ( + {statusMsg} + )} +
+ +
+ {panels[active]} +
+
+
+ ) +} diff --git a/frontend/src/components/ConfigEditor.jsx b/frontend/src/components/ConfigEditor.jsx new file mode 100644 index 0000000..bf44170 --- /dev/null +++ b/frontend/src/components/ConfigEditor.jsx @@ -0,0 +1,192 @@ +import React, { useState, useEffect } from 'react' +import axios from 'axios' +import { Save, RefreshCw, ChevronDown, ChevronRight } from 'lucide-react' + +function Field({ label, value, onChange, type = 'text', options = null }) { + return ( +
+ + {options ? ( + + ) : ( + onChange(type === 'number' ? Number(e.target.value) : e.target.value)} + className="w-full bg-[#0f1117] border border-slate-700 rounded-lg px-3 py-1.5 text-xs + text-slate-200 focus:outline-none focus:border-blue-500" + /> + )} +
+ ) +} + +function Section({ title, children, defaultOpen = true }) { + const [open, setOpen] = useState(defaultOpen) + return ( +
+ + {open &&
{children}
} +
+ ) +} + +export default function ConfigEditor({ connected }) { + const [config, setConfig] = useState(null) + const [raw, setRaw] = useState('') + const [mode, setMode] = useState('form') // 'form' | 'raw' + const [loading, setLoading] = useState(false) + const [saving, setSaving] = useState(false) + const [msg, setMsg] = useState('') + + const load = async () => { + if (!connected) return + setLoading(true); setMsg('') + try { + const { data } = await axios.get('/api/config') + setConfig(data.config || {}) + setRaw(data.raw || '') + } catch (err) { + setMsg(`Load failed: ${err.response?.data?.detail || err.message}`) + } finally { + setLoading(false) + } + } + + useEffect(() => { load() }, [connected]) + + const set = (path, val) => { + setConfig(c => { + const updated = { ...c } + const keys = path.split('.') + let cur = updated + for (let i = 0; i < keys.length - 1; i++) { + cur[keys[i]] = { ...(cur[keys[i]] || {}) } + cur = cur[keys[i]] + } + cur[keys[keys.length - 1]] = val + return updated + }) + } + + const g = path => { + if (!config) return '' + return path.split('.').reduce((o, k) => (o || {})[k], config) ?? '' + } + + const save = async () => { + setSaving(true); setMsg('') + try { + const payload = mode === 'raw' + ? await axios.put('/api/config', config) // fallback + : await axios.put('/api/config', config) + setMsg('Configuration saved ✓') + } catch (err) { + setMsg(`Save failed: ${err.response?.data?.detail || err.message}`) + } finally { + setSaving(false) + } + } + + return ( +
+ + {/* Toolbar */} +
+
+ {['form', 'raw'].map(m => ( + + ))} +
+
+ + +
+
+ + {msg && ( +

{msg}

+ )} + + {!connected ? ( +
+ Connect to SSH server to edit config. +
+ ) : loading ? ( +

Loading config…

+ ) : mode === 'raw' ? ( +
+

+ Editing /opt/synthetic/synthetic-data-kit/config.yaml directly +

+