Initial scaffold: LLM Trainer Dashboard
Full-stack app with FastAPI backend (SSH/paramiko, pipeline streaming, GPU stats, xterm.js terminal, Ollama model manager) and React + Tailwind frontend (8 panels: Connection, Documents, Pipeline, QA Pairs, Training, Terminal, Models, Config). Docker Compose included.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
73
backend/pipeline.py
Normal file
73
backend/pipeline.py
Normal file
@@ -0,0 +1,73 @@
|
||||
# ──────────────────────────────────────────────────────────────────────────────
|
||||
# Pipeline paths & command builders
|
||||
# These match the remote Ubuntu server layout from LLM_TRAINER_APP_SCOPE.md
|
||||
# ──────────────────────────────────────────────────────────────────────────────
|
||||
|
||||
# Absolute path of the synthetic-data-kit executable that _sdk() invokes.
SDK_BIN = (
    "/home/tocmo0nlord/miniconda3/envs/synthetic-data/bin/synthetic-data-kit"
)
# Config file passed to every synthetic-data-kit call through --config.
CONFIG_PATH = "/opt/synthetic/synthetic-data-kit/config.yaml"
# Root directory under which the per-stage data folders live.
DATA_BASE = "/opt/synthetic/synthetic-data-kit/data"

# One directory per pipeline stage, keyed by stage name.
STAGE_DIRS = {
    "input": f"{DATA_BASE}/input",
    "parsed": f"{DATA_BASE}/parsed",
    "generated": f"{DATA_BASE}/generated",
    "curated": f"{DATA_BASE}/curated",
    "final": f"{DATA_BASE}/final",
}

# Training script run by train_cmd() and the default directory for its output.
TRAIN_SCRIPT = "/opt/synthetic/train.py"
OUTPUT_BASE = "/opt/synthetic/output"
|
||||
|
||||
|
||||
def _sdk(subcommand: str, *args: str) -> str:
    """Build a synthetic-data-kit shell command line.

    Args:
        subcommand: Kit subcommand to run (e.g. ``"ingest"``).
        *args: Extra command-line tokens, appended in order. They are
            inserted verbatim, so callers must shell-quote any value that
            may contain whitespace or shell metacharacters.

    Returns:
        The command as a single space-separated string.
    """
    # Joining a list (instead of formatting a trailing "{' '.join(args)}")
    # avoids a dangling space when no extra arguments are supplied.
    parts = [SDK_BIN, "--config", CONFIG_PATH, subcommand, *args]
    return " ".join(parts)
|
||||
|
||||
|
||||
def ingest_cmd(input_file: str) -> str:
    """Build the ``ingest`` command for one input document.

    Args:
        input_file: Path of the document to ingest.

    Returns:
        Shell command string; output goes to ``STAGE_DIRS["parsed"]``.
    """
    import shlex

    # shlex.quote escapes embedded single quotes, which plain '...' wrapping
    # does not; a path containing "'" can no longer break out of the argument.
    return _sdk("ingest", shlex.quote(input_file), "-o", STAGE_DIRS["parsed"])
|
||||
|
||||
|
||||
def create_cmd(parsed_file: str, num_pairs: int = 50, pair_type: str = "qa") -> str:
    """Build the ``create`` command that generates pairs from a parsed file.

    Args:
        parsed_file: Path of the parsed document to generate from.
        num_pairs: Value passed to ``--num-pairs``.
        pair_type: Value passed to ``--type``.

    Returns:
        Shell command string; output goes to ``STAGE_DIRS["generated"]``.
    """
    import shlex

    # Every caller-supplied value is shell-quoted; shlex.quote leaves plain
    # tokens such as "qa" or "50" untouched and escapes anything unsafe.
    return _sdk(
        "create", shlex.quote(parsed_file),
        "-o", STAGE_DIRS["generated"],
        "--type", shlex.quote(pair_type),
        "--num-pairs", shlex.quote(str(num_pairs)),
    )
|
||||
|
||||
|
||||
def curate_cmd(generated_file: str, output_file: str, threshold: float = 7.0) -> str:
    """Build the ``curate`` command for a generated file.

    Args:
        generated_file: Path of the generated file to curate.
        output_file: Path passed to ``-o``.
        threshold: Value passed to ``--threshold``.

    Returns:
        Shell command string.
    """
    import shlex

    # shlex.quote handles embedded single quotes that naive '...' wrapping
    # would leave unescaped.
    return _sdk(
        "curate", shlex.quote(generated_file),
        "-o", shlex.quote(output_file),
        "--threshold", shlex.quote(str(threshold)),
    )
|
||||
|
||||
|
||||
def save_as_cmd(curated_file: str, output_file: str, fmt: str = "jsonl") -> str:
    """Build the ``save-as`` command that exports a curated file.

    Args:
        curated_file: Path of the curated file to export.
        output_file: Path passed to ``-o``.
        fmt: Value passed to ``-f``.

    Returns:
        Shell command string.
    """
    import shlex

    # Quote all caller-supplied values so whitespace, quotes, or shell
    # metacharacters cannot split or extend the command.
    return _sdk(
        "save-as", shlex.quote(curated_file),
        "-f", shlex.quote(fmt),
        "-o", shlex.quote(output_file),
    )
|
||||
|
||||
|
||||
def train_cmd(
    model_name: str,
    dataset_path: str,
    output_dir: str = OUTPUT_BASE,
    num_epochs: int = 3,
    batch_size: int = 2,
    learning_rate: float = 2e-4,
) -> str:
    """Build the shell command that runs ``TRAIN_SCRIPT`` with ``python3``.

    Args:
        model_name: Value passed to ``--model``.
        dataset_path: Value passed to ``--dataset``.
        output_dir: Value passed to ``--output``; defaults to ``OUTPUT_BASE``.
        num_epochs: Value passed to ``--epochs``.
        batch_size: Value passed to ``--batch-size``.
        learning_rate: Value passed to ``--lr``.

    Returns:
        Shell command string.
    """
    import shlex

    def quoted(value: object) -> str:
        # str() first so numeric options go through the same escaping path;
        # plain numbers and simple tokens come back unchanged.
        return shlex.quote(str(value))

    return (
        f"python3 {TRAIN_SCRIPT} "
        f"--model {quoted(model_name)} "
        f"--dataset {quoted(dataset_path)} "
        f"--output {quoted(output_dir)} "
        f"--epochs {quoted(num_epochs)} "
        f"--batch-size {quoted(batch_size)} "
        f"--lr {quoted(learning_rate)}"
    )
|
||||
Reference in New Issue
Block a user