From 90fafdadf1b931c4daadcdd0890ae5a41f8adc47 Mon Sep 17 00:00:00 2001
From: tocmo0nlord
Date: Sun, 26 Apr 2026 01:50:00 +0000
Subject: [PATCH] feat(remote): bootstrap script - installs miniconda + env + deps + dirs
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 packaging/remote/bootstrap.sh | 168 ++++++++++++++++++++++++++++++++++
 1 file changed, 168 insertions(+)
 create mode 100644 packaging/remote/bootstrap.sh

diff --git a/packaging/remote/bootstrap.sh b/packaging/remote/bootstrap.sh
new file mode 100644
--- /dev/null
+++ b/packaging/remote/bootstrap.sh
@@ -0,0 +1,168 @@
+#!/usr/bin/env bash
+# llm-trainer remote bootstrap
+# Sets up the GPU host so the dashboard can run training pipelines end-to-end.
+#
+# Runs as the SSH user. Miniconda and the conda env are installed under $HOME.
+# The data tree goes to /opt/synthetic when /opt is writable or sudo is usable,
+# and to $HOME/synthetic otherwise.
+#
+# Progress is printed to stdout as "::stage:: <name>" marker lines followed by
+# "[bootstrap]" / "[n/5]" detail lines; fatal errors go to stderr.
+#
+# -u is deliberately left off: conda's activation hooks may read unset
+# variables. pipefail is safe because the only pipeline ends in an awk program
+# that consumes all of its input.
+set -eo pipefail
+
+CONDA_DIR="$HOME/miniconda3"
+ENV_NAME="synthetic-data"
+SYNTHETIC_DIR="/opt/synthetic"
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+
+# $USER is not guaranteed to be set in a non-interactive SSH session, and the
+# primary group need not share the account's name, so ask id(1) for both.
+RUN_USER="$(id -un)"
+RUN_GROUP="$(id -gn)"
+
+# Print an error to stderr and abort the bootstrap.
+die() {
+  printf '[bootstrap] ERROR: %s\n' "$*" >&2
+  exit 1
+}
+
+# Run a command through sudo when elevation was selected, directly otherwise.
+maybe_sudo() {
+  if [ -n "$SUDO" ]; then
+    "$SUDO" "$@"
+  else
+    "$@"
+  fi
+}
+
+# Use sudo only when /opt actually needs it. sudo must either work without a
+# password (sudo -n) or have a terminal to prompt on; otherwise fall back to
+# $HOME instead of hanging or failing halfway through the run.
+SUDO=""
+if [ ! -w /opt ] && [ "$(id -u)" -ne 0 ]; then
+  if command -v sudo >/dev/null 2>&1 &&
+    { sudo -n true 2>/dev/null || [ -t 0 ]; }; then
+    SUDO="sudo"
+  else
+    SYNTHETIC_DIR="$HOME/synthetic"
+    echo "[bootstrap] No sudo, falling back to $SYNTHETIC_DIR"
+  fi
+fi
+DATA_DIR="$SYNTHETIC_DIR/synthetic-data-kit/data"
+CONFIG_DEST="$SYNTHETIC_DIR/synthetic-data-kit/config.yaml"
+
+# Fail before the long installs if the files copied in step 5 are missing.
+[ -f "$SCRIPT_DIR/train.py" ] || die "missing $SCRIPT_DIR/train.py"
+if [ ! -f "$CONFIG_DEST" ] && [ ! -f "$SCRIPT_DIR/config.yaml" ]; then
+  die "missing $SCRIPT_DIR/config.yaml"
+fi
+
+echo "::stage:: starting"
+echo "[bootstrap] User: $RUN_USER Home: $HOME"
+echo "[bootstrap] Conda: $CONDA_DIR"
+echo "[bootstrap] Synthetic dir: $SYNTHETIC_DIR"
+echo
+
+# ── Step 1: miniconda ────────────────────────────────────────
+echo "::stage:: miniconda"
+if [ ! -x "$CONDA_DIR/bin/conda" ]; then
+  echo "[1/5] Installing miniconda to $CONDA_DIR"
+  # Pick the installer that matches the host CPU.
+  case "$(uname -m)" in
+    x86_64) CONDA_ARCH="x86_64" ;;
+    aarch64) CONDA_ARCH="aarch64" ;;
+    *) die "unsupported CPU architecture: $(uname -m)" ;;
+  esac
+  INSTALLER_URL="https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-$CONDA_ARCH.sh"
+  # Template ends in the X's because not every mktemp accepts a suffix.
+  TMP_INSTALLER="$(mktemp "${TMPDIR:-/tmp}/miniconda.XXXXXX")"
+  # Remove the installer on every exit path, including a failed download.
+  trap 'rm -f -- "$TMP_INSTALLER"' EXIT
+  if command -v wget >/dev/null 2>&1; then
+    wget -q -O "$TMP_INSTALLER" "$INSTALLER_URL"
+  elif command -v curl >/dev/null 2>&1; then
+    curl -fsSL -o "$TMP_INSTALLER" "$INSTALLER_URL"
+  else
+    die "neither wget nor curl is available to download miniconda"
+  fi
+  # -b: unattended install; -u: reuse $CONDA_DIR left by an interrupted run.
+  bash "$TMP_INSTALLER" -b -u -p "$CONDA_DIR"
+  rm -f -- "$TMP_INSTALLER"
+  trap - EXIT
+else
+  echo "[1/5] Miniconda already installed at $CONDA_DIR"
+fi
+
+# shellcheck source=/dev/null
+source "$CONDA_DIR/etc/profile.d/conda.sh"
+
+# ── Step 2: conda env ────────────────────────────────────────
+echo "::stage:: conda_env"
+# awk compares the first column literally and reads the whole listing, so the
+# env name is never treated as a regex and conda never writes to a closed pipe.
+if conda env list |
+  awk -v name="$ENV_NAME" '$1 == name { found = 1 } END { exit !found }'; then
+  echo "[2/5] Conda env $ENV_NAME already exists"
+else
+  echo "[2/5] Creating conda env $ENV_NAME (python 3.10)"
+  # NOTE(review): recent Miniconda releases may refuse a non-interactive
+  # create until the default channels' Terms of Service are accepted — confirm.
+  conda create -y -q -n "$ENV_NAME" python=3.10
+fi
+
+conda activate "$ENV_NAME"
+
+# ── Step 3: Python packages ──────────────────────────────────
+echo "::stage:: python_packages"
+echo "[3/5] Installing Python packages (this may take several minutes)"
+# "python -m pip" ties pip to the python that conda activate put on PATH.
+python -m pip install --upgrade pip setuptools wheel
+python -m pip install \
+  synthetic-data-kit \
+  "torch>=2.1" \
+  "transformers>=4.40" \
+  "peft>=0.10" \
+  "trl>=0.8" \
+  "accelerate>=0.30" \
+  "bitsandbytes>=0.43" \
+  "datasets>=2.18" \
+  pyyaml \
+  sentencepiece \
+  scipy
+
+# ── Step 4: Directories ──────────────────────────────────────
+echo "::stage:: directories"
+echo "[4/5] Creating data directories under $SYNTHETIC_DIR"
+maybe_sudo mkdir -p \
+  "$DATA_DIR/input" \
+  "$DATA_DIR/parsed" \
+  "$DATA_DIR/generated" \
+  "$DATA_DIR/curated" \
+  "$DATA_DIR/final" \
+  "$SYNTHETIC_DIR/output"
+if [ -n "$SUDO" ]; then
+  # Hand the root-created tree to the SSH user so later steps need no sudo.
+  maybe_sudo chown -R "$RUN_USER:$RUN_GROUP" "$SYNTHETIC_DIR"
+fi
+
+# ── Step 5: Drop train.py + config.yaml ──────────────────────
+echo "::stage:: assets"
+echo "[5/5] Installing train.py and default config"
+cp -- "$SCRIPT_DIR/train.py" "$SYNTHETIC_DIR/train.py"
+chmod +x "$SYNTHETIC_DIR/train.py"
+
+# Keep an existing config so local edits survive a re-run.
+if [ ! -f "$CONFIG_DEST" ]; then
+  cp -- "$SCRIPT_DIR/config.yaml" "$CONFIG_DEST"
+fi
+
+echo
+echo "::stage:: done"
+echo "[bootstrap] OK Setup complete"
+echo "[bootstrap] Conda env: $CONDA_DIR/envs/$ENV_NAME"
+echo "[bootstrap] Data dirs: $DATA_DIR"
+echo "[bootstrap] Train script: $SYNTHETIC_DIR/train.py"