feat(remote): bootstrap script — installs miniconda + env + deps + dirs

This commit is contained in:
2026-04-26 01:50:00 +00:00
parent aa85130ee8
commit 90fafdadf1

View File

@@ -0,0 +1,108 @@
#!/usr/bin/env bash
# llm-trainer remote bootstrap
# Sets up the GPU host so the dashboard can run training pipelines end-to-end.
# Runs as the SSH user. Miniconda always lives under $HOME. The data tree goes
# under /opt/synthetic when /opt is writable (directly or through sudo) and
# under $HOME/synthetic otherwise, so sudo is optional rather than required.
set -e

# Install locations and names used by every step below.
CONDA_DIR="$HOME/miniconda3"
ENV_NAME="synthetic-data"
SYNTHETIC_DIR="/opt/synthetic"
DATA_DIR="$SYNTHETIC_DIR/synthetic-data-kit/data"
# Directory containing this script; train.py and config.yaml are copied from it.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"

# Use sudo only when /opt is not writable and we are not root.
# sudo is selected only if it is actually usable: either it works without a
# password (`sudo -n`), or stdin is a terminal so a password prompt can be
# answered. A password-protected sudo on a non-interactive session would
# otherwise abort the run at the directory step, after the package install.
SUDO=""
if [ ! -w /opt ] && [ "$(id -u)" -ne 0 ]; then
  if command -v sudo >/dev/null 2>&1 \
    && { sudo -n true 2>/dev/null || [ -t 0 ]; }; then
    SUDO="sudo"
  else
    SYNTHETIC_DIR="$HOME/synthetic"
    DATA_DIR="$SYNTHETIC_DIR/synthetic-data-kit/data"
    echo "[bootstrap] No sudo, falling back to $SYNTHETIC_DIR"
  fi
fi

echo "::stage:: starting"
# $USER can be unset in minimal sessions; fall back to the effective user name.
echo "[bootstrap] User: ${USER:-$(id -un)} Home: $HOME"
echo "[bootstrap] Conda: $CONDA_DIR"
echo "[bootstrap] Synthetic dir: $SYNTHETIC_DIR"
echo
# ── Step 1: miniconda ────────────────────────────────────────
# Installs Miniconda into $CONDA_DIR unless a conda executable is already
# there, then loads conda's shell integration into this shell.
echo "::stage:: miniconda"
if [ ! -x "$CONDA_DIR/bin/conda" ]; then
  echo "[1/5] Installing miniconda to $CONDA_DIR"

  # Pick the installer for this machine instead of assuming x86_64.
  # Unknown values are passed through; a non-existent installer then fails
  # the download below and is reported like any other download error.
  case "$(uname -m)" in
    amd64) MINICONDA_ARCH="x86_64" ;;
    arm64) MINICONDA_ARCH="aarch64" ;;
    *) MINICONDA_ARCH="$(uname -m)" ;;
  esac
  MINICONDA_URL="https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-${MINICONDA_ARCH}.sh"

  TMP_INSTALLER="$(mktemp /tmp/miniconda.XXXXXX.sh)"

  # Collect the status by hand so the temp file is removed on every path;
  # letting `set -e` abort here would leave the installer behind in /tmp.
  install_rc=0
  if command -v wget >/dev/null 2>&1; then
    wget -q -O "$TMP_INSTALLER" "$MINICONDA_URL" || install_rc=$?
  elif command -v curl >/dev/null 2>&1; then
    curl -fsSL -o "$TMP_INSTALLER" "$MINICONDA_URL" || install_rc=$?
  else
    echo "[bootstrap] ERROR: neither wget nor curl is available" >&2
    install_rc=127
  fi

  if [ "$install_rc" -eq 0 ]; then
    # -b: batch mode (no prompts), -p: install prefix.
    bash "$TMP_INSTALLER" -b -p "$CONDA_DIR" || install_rc=$?
  fi

  rm -f -- "$TMP_INSTALLER"

  if [ "$install_rc" -ne 0 ]; then
    echo "[bootstrap] ERROR: miniconda install failed (exit $install_rc) from $MINICONDA_URL" >&2
    exit "$install_rc"
  fi
else
  echo "[1/5] Miniconda already installed at $CONDA_DIR"
fi

# Defines the `conda` shell function so `conda activate` works in this script.
# shellcheck source=/dev/null
source "$CONDA_DIR/etc/profile.d/conda.sh"
# ── Step 2: conda env ────────────────────────────────────────
# Creates the environment on first run only, then activates it for the
# remaining steps.
echo "::stage:: conda_env"

# First column of `conda env list`, one entry per line.
known_envs="$(conda env list | awk '{print $1}')"

if ! grep -qx "$ENV_NAME" <<<"$known_envs"; then
  echo "[2/5] Creating conda env $ENV_NAME (python 3.10)"
  conda create -y -q -n "$ENV_NAME" python=3.10
else
  echo "[2/5] Conda env $ENV_NAME already exists"
fi

conda activate "$ENV_NAME"
# ── Step 3: Python packages ──────────────────────────────────
# `pip` is resolved from PATH, i.e. whichever environment is active here.
echo "::stage:: python_packages"
echo "[3/5] Installing Python packages (this may take several minutes)"

# Bring the packaging toolchain up to date before the main install.
pip install --upgrade pip setuptools wheel

# Packages for the pipeline; the version specifiers are lower bounds.
PIP_PACKAGES=(
  synthetic-data-kit
  "torch>=2.1"
  "transformers>=4.40"
  "peft>=0.10"
  "trl>=0.8"
  "accelerate>=0.30"
  "bitsandbytes>=0.43"
  "datasets>=2.18"
  pyyaml
  sentencepiece
  scipy
)
pip install "${PIP_PACKAGES[@]}"
# ── Step 4: Directories ──────────────────────────────────────
# Creates the data tree and the output directory. When sudo was needed to
# create them, ownership is handed back to the invoking user so later steps
# can write there without privileges.
echo "::stage:: directories"
echo "[4/5] Creating data directories under $SYNTHETIC_DIR"

# ${SUDO:+...} expands to nothing when SUDO is empty, so mkdir runs directly.
${SUDO:+"$SUDO"} mkdir -p \
  "$DATA_DIR/input" \
  "$DATA_DIR/parsed" \
  "$DATA_DIR/generated" \
  "$DATA_DIR/curated" \
  "$DATA_DIR/final" \
  "$SYNTHETIC_DIR/output"

if [ -n "$SUDO" ]; then
  # Ask the system for the real user and primary group: $USER may be unset,
  # and a group named after the user is not guaranteed to exist.
  OWNER_SPEC="$(id -un):$(id -gn)"
  "$SUDO" chown -R "$OWNER_SPEC" "$SYNTHETIC_DIR"
fi
# ── Step 5: Drop train.py + config.yaml ──────────────────────
echo "::stage:: assets"
echo "[5/5] Installing train.py and default config"

TRAIN_DEST="$SYNTHETIC_DIR/train.py"
CONFIG_DEST="$SYNTHETIC_DIR/synthetic-data-kit/config.yaml"

# train.py is overwritten on every run and marked executable.
cp "$SCRIPT_DIR/train.py" "$TRAIN_DEST"
chmod +x "$TRAIN_DEST"

# An existing config.yaml is left untouched; only a missing one is seeded.
[ -f "$CONFIG_DEST" ] || cp "$SCRIPT_DIR/config.yaml" "$CONFIG_DEST"

# Final summary of where everything landed.
printf '\n'
printf '%s\n' \
  "::stage:: done" \
  "[bootstrap] OK Setup complete" \
  "[bootstrap] Conda env: $CONDA_DIR/envs/$ENV_NAME" \
  "[bootstrap] Data dirs: $DATA_DIR" \
  "[bootstrap] Train script: $SYNTHETIC_DIR/train.py"