The non-root user approach had multiple issues with RunPod
compatibility, sudo PATH handling, and tmux in exec sessions.
This commit restores root as the default user for now.
This commit is contained in:
Wing Lian
2026-03-13 11:54:09 -04:00
committed by GitHub
parent ff77fa2488
commit d8a05744d7
4 changed files with 36 additions and 105 deletions

View File

@@ -1,8 +1,6 @@
ARG BASE_TAG=main
FROM axolotlai/axolotl-uv:$BASE_TAG
USER root
ENV HF_DATASETS_CACHE="/workspace/data/huggingface-cache/datasets"
ENV HF_HUB_CACHE="/workspace/data/huggingface-cache/hub"
ENV HF_HOME="/workspace/data/huggingface-cache/hub"
@@ -11,7 +9,7 @@ ENV HF_HUB_ENABLE_HF_TRANSFER="1"
EXPOSE 8888
EXPOSE 22
COPY scripts/cloud-entrypoint.sh /etc/cloud-entrypoint.sh
COPY scripts/cloud-entrypoint.sh /root/cloud-entrypoint.sh
COPY scripts/motd /etc/motd
RUN uv pip install jupyterlab notebook ipywidgets && \
@@ -20,16 +18,13 @@ RUN apt update && \
apt install --yes --no-install-recommends openssh-server tmux iproute2 nvtop && \
rm -rf /var/cache/apt/archives && \
rm -rf /var/lib/apt/lists/* && \
mkdir -p /home/ubuntu/.ssh && \
chmod 700 /home/ubuntu/.ssh && \
printf "\n[[ -z \"\$TMUX\" ]] && tty -s && { tmux attach-session -t ssh_tmux || tmux new-session -s ssh_tmux; exit; }\n" >> /home/ubuntu/.bashrc && \
printf "[ ! -z \"\$TERM\" -a -r /etc/motd ] && cat /etc/motd\n" >> /home/ubuntu/.bashrc && \
printf "\n[[ -z \"\$AXOLOTL_SKIP_SWITCH\" ]] && exec sudo -u ubuntu AXOLOTL_SKIP_SWITCH=1 -i\n" >> /root/.bashrc && \
chmod +x /etc/cloud-entrypoint.sh && \
echo 'set-option -g history-limit 5000' >> /home/ubuntu/.tmux.conf && \
chown -R ubuntu:ubuntu /home/ubuntu /workspace
mkdir -p ~/.ssh && \
chmod 700 ~/.ssh && \
printf "\n[[ -z \"\$TMUX\" ]] && { tmux attach-session -t ssh_tmux || tmux new-session -s ssh_tmux; exit; }\n" >> ~/.bashrc && \
printf "[ ! -z \"\$TERM\" -a -r /etc/motd ] && cat /etc/motd\n" >> ~/.bashrc && \
chmod +x /workspace/axolotl/scripts/cloud-entrypoint.sh && \
chmod +x /root/cloud-entrypoint.sh && \
echo 'set-option -g history-limit 5000' >> ~/.tmux.conf
# USER ubuntu
ENTRYPOINT ["/etc/cloud-entrypoint.sh"]
ENTRYPOINT ["/root/cloud-entrypoint.sh"]
CMD ["sleep", "infinity"]

View File

@@ -43,18 +43,6 @@ RUN git config remote.origin.fetch "+refs/heads/*:refs/remotes/origin/*" && \
git config --get remote.origin.fetch && \
git config --global credential.helper store
COPY .axolotl-complete.bash /home/ubuntu/.axolotl-complete.bash
RUN chmod +x /home/ubuntu/.axolotl-complete.bash && \
echo 'source /home/ubuntu/.axolotl-complete.bash' >> /home/ubuntu/.bashrc
# Ensure ubuntu user exists (may already exist from base image)
RUN id ubuntu &>/dev/null || ( \
useradd -m -s /bin/bash -u 1000 ubuntu && \
apt-get update && apt-get install -y --no-install-recommends sudo && rm -rf /var/lib/apt/lists/* \
); \
echo 'ubuntu ALL=(ALL) NOPASSWD:ALL' > /etc/sudoers.d/ubuntu && \
chmod 0440 /etc/sudoers.d/ubuntu
RUN chown -R ubuntu:ubuntu /workspace /home/ubuntu
USER ubuntu
COPY .axolotl-complete.bash /root/.axolotl-complete.bash
RUN chmod +x /root/.axolotl-complete.bash && \
echo 'source /root/.axolotl-complete.bash' >> ~/.bashrc

View File

@@ -17,19 +17,13 @@ ENV TORCH_CUDA_ARCH_LIST=$TORCH_CUDA_ARCH_LIST
ENV UV_TORCH_BACKEND="cu${CUDA}"
RUN apt-get update \
&& apt-get install -y wget git build-essential ninja-build git-lfs libaio-dev pkg-config curl sudo && rm -rf /var/lib/apt/lists/* \
&& apt-get install -y wget git build-essential ninja-build git-lfs libaio-dev pkg-config curl && rm -rf /var/lib/apt/lists/* \
&& git lfs install --skip-repo \
&& curl -LsSf https://astral.sh/uv/install.sh | env UV_INSTALL_DIR="/usr/local/bin" sh
&& curl -LsSf https://astral.sh/uv/install.sh | sh
# Create ubuntu user with passwordless sudo
RUN useradd -m -s /bin/bash -u 1000 ubuntu 2>/dev/null; \
usermod -aG sudo ubuntu && \
echo 'ubuntu ALL=(ALL) NOPASSWD:ALL' > /etc/sudoers.d/ubuntu && \
chmod 0440 /etc/sudoers.d/ubuntu
ENV PATH="/root/.local/bin:${PATH}"
ENV UV_PYTHON_INSTALL_DIR="/opt/uv/python"
RUN uv python install ${PYTHON_VERSION} && \
chmod -R a+rX /opt/uv
RUN uv python install ${PYTHON_VERSION}
WORKDIR /workspace
@@ -61,5 +55,3 @@ RUN PYTHON_CP="cp$(echo $PYTHON_VERSION | tr -d '.')" && \
wget -nv "https://github.com/mjun0812/flash-attention-prebuild-wheels/releases/download/${WHL_VERSION}/${WHL_FILE}" && \
uv pip install --no-cache-dir "${WHL_FILE}" && \
rm "${WHL_FILE}"
RUN chown -R ubuntu:ubuntu /workspace

View File

@@ -1,37 +1,19 @@
#!/bin/bash
# Detect if running as non-root and set sudo prefix accordingly
if [ "$(id -u)" -ne 0 ]; then
SUDO="sudo"
RUN_AS_USER=""
else
SUDO=""
RUN_AS_USER="sudo -u ubuntu"
fi
# Export specific ENV variables to /etc/rp_environment
echo "Exporting environment variables..."
printenv | grep -E '^HF_|^BNB_|^CUDA_|^NCCL_|^NV|^RUNPOD_|^PATH=|^_=' | sed 's/^\([^=]*\)=\(.*\)$/export \1="\2"/' | grep -v 'printenv' | $SUDO tee /etc/rp_environment > /dev/null
# Add rp_environment sourcing to ubuntu's bashrc (if ubuntu user exists and line not already present)
if id ubuntu &>/dev/null; then
grep -q 'source /etc/rp_environment' /home/ubuntu/.bashrc 2>/dev/null || \
echo 'source /etc/rp_environment' >> /home/ubuntu/.bashrc
fi
# Also add to current user's bashrc if different from ubuntu
grep -q 'source /etc/rp_environment' ~/.bashrc 2>/dev/null || \
echo 'source /etc/rp_environment' >> ~/.bashrc
printenv | grep -E '^HF_|^BNB_|^CUDA_|^NCCL_|^NV|^RUNPOD_|^PATH=|^_=' | sed 's/^\([^=]*\)=\(.*\)$/export \1="\2"/' | grep -v 'printenv' >> /etc/rp_environment
echo 'source /etc/rp_environment' >> ~/.bashrc
add_keys_to_authorized() {
local key_value=$1
local target_home=$2
# Create the .ssh directory and set permissions
mkdir -p "$target_home/.ssh"
chmod 700 "$target_home/.ssh"
# Create the ~/.ssh directory and set permissions
mkdir -p ~/.ssh
chmod 700 ~/.ssh
# Create the authorized_keys file if it doesn't exist
touch "$target_home/.ssh/authorized_keys"
touch ~/.ssh/authorized_keys
# Initialize an empty key variable
local key=""
@@ -42,7 +24,7 @@ add_keys_to_authorized() {
if [[ $word == ssh-* ]]; then
# If there's a key being built, add it to the authorized_keys file
if [[ -n $key ]]; then
echo $key >> "$target_home/.ssh/authorized_keys"
echo $key >> ~/.ssh/authorized_keys
fi
# Start a new key
key=$word
@@ -54,42 +36,29 @@ add_keys_to_authorized() {
# Add the last key to the authorized_keys file
if [[ -n $key ]]; then
echo $key >> "$target_home/.ssh/authorized_keys"
echo $key >> ~/.ssh/authorized_keys
fi
# Set the correct permissions
chmod 600 "$target_home/.ssh/authorized_keys"
chmod 700 -R "$target_home/.ssh"
}
setup_ssh_keys() {
local key_value=$1
# Set up keys for the current user
add_keys_to_authorized "$key_value" "$HOME"
# Also set up keys for ubuntu user if we're root and ubuntu exists
if [ "$(id -u)" -eq 0 ] && id ubuntu &>/dev/null; then
add_keys_to_authorized "$key_value" "/home/ubuntu"
chown -R ubuntu:ubuntu /home/ubuntu/.ssh
fi
chmod 600 ~/.ssh/authorized_keys
chmod 700 -R ~/.ssh
}
# Set SSH port
if [ ! -z "$SSH_PORT" ]; then
$SUDO sed -i "s/#Port 22/Port $SSH_PORT/" /etc/ssh/sshd_config
sed -i "s/#Port 22/Port $SSH_PORT/" /etc/ssh/sshd_config
fi
if [[ $PUBLIC_KEY ]]; then
# runpod, prime intellect
setup_ssh_keys "$PUBLIC_KEY"
add_keys_to_authorized "$PUBLIC_KEY"
# Start the SSH service in the background
$SUDO service ssh start
service ssh start
elif [[ $SSH_KEY ]]; then
# latitude.sh
setup_ssh_keys "$SSH_KEY"
add_keys_to_authorized "$SSH_KEY"
# Start the SSH service in the background
$SUDO service ssh start
service ssh start
else
echo "No PUBLIC_KEY or SSH_KEY environment variable provided, not starting openSSH daemon"
fi
@@ -101,16 +70,8 @@ if [ -n "$JUPYTER_PASSWORD" ]; then
fi
if [ "$JUPYTER_DISABLE" != "1" ]; then
# Run Jupyter Lab as ubuntu user when possible
JUPYTER_ARGS="--port=8888 --ip=* --ServerApp.allow_origin=*"
if [ "$(id -u)" -eq 0 ] && id ubuntu &>/dev/null; then
sudo --preserve-env=PATH,JUPYTER_TOKEN -u ubuntu jupyter lab $JUPYTER_ARGS &
else
if [ "$(id -u)" -eq 0 ]; then
JUPYTER_ARGS="$JUPYTER_ARGS --allow-root"
fi
jupyter lab $JUPYTER_ARGS &
fi
# Run Jupyter Lab in the background
jupyter lab --port=8888 --ip=* --allow-root --ServerApp.allow_origin=* &
fi
if [ ! -d "/workspace/data/axolotl-artifacts" ]; then
@@ -119,19 +80,14 @@ fi
if [ ! -L "/workspace/axolotl/outputs" ]; then
ln -sf /workspace/data/axolotl-artifacts /workspace/axolotl/outputs
fi
chown -R ubuntu:ubuntu /workspace 2>/dev/null || true
# start the runpod slurm init
SLURM_INIT="${SLURM_INIT:-/slurm-init.sh}"
if [[ -f "$SLURM_INIT" ]]; then
echo "[entrypoint] running $SLURM_INIT..."
$SUDO bash "$SLURM_INIT"
bash "$SLURM_INIT"
fi
# Execute the passed arguments (CMD) as ubuntu when possible
if [ "$(id -u)" -eq 0 ] && id ubuntu &>/dev/null; then
exec sudo --preserve-env=PATH -u ubuntu "$@"
else
exec "$@"
fi
# Execute the passed arguments (CMD)
exec "$@"