* fix 405b with lower cpu ram requirements * make sure to use double quant and only skip output embeddings * set model attributes * more fixes for sharded fsdp loading * update the base model in example to use pre-quantized nf4-bf16 weights * upstream fixes for qlora+fsdp
27 lines
913 B
Plaintext
27 lines
913 B
Plaintext
# Cloud image: adds JupyterLab, an SSH server, tmux, sudo and Ray on top of the
# axolotl base image, and starts through /root/cloud-entrypoint.sh.

# Tag of the winglian/axolotl base image; override with
# `--build-arg BASE_TAG=<tag>`.
ARG BASE_TAG=main
FROM winglian/axolotl:$BASE_TAG

# Keep Hugging Face caches under /workspace/data.
# NOTE(review): HF_HOME points at the same directory as the hub cache rather
# than at its parent directory -- confirm this is intentional.
ENV HF_DATASETS_CACHE="/workspace/data/huggingface-cache/datasets"
ENV HUGGINGFACE_HUB_CACHE="/workspace/data/huggingface-cache/hub"
ENV HF_HOME="/workspace/data/huggingface-cache/hub"
ENV HF_HUB_ENABLE_HF_TRANSFER="1"

# Documentation only (EXPOSE does not publish ports).
# Presumably 8888 is JupyterLab and 22 is sshd -- verify against the entrypoint.
EXPOSE 8888
EXPOSE 22

# The "-term" variant of the entrypoint script is installed under the generic
# name that ENTRYPOINT references below.
COPY scripts/cloud-entrypoint-term.sh /root/cloud-entrypoint.sh
COPY scripts/motd /etc/motd

# --no-cache-dir keeps pip's download cache out of the image layer.
RUN pip install --no-cache-dir jupyterlab notebook ipywidgets && \
    jupyter lab clean

# - `apt-get update` runs in the same layer as the install so the package
#   index is never stale or missing; the lists are removed afterwards to keep
#   the layer small.
# - "ray[default]==2.9.3" is quoted so the shell cannot treat `[default]` as a
#   glob pattern.
# - The printf line appends a snippet to ~/.bashrc that prints /etc/motd when
#   TERM is set and the file is readable.
RUN apt-get update && \
    apt-get install --yes --no-install-recommends openssh-server sudo tmux && \
    rm -rf /var/lib/apt/lists/* && \
    pip3 install -U --no-cache-dir grpcio "ray[default]==2.9.3" && \
    mkdir -p ~/.ssh && \
    chmod 700 ~/.ssh && \
    printf "[ ! -z \"\$TERM\" -a -r /etc/motd ] && cat /etc/motd\n" >> ~/.bashrc && \
    chmod +x /workspace/axolotl/scripts/cloud-entrypoint.sh && \
    chmod +x /root/cloud-entrypoint.sh

# Exec form: CMD is passed to the entrypoint script as its arguments.
ENTRYPOINT ["/root/cloud-entrypoint.sh"]
CMD ["sleep", "infinity"]