Compare commits
7 Commits
diff-trans
...
docker-bas
| Author | SHA1 | Date | |
|---|---|---|---|
| | 3afc91fba9 | | |
| | 0689419d25 | | |
| | e64c32c0bd | | |
| | ec819dde3b | | |
| | fdf4bb5087 | | |
| | f67d16268c | | |
| | 684b543aa1 | | |
57
.github/workflows/base.yml
vendored
57
.github/workflows/base.yml
vendored
@@ -22,36 +22,38 @@ jobs:
|
|||||||
fail-fast: false
|
fail-fast: false
|
||||||
matrix:
|
matrix:
|
||||||
include:
|
include:
|
||||||
- cuda: "121"
|
# - cuda: "121"
|
||||||
cuda_version: 12.1.1
|
# cuda_version: 12.1.1
|
||||||
cudnn_version: 8
|
# cudnn_version: 8
|
||||||
python_version: "3.10"
|
# python_version: "3.10"
|
||||||
pytorch: 2.3.1
|
# pytorch: 2.3.1
|
||||||
torch_cuda_arch_list: "7.0 7.5 8.0 8.6 8.7 8.9 9.0+PTX"
|
# torch_cuda_arch_list: "7.0 7.5 8.0 8.6 8.7 8.9 9.0+PTX"
|
||||||
- cuda: "121"
|
# from_base_img: ""
|
||||||
cuda_version: 12.1.1
|
# from_base_tag: ""
|
||||||
cudnn_version: 8
|
# - cuda: "121"
|
||||||
python_version: "3.11"
|
# cuda_version: 12.1.1
|
||||||
pytorch: 2.3.1
|
# cudnn_version: 8
|
||||||
torch_cuda_arch_list: "7.0 7.5 8.0 8.6 8.7 8.9 9.0+PTX"
|
# python_version: "3.11"
|
||||||
- cuda: "124"
|
# pytorch: 2.3.1
|
||||||
cuda_version: 12.4.1
|
# torch_cuda_arch_list: "7.0 7.5 8.0 8.6 8.7 8.9 9.0+PTX"
|
||||||
cudnn_version: ""
|
# from_base_img: ""
|
||||||
python_version: "3.10"
|
# from_base_tag: ""
|
||||||
pytorch: 2.4.1
|
# - cuda: "124"
|
||||||
torch_cuda_arch_list: "7.0 7.5 8.0 8.6 8.7 8.9 9.0+PTX"
|
# cuda_version: 12.4.1
|
||||||
- cuda: "124"
|
# cudnn_version: ""
|
||||||
cuda_version: 12.4.1
|
# python_version: "3.11"
|
||||||
cudnn_version: ""
|
# pytorch: 2.4.1
|
||||||
python_version: "3.11"
|
# torch_cuda_arch_list: "7.0 7.5 8.0 8.6 8.7 8.9 9.0+PTX"
|
||||||
pytorch: 2.4.1
|
# from_base_img: ""
|
||||||
torch_cuda_arch_list: "7.0 7.5 8.0 8.6 8.7 8.9 9.0+PTX"
|
# from_base_tag: ""
|
||||||
- cuda: "124"
|
- cuda: "124"
|
||||||
cuda_version: 12.4.1
|
cuda_version: 12.4.1
|
||||||
cudnn_version: ""
|
cudnn_version: ""
|
||||||
python_version: "3.11"
|
python_version: "3.11"
|
||||||
pytorch: 2.5.1
|
pytorch: 2.5.1
|
||||||
torch_cuda_arch_list: "7.0 7.5 8.0 8.6 8.7 8.9 9.0+PTX"
|
torch_cuda_arch_list: "7.0 7.5 8.0 8.6 8.7 8.9 9.0+PTX"
|
||||||
|
from_base_img: nvcr.io/nvidia/pytorch
|
||||||
|
from_base_tag: 24.10-py3
|
||||||
steps:
|
steps:
|
||||||
- name: Checkout
|
- name: Checkout
|
||||||
uses: actions/checkout@v4
|
uses: actions/checkout@v4
|
||||||
@@ -61,7 +63,7 @@ jobs:
|
|||||||
with:
|
with:
|
||||||
images: |
|
images: |
|
||||||
winglian/axolotl-base
|
winglian/axolotl-base
|
||||||
axolotlai/axolotl-base
|
# axolotlai/axolotl-base
|
||||||
- name: Login to Docker Hub
|
- name: Login to Docker Hub
|
||||||
uses: docker/login-action@v2
|
uses: docker/login-action@v2
|
||||||
with:
|
with:
|
||||||
@@ -74,7 +76,8 @@ jobs:
|
|||||||
with:
|
with:
|
||||||
context: .
|
context: .
|
||||||
file: ./docker/Dockerfile-base
|
file: ./docker/Dockerfile-base
|
||||||
push: ${{ github.event_name != 'pull_request' }}
|
push: true
|
||||||
|
# push: ${{ github.event_name != 'pull_request' }}
|
||||||
tags: ${{ steps.metadata.outputs.tags }}-base-py${{ matrix.python_version }}-cu${{ matrix.cuda }}-${{ matrix.pytorch }}${{ matrix.axolotl_extras != '' && '-' || '' }}${{ matrix.axolotl_extras }}
|
tags: ${{ steps.metadata.outputs.tags }}-base-py${{ matrix.python_version }}-cu${{ matrix.cuda }}-${{ matrix.pytorch }}${{ matrix.axolotl_extras != '' && '-' || '' }}${{ matrix.axolotl_extras }}
|
||||||
labels: ${{ steps.metadata.outputs.labels }}
|
labels: ${{ steps.metadata.outputs.labels }}
|
||||||
build-args: |
|
build-args: |
|
||||||
@@ -84,3 +87,5 @@ jobs:
|
|||||||
PYTHON_VERSION=${{ matrix.python_version }}
|
PYTHON_VERSION=${{ matrix.python_version }}
|
||||||
PYTORCH_VERSION=${{ matrix.pytorch }}
|
PYTORCH_VERSION=${{ matrix.pytorch }}
|
||||||
TORCH_CUDA_ARCH_LIST=${{ matrix.torch_cuda_arch_list }}
|
TORCH_CUDA_ARCH_LIST=${{ matrix.torch_cuda_arch_list }}
|
||||||
|
BASE_IMAGE=${{ matrix.from_base_img || '' }}
|
||||||
|
BASE_TAG=${{ matrix.from_base_tag || '' }}
|
||||||
|
|||||||
95
.github/workflows/tests.yml
vendored
95
.github/workflows/tests.yml
vendored
@@ -148,63 +148,64 @@ jobs:
|
|||||||
run: |
|
run: |
|
||||||
find "$(pip cache dir)/http-v2" -type f -mtime +14 -exec rm {} \;
|
find "$(pip cache dir)/http-v2" -type f -mtime +14 -exec rm {} \;
|
||||||
|
|
||||||
docker-e2e-tests-1st:
|
# docker-e2e-tests-1st:
|
||||||
if: ${{ ! contains(github.event.commits[0].message, '[skip e2e]') && github.repository_owner == 'axolotl-ai-cloud' }}
|
# if: ${{ ! contains(github.event.commits[0].message, '[skip e2e]') && github.repository_owner == 'axolotl-ai-cloud' }}
|
||||||
# this job needs to be run on self-hosted GPU runners...
|
# # this job needs to be run on self-hosted GPU runners...
|
||||||
runs-on: [self-hosted, modal]
|
# runs-on: [self-hosted, modal]
|
||||||
timeout-minutes: 90
|
# timeout-minutes: 90
|
||||||
needs: [pre-commit, pytest, pytest-sdist]
|
# needs: [pre-commit, pytest, pytest-sdist]
|
||||||
|
#
|
||||||
strategy:
|
# strategy:
|
||||||
fail-fast: false
|
# fail-fast: false
|
||||||
matrix:
|
# matrix:
|
||||||
include:
|
# include:
|
||||||
- cuda: 124
|
# - cuda: 124
|
||||||
cuda_version: 12.4.1
|
# cuda_version: 12.4.1
|
||||||
python_version: "3.11"
|
# python_version: "3.11"
|
||||||
pytorch: 2.4.1
|
# pytorch: 2.4.1
|
||||||
num_gpus: 1
|
# num_gpus: 1
|
||||||
axolotl_extras:
|
# axolotl_extras:
|
||||||
steps:
|
# steps:
|
||||||
- name: Checkout
|
# - name: Checkout
|
||||||
uses: actions/checkout@v4
|
# uses: actions/checkout@v4
|
||||||
- name: Install Python
|
# - name: Install Python
|
||||||
uses: actions/setup-python@v5
|
# uses: actions/setup-python@v5
|
||||||
with:
|
# with:
|
||||||
python-version: "3.10"
|
# python-version: "3.10"
|
||||||
- name: Install Modal
|
# - name: Install Modal
|
||||||
run: |
|
# run: |
|
||||||
python -m pip install --upgrade pip
|
# python -m pip install --upgrade pip
|
||||||
pip install modal==0.63.64 jinja2
|
# pip install modal==0.63.64 jinja2
|
||||||
- name: Update env vars
|
# - name: Update env vars
|
||||||
run: |
|
# run: |
|
||||||
echo "BASE_TAG=main-base-py${{ matrix.python_version }}-cu${{ matrix.cuda }}-${{ matrix.pytorch }}" >> $GITHUB_ENV
|
# echo "BASE_TAG=main-base-py${{ matrix.python_version }}-cu${{ matrix.cuda }}-${{ matrix.pytorch }}" >> $GITHUB_ENV
|
||||||
echo "PYTORCH_VERSION=${{ matrix.pytorch}}" >> $GITHUB_ENV
|
# echo "PYTORCH_VERSION=${{ matrix.pytorch}}" >> $GITHUB_ENV
|
||||||
echo "AXOLOTL_ARGS=${{ matrix.axolotl_args}}" >> $GITHUB_ENV
|
# echo "AXOLOTL_ARGS=${{ matrix.axolotl_args}}" >> $GITHUB_ENV
|
||||||
echo "AXOLOTL_EXTRAS=${{ matrix.axolotl_extras}}" >> $GITHUB_ENV
|
# echo "AXOLOTL_EXTRAS=${{ matrix.axolotl_extras}}" >> $GITHUB_ENV
|
||||||
echo "CUDA=${{ matrix.cuda }}" >> $GITHUB_ENV
|
# echo "CUDA=${{ matrix.cuda }}" >> $GITHUB_ENV
|
||||||
echo "N_GPUS=${{ matrix.num_gpus }}" >> $GITHUB_ENV
|
# echo "N_GPUS=${{ matrix.num_gpus }}" >> $GITHUB_ENV
|
||||||
- name: Run tests job on Modal
|
# - name: Run tests job on Modal
|
||||||
run: |
|
# run: |
|
||||||
modal run cicd.tests
|
# modal run cicd.tests
|
||||||
|
|
||||||
docker-e2e-tests:
|
docker-e2e-tests:
|
||||||
if: github.repository_owner == 'axolotl-ai-cloud'
|
if: github.repository_owner == 'axolotl-ai-cloud'
|
||||||
# this job needs to be run on self-hosted GPU runners...
|
# this job needs to be run on self-hosted GPU runners...
|
||||||
runs-on: [self-hosted, modal]
|
runs-on: [self-hosted, modal]
|
||||||
timeout-minutes: 90
|
timeout-minutes: 90
|
||||||
needs: [pre-commit, pytest, docker-e2e-tests-1st]
|
# needs: [pre-commit, pytest, docker-e2e-tests-1st]
|
||||||
|
needs: [pre-commit, pytest]
|
||||||
|
|
||||||
strategy:
|
strategy:
|
||||||
fail-fast: false
|
fail-fast: false
|
||||||
matrix:
|
matrix:
|
||||||
include:
|
include:
|
||||||
- cuda: 121
|
# - cuda: 121
|
||||||
cuda_version: 12.1.1
|
# cuda_version: 12.1.1
|
||||||
python_version: "3.10"
|
# python_version: "3.10"
|
||||||
pytorch: 2.3.1
|
# pytorch: 2.3.1
|
||||||
num_gpus: 1
|
# num_gpus: 1
|
||||||
axolotl_extras: mamba-ssm
|
# axolotl_extras: mamba-ssm
|
||||||
- cuda: 124
|
- cuda: 124
|
||||||
cuda_version: 12.4.1
|
cuda_version: 12.4.1
|
||||||
python_version: "3.11"
|
python_version: "3.11"
|
||||||
@@ -224,7 +225,7 @@ jobs:
|
|||||||
pip install modal==0.63.64 jinja2
|
pip install modal==0.63.64 jinja2
|
||||||
- name: Update env vars
|
- name: Update env vars
|
||||||
run: |
|
run: |
|
||||||
echo "BASE_TAG=main-base-py${{ matrix.python_version }}-cu${{ matrix.cuda }}-${{ matrix.pytorch }}" >> $GITHUB_ENV
|
echo "BASE_TAG=pr-2139-base-py${{ matrix.python_version }}-cu${{ matrix.cuda }}-${{ matrix.pytorch }}" >> $GITHUB_ENV
|
||||||
echo "PYTORCH_VERSION=${{ matrix.pytorch}}" >> $GITHUB_ENV
|
echo "PYTORCH_VERSION=${{ matrix.pytorch}}" >> $GITHUB_ENV
|
||||||
echo "AXOLOTL_ARGS=${{ matrix.axolotl_args}}" >> $GITHUB_ENV
|
echo "AXOLOTL_ARGS=${{ matrix.axolotl_args}}" >> $GITHUB_ENV
|
||||||
echo "AXOLOTL_EXTRAS=${{ matrix.axolotl_extras}}" >> $GITHUB_ENV
|
echo "AXOLOTL_EXTRAS=${{ matrix.axolotl_extras}}" >> $GITHUB_ENV
|
||||||
|
|||||||
@@ -1,4 +1,4 @@
|
|||||||
FROM axolotlai/axolotl-base:{{ BASE_TAG }}
|
FROM winglian/axolotl-base:{{ BASE_TAG }}
|
||||||
|
|
||||||
ENV TORCH_CUDA_ARCH_LIST="7.0 7.5 8.0 8.6+PTX"
|
ENV TORCH_CUDA_ARCH_LIST="7.0 7.5 8.0 8.6+PTX"
|
||||||
ENV AXOLOTL_EXTRAS="{{ AXOLOTL_EXTRAS }}"
|
ENV AXOLOTL_EXTRAS="{{ AXOLOTL_EXTRAS }}"
|
||||||
|
|||||||
@@ -1,5 +1,6 @@
|
|||||||
|
ARG BASE_IMAGE=axolotlai/axolotl-base
|
||||||
ARG BASE_TAG=main-base
|
ARG BASE_TAG=main-base
|
||||||
FROM axolotlai/axolotl-base:$BASE_TAG
|
FROM $BASE_IMAGE:$BASE_TAG
|
||||||
|
|
||||||
ARG TORCH_CUDA_ARCH_LIST="7.0 7.5 8.0 8.6+PTX"
|
ARG TORCH_CUDA_ARCH_LIST="7.0 7.5 8.0 8.6+PTX"
|
||||||
ARG AXOLOTL_EXTRAS=""
|
ARG AXOLOTL_EXTRAS=""
|
||||||
|
|||||||
@@ -3,7 +3,10 @@ ARG CUDNN_VERSION="8"
|
|||||||
ARG UBUNTU_VERSION="22.04"
|
ARG UBUNTU_VERSION="22.04"
|
||||||
ARG MAX_JOBS=4
|
ARG MAX_JOBS=4
|
||||||
|
|
||||||
FROM nvidia/cuda:$CUDA_VERSION-cudnn$CUDNN_VERSION-devel-ubuntu$UBUNTU_VERSION AS base-builder
|
ARG BASE_IMAGE=nvidia/cuda
|
||||||
|
ARG DEFAULT_TAG=${CUDA_VERSION}-cudnn${CUDNN_VERSION}-devel-ubuntu${UBUNTU_VERSION}
|
||||||
|
ARG BASE_TAG=""
|
||||||
|
FROM ${BASE_IMAGE:-nvidia/cuda}:${BASE_TAG:-${DEFAULT_TAG}} AS base-builder
|
||||||
|
|
||||||
ENV PATH="/root/miniconda3/bin:${PATH}"
|
ENV PATH="/root/miniconda3/bin:${PATH}"
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user