* feat: move to uv first * fix: update doc to uv first * fix: merge dev/tests into uv pyproject * fix: update docker docs to match current config * fix: migrate examples to readme * fix: add llmcompressor to conflict * feat: rec uv sync with lockfile for dev/ci * fix: update docker docs to clarify how to use uv images * chore: docs * fix: use system python, no venv * fix: set backend cpu * fix: only set for installing pytorch step * fix: remove unsloth kernel and installs * fix: remove U in tests * fix: set backend in deps too * chore: test * chore: comments * fix: attempt to lock torch * fix: workaround torch cuda and not upgraded * fix: forgot to push * fix: missed source * fix: nightly upstream loralinear config * fix: nightly phi3 long rope not work * fix: forgot commit * fix: test phi3 template change * fix: no more requirements * fix: carry over changes from new requirements to pyproject * chore: remove lockfile per discussion * fix: set match-runtime * fix: remove unneeded hf hub buildtime * fix: duplicate cache delete on nightly * fix: torchvision being overridden * fix: migrate to uv images * fix: leftover from merge * fix: simplify base readme * fix: update assertion message to be clearer * chore: docs * fix: change fallback for cicd script * fix: match against main exactly * fix: peft 0.19.1 change * fix: e2e test * fix: ci * fix: e2e test
367 lines
11 KiB
YAML
367 lines
11 KiB
YAML
# Quarto project definition: a website project whose pre-render step runs the
# two documentation generator scripts listed below.
project:
  type: website
  pre-render:
    - docs/scripts/generate_config_docs.py
    - docs/scripts/generate_examples_docs.py

# quartodoc configuration for the API reference of the `axolotl` package.
# Output goes to docs/api. Each entry under `sections` is a titled group with
# a description and a list of module paths.
# NOTE(review): module paths are presumably resolved relative to `package`;
# confirm against the quartodoc documentation.
quartodoc:
  dir: docs/api
  package: axolotl
  title: API Reference
  parser: google

  sections:
    - title: Core
      desc: Core functionality for training
      contents:
        - train
        - evaluate
        - datasets
        - convert
        - prompt_tokenizers
        - logging_config
        - core.builders.base
        - core.builders.causal
        - core.builders.rl
        - core.training_args
        - core.chat.messages
        - core.chat.format.chatml
        - core.chat.format.llama3x
        - core.chat.format.shared
        - core.datasets.chat
        - core.datasets.transforms.chat_builder
    - title: CLI
      desc: Command-line interface
      contents:
        - cli.main
        - cli.train
        - cli.evaluate
        - cli.args
        - cli.art
        - cli.checks
        - cli.config
        - cli.delinearize_llama4
        - cli.inference
        - cli.merge_lora
        - cli.merge_sharded_fsdp_weights
        - cli.preprocess
        - cli.quantize
        - cli.vllm_serve
        - cli.cloud.base
        - cli.cloud.modal_
        - cli.utils
        - cli.utils.args
        - cli.utils.fetch
        - cli.utils.load
        - cli.utils.sweeps
        - cli.utils.train
    - title: Trainers
      desc: Training implementations
      contents:
        - core.trainers.base
        - core.trainers.trl
        - core.trainers.mamba
        - core.trainers.dpo.trainer
        - core.trainers.grpo.trainer
        - core.trainers.grpo.sampler
        - core.trainers.utils
    - title: Model Loading
      desc: Functionality for loading and patching models, tokenizers, etc.
      contents:
        - loaders.model
        - loaders.tokenizer
        - loaders.processor
        - loaders.adapter
        - loaders.patch_manager
        - loaders.constants
    - title: Mixins
      desc: Mixin classes for augmenting trainers
      contents:
        - core.trainers.mixins.optimizer
        - core.trainers.mixins.rng_state_loader
        - core.trainers.mixins.scheduler
    - title: Context Managers
      desc: Context managers for altering trainer behaviors
      contents:
        - utils.ctx_managers.sequence_parallel
    - title: Prompt Strategies
      desc: Prompt formatting strategies
      contents:
        - prompt_strategies.base
        - prompt_strategies.chat_template
        - prompt_strategies.alpaca_chat
        - prompt_strategies.alpaca_instruct
        - prompt_strategies.alpaca_w_system
        - prompt_strategies.user_defined
        - prompt_strategies.llama2_chat
        - prompt_strategies.completion
        - prompt_strategies.input_output
        - prompt_strategies.stepwise_supervised
        - prompt_strategies.metharme
        - prompt_strategies.orcamini
        - prompt_strategies.pygmalion
        - prompt_strategies.messages.chat
        - prompt_strategies.dpo.chat_template
        - prompt_strategies.dpo.llama3
        - prompt_strategies.dpo.chatml
        - prompt_strategies.dpo.zephyr
        - prompt_strategies.dpo.user_defined
        - prompt_strategies.dpo.passthrough
        - prompt_strategies.kto.llama3
        - prompt_strategies.kto.chatml
        - prompt_strategies.kto.user_defined
        - prompt_strategies.orpo.chat_template
        - prompt_strategies.bradley_terry.llama3
    - title: Kernels
      desc: Low-level performance optimizations
      contents:
        - kernels.lora
        - kernels.geglu
        - kernels.swiglu
        - kernels.quantize
        - kernels.utils
    - title: Monkey Patches
      desc: Runtime patches for model optimizations
      contents:
        - monkeypatch.llama_attn_hijack_flash
        - monkeypatch.llama_attn_hijack_xformers
        - monkeypatch.mistral_attn_hijack_flash
        - monkeypatch.multipack
        - monkeypatch.relora
        - monkeypatch.lora_kernels
        - monkeypatch.utils
        - monkeypatch.btlm_attn_hijack_flash
        - monkeypatch.stablelm_attn_hijack_flash
        - monkeypatch.trainer_fsdp_optim
        - monkeypatch.transformers_fa_utils
        - monkeypatch.data.batch_dataset_fetcher
        - monkeypatch.mixtral
        - monkeypatch.gradient_checkpointing.offload_cpu
        - monkeypatch.gradient_checkpointing.offload_disk
    - title: Utils
      desc: Utility functions
      contents:
        - utils.tokenization
        - utils.chat_templates
        - utils.lora
        - utils.model_shard_quant
        - utils.bench
        - utils.freeze
        - utils.trainer
        - utils.schedulers
        - utils.distributed
        - utils.dict
        - utils.optimizers.adopt
        - utils.data.streaming
        - utils.data.sft
        - utils.quantization
    - title: Schemas
      desc: Pydantic data models for Axolotl config
      contents:
        - utils.schemas.config
        - utils.schemas.model
        - utils.schemas.training
        - utils.schemas.datasets
        - utils.schemas.peft
        - utils.schemas.trl
        - utils.schemas.multimodal
        - utils.schemas.integrations
        - utils.schemas.enums
        - utils.schemas.utils
    - title: Integrations
      desc: Third-party integrations and extensions
      contents:
        - integrations.base
        - integrations.cut_cross_entropy.args
        - integrations.grokfast.optimizer
        - integrations.kd.trainer
        - integrations.liger.args
        - integrations.lm_eval.args
        - integrations.spectrum.args
    - title: Common
      desc: Common utilities and shared functionality
      contents:
        - common.architectures
        - common.const
        - common.datasets
    - title: Models
      desc: Custom model implementations
      contents:
        - models.mamba.modeling_mamba
    - title: Data Processing
      desc: Data processing utilities
      contents:
        - utils.collators.core
        - utils.collators.batching
        - utils.collators.mamba
        - utils.collators.mm_chat
        - utils.samplers.multipack
    - title: Callbacks
      desc: Training callbacks
      contents:
        - utils.callbacks.perplexity
        - utils.callbacks.profiler
        - utils.callbacks.lisa
        - utils.callbacks.mlflow_
        - utils.callbacks.comet_
        - utils.callbacks.qat

# Site-level settings: title, description, favicon, analytics id, the top
# navbar (logo plus external tool links) and the docked sidebar navigation.
website:
  title: "Axolotl"
  description: "We make fine-tuning accessible, scalable, and fun"
  favicon: favicon.jpg

  google-analytics: "G-9KYCVJBNMQ"

  navbar:
    logo: image/axolotl_logo_digital_white.svg
    title: false
    background: dark
    pinned: false
    collapse: false
    tools:
      - icon: twitter
        href: https://twitter.com/axolotl_ai
      - icon: github
        href: https://github.com/axolotl-ai-cloud/axolotl/
      - icon: discord
        href: https://discord.gg/7m9sfhzaf3

  sidebar:
    pinned: true
    collapse-level: 2
    style: docked
    contents:
      - text: Home
        href: index.qmd

      # NOTE(review): nesting depth below was reconstructed from a flattened
      # copy of this file (blank-line positions and path prefixes). Confirm
      # that "Model Guides" and the cli/telemetry/config-reference/API
      # Reference entries belong inside "Getting Started".
      - section: "Getting Started"
        contents:
          - docs/getting-started.qmd
          - docs/choosing_method.qmd
          - docs/installation.qmd
          - docs/inference.qmd
          - section: "Model Guides"
            contents:
              - docs/models/kimi-linear.qmd
              - docs/models/plano.qmd
              - docs/models/mimo.qmd
              - docs/models/internvl3_5.qmd
              - docs/models/olmo3.qmd
              - docs/models/trinity.qmd
              - docs/models/arcee.qmd
              - section: "Ministral3"
                contents:
                  - docs/models/ministral3.qmd
                  - docs/models/ministral3/think.qmd
                  - docs/models/ministral3/vision.qmd
              - section: "Magistral"
                contents:
                  - docs/models/magistral.qmd
                  - docs/models/magistral/think.qmd
                  - docs/models/magistral/vision.qmd
              - docs/models/ministral.qmd
              - docs/models/mistral-small.qmd
              - docs/models/voxtral.qmd
              - docs/models/devstral.qmd
              - docs/models/mistral.qmd
              - docs/models/llama-4.qmd
              - docs/models/llama-2.qmd
              - docs/models/qwen3-next.qmd
              - docs/models/qwen3.qmd
              - docs/models/gemma3n.qmd
              - docs/models/apertus.qmd
              - docs/models/gpt-oss.qmd
              - docs/models/seed-oss.qmd
              - docs/models/phi.qmd
              - docs/models/smolvlm2.qmd
              - docs/models/granite4.qmd
              - docs/models/LiquidAI.qmd
              - docs/models/hunyuan.qmd
              - docs/models/jamba.qmd
              - docs/models/orpheus.qmd

          - docs/cli.qmd
          - docs/telemetry.qmd
          - docs/config-reference.qmd
          - text: "API Reference"
            href: docs/api

      # A glob instead of a list: every file under docs/dataset-formats/.
      - section: "Dataset Formats"
        contents: docs/dataset-formats/*

      - section: "Deployments"
        contents:
          - docs/docker.qmd
          - docs/multi-gpu.qmd
          - docs/multi-node.qmd
          - docs/ray-integration.qmd
          - docs/amd_hpc.qmd
          - docs/mac.qmd

      - section: "How To Guides"
        contents:
          - docs/multimodal.qmd
          - docs/rlhf.qmd
          - docs/grpo.qmd
          - docs/ebft.qmd
          - docs/vllm_serving.qmd
          - docs/reward_modelling.qmd
          - docs/lr_groups.qmd
          - docs/lora_optims.qmd
          - docs/dataset_loading.qmd
          - docs/qat.qmd
          - docs/quantize.qmd
          - docs/optimizations.qmd

      - section: "Core Concepts"
        contents:
          - docs/batch_vs_grad.qmd
          - docs/dataset_preprocessing.qmd
          - docs/streaming.qmd
          - docs/multipack.qmd
          - docs/mixed_precision.qmd
          - docs/optimizers.qmd
          - docs/attention.qmd

      - section: "Advanced Features"
        contents:
          - docs/fsdp_qlora.qmd
          - docs/torchao.qmd
          - docs/custom_integrations.qmd
          - docs/sequence_parallelism.qmd
          - docs/gradient_checkpointing.qmd
          - docs/nd_parallelism.qmd
          - docs/expert_quantization.qmd

      - section: "Troubleshooting"
        contents:
          - docs/faq.qmd
          - docs/training_stability.qmd
          - docs/debugging.qmd
          - docs/nccl.qmd

# Output format settings for the rendered HTML site.
# NOTE(review): indentation was reconstructed from a flattened copy of this
# file; confirm that every key below is meant to sit under `html`.
format:
  html:
    theme: darkly
    css: styles.css
    toc: true
    # Enable better handling of line breaks in markdown
    preserve-tabs: true
    html-math-method: mathjax
    # Improved markdown processing options
    md-extensions:
      - markdown_it
      - def_list
      - attr_list
      - fenced_divs
      - tables
      - html_admonition
      - lineblocks
      - fancy_lists
    # Control whitespace handling
    whitespace: preserve
    # Process newlines in paragraphs
    wrap: preserve
    # Better line break handling
    preserve-linebreaks: true