Compare commits

...

8 Commits

Author SHA1 Message Date
Dan Saunders
5b7e688fc5 fix broken link 2025-03-21 11:12:09 -04:00
Dan Saunders
5134aa66cd moving reference up near the top of the sidebar 2025-03-21 11:12:09 -04:00
Dan Saunders
ba9a867adb more autodoc progress 2025-03-21 11:12:09 -04:00
Dan Saunders
c618f42c39 Fix 2025-03-21 11:12:09 -04:00
Dan Saunders
fc1f985296 Update docs/.gitignore to exclude auto-generated API documentation files 2025-03-21 11:12:09 -04:00
Dan Saunders
a5e37f183c deletions 2025-03-21 11:12:09 -04:00
Dan Saunders
e6a7bbe9ff quartodoc progress 2025-03-21 11:12:09 -04:00
Dan Saunders
e4fd7aad0b quartodoc integration 2025-03-21 11:12:09 -04:00
8 changed files with 289 additions and 14 deletions

4
.gitignore vendored
View File

@@ -181,6 +181,10 @@ prepared-datasets/
submit.sh
*.out*
# Quartodoc generated files
objects.json
site_libs/
typings/
out/

View File

@@ -97,6 +97,7 @@ That's it! Check out our [Getting Started Guide](https://axolotl-ai-cloud.github
- [Multi-GPU Training](https://axolotl-ai-cloud.github.io/axolotl/docs/multi-gpu.html)
- [Multi-Node Training](https://axolotl-ai-cloud.github.io/axolotl/docs/multi-node.html)
- [Multipacking](https://axolotl-ai-cloud.github.io/axolotl/docs/multipack.html)
- [API Reference](https://axolotl-ai-cloud.github.io/axolotl/api/) - Auto-generated code documentation
- [FAQ](https://axolotl-ai-cloud.github.io/axolotl/docs/faq.html) - Frequently asked questions
## 🤝 Getting Help

View File

@@ -1,6 +1,166 @@
project:
type: website
quartodoc:
dir: docs/api
package: axolotl
title: API Reference
parser: google
sections:
- title: Core
desc: Core functionality for training
contents:
- train
- evaluate
- datasets
- convert
- prompt_tokenizers
- logging_config
- core.trainer_builder
- core.training_args
- core.chat.messages
- core.chat.format.chatml
- core.chat.format.llama3x
- core.chat.format.shared
- core.datasets.chat
- core.datasets.transforms.chat_builder
- title: CLI
desc: Command-line interface
contents:
- cli.main
- cli.train
- cli.evaluate
- cli.args
- cli.checks
- cli.config
- cli.inference
- cli.merge_lora
- cli.merge_sharded_fsdp_weights
- cli.preprocess
- cli.sweeps
- cli.utils
- cli.cloud.base
- cli.cloud.modal_
- title: Trainers
desc: Training implementations
contents:
- core.trainers.base
- core.trainers.trl
- core.trainers.dpo.trainer
- core.trainers.grpo.trainer
- title: Prompt Strategies
desc: Prompt formatting strategies
contents:
- prompt_strategies.base
- prompt_strategies.chat_template
- prompt_strategies.alpaca_chat
- prompt_strategies.alpaca_instruct
- prompt_strategies.alpaca_w_system
- prompt_strategies.user_defined
- prompt_strategies.llama2_chat
- prompt_strategies.completion
- prompt_strategies.input_output
- prompt_strategies.stepwise_supervised
- prompt_strategies.metharme
- prompt_strategies.orcamini
- prompt_strategies.pygmalion
- prompt_strategies.messages.chat
- prompt_strategies.dpo.chat_template
- prompt_strategies.dpo.llama3
- prompt_strategies.dpo.chatml
- prompt_strategies.dpo.zephyr
- prompt_strategies.dpo.user_defined
- prompt_strategies.dpo.passthrough
- prompt_strategies.kto.llama3
- prompt_strategies.kto.chatml
- prompt_strategies.kto.user_defined
- prompt_strategies.orpo.chat_template
- prompt_strategies.bradley_terry.llama3
- title: Kernels
desc: Low-level performance optimizations
contents:
- kernels.lora
- kernels.geglu
- kernels.swiglu
- kernels.quantize
- kernels.utils
- title: MonkeyPatches
desc: Runtime patches for model optimizations
contents:
- monkeypatch.llama_attn_hijack_flash
- monkeypatch.llama_attn_hijack_xformers
- monkeypatch.mistral_attn_hijack_flash
- monkeypatch.multipack
- monkeypatch.relora
- monkeypatch.llama_expand_mask
- monkeypatch.lora_kernels
- monkeypatch.utils
- monkeypatch.btlm_attn_hijack_flash
- monkeypatch.llama_patch_multipack
- monkeypatch.stablelm_attn_hijack_flash
- monkeypatch.trainer_fsdp_optim
- monkeypatch.transformers_fa_utils
- monkeypatch.unsloth_
- monkeypatch.attention.mllama
- monkeypatch.data.batch_dataset_fetcher
- monkeypatch.mixtral
- title: Utils
desc: Utility functions
contents:
- utils.models
- utils.tokenization
- utils.chat_templates
- utils.lora
- utils.lora_embeddings
- utils.model_shard_quant
- utils.bench
- utils.freeze
- utils.trainer
- utils.schedulers
- utils.distributed
- utils.dict
- utils.optimizers.adopt
- utils.data.pretraining
- utils.data.sft
- utils.gradient_checkpointing.unsloth
- title: Integrations
desc: Third-party integrations and extensions
contents:
- integrations.base
- integrations.cut_cross_entropy.args
- integrations.grokfast.optimizer
- integrations.kd.trainer
- integrations.liger.args
- integrations.lm_eval.args
- integrations.spectrum.args
- title: Common
desc: Common utilities and shared functionality
contents:
- common.architectures
- common.const
- common.datasets
- title: Models
desc: Custom model implementations
contents:
- models.mamba.modeling_mamba
- title: Data Processing
desc: Data processing utilities
contents:
- utils.collators.core
- utils.collators.batching
- utils.collators.mamba
- utils.collators.mm_chat
- utils.samplers.multipack
- title: Callbacks
desc: Training callbacks
contents:
- utils.callbacks.perplexity
- utils.callbacks.profiler
- utils.callbacks.lisa
- utils.callbacks.mlflow_
- utils.callbacks.comet_
website:
title: "Axolotl"
description: "We make fine-tuning accessible, scalable, and fun"
@@ -36,6 +196,12 @@ website:
- docs/cli.qmd
- docs/config.qmd
- section: "Reference"
contents:
- docs/config.qmd
- text: "API Reference"
href: docs/api/index.qmd
- section: "Dataset Formats"
contents: docs/dataset-formats/*
@@ -80,3 +246,22 @@ format:
theme: darkly
css: styles.css
toc: true
# Enable better handling of line breaks in markdown
preserve-tabs: true
html-math-method: mathjax
# Improved markdown processing options
md-extensions:
- markdown_it
- def_list
- attr_list
- fenced_divs
- tables
- html_admonition
- lineblocks
- fancy_lists
# Control whitespace handling
whitespace: preserve
# Process newlines in paragraphs
wrap: preserve
# Better line break handling
preserve-linebreaks: true

2
docs/.gitignore vendored
View File

@@ -1,2 +1,4 @@
/.quarto/
_site/
/api/*.qmd
/api/*.html

View File

@@ -1,5 +1,5 @@
---
title: "CLI Reference"
title: "Command Line Interface (CLI)"
format:
html:
toc: true

View File

@@ -6,7 +6,7 @@ description: How datasets are processed
## Overview
Dataset pre-processing is the step where Axolotl takes each dataset you've configured alongside
the [dataset format](docs/dataset-formats) and prompt strategies to:
the [dataset format](dataset-formats) and prompt strategies to:
- parse the dataset based on the *dataset format*
- transform the dataset to how you would interact with the model based on the *prompt strategy*

View File

@@ -8,6 +8,8 @@ from typing import Dict, Optional
import torch
from accelerate.logging import get_logger
from datasets import Dataset
from transformers.trainer import Trainer
from axolotl.logging_config import configure_logging
from axolotl.train import TrainDatasetMeta
@@ -25,18 +27,18 @@ LOG = get_logger("axolotl.evaluate")
def evaluate_dataset(
trainer, dataset, dataset_type: str, flash_optimum: bool = False
trainer: Trainer, dataset: Dataset, dataset_type: str, flash_optimum: bool = False
) -> Optional[Dict[str, float]]:
"""Helper function to evaluate a single dataset safely.
"""Helper function to evaluate a single dataset.
Args:
trainer: The trainer instance
dataset: Dataset to evaluate
dataset_type: Type of dataset ('train' or 'eval')
flash_optimum: Whether to use flash optimum
trainer: The trainer instance.
dataset: Dataset to evaluate.
dataset_type: Type of dataset ('train' or 'eval').
flash_optimum: Whether to use flash optimum.
Returns:
Dictionary of metrics or None if dataset is None
Dictionary of metrics or None if dataset is None.
"""
if dataset is None:
return None
@@ -63,17 +65,14 @@ def evaluate_dataset(
def evaluate(*, cfg: DictDefault, dataset_meta: TrainDatasetMeta) -> Dict[str, float]:
"""
Evaluate a model on training and validation datasets
Evaluate a model on training and validation datasets.
Args:
cfg: Dictionary mapping `axolotl` config keys to values.
dataset_meta: Dataset metadata containing training and evaluation datasets.
Returns:
Tuple containing:
- The model (either PeftModel or PreTrainedModel)
- The tokenizer
- Dictionary of evaluation metrics
Dictionary mapping metric names to their values.
"""
# pylint: disable=duplicate-code
# Enable expandable segments for cuda allocation to improve VRAM usage

View File

@@ -191,3 +191,87 @@ code span.er {
color: #5cb85c !important;
text-decoration: none !important;
}
/* API Documentation Styling */
/* Improve docstring section rendering */
.level3 p {
white-space: pre-line !important;
}
/* Format docstring sections */
.level3 p strong {
display: block;
margin-top: 1em;
font-weight: bold;
color: var(--cyan);
}
/* Add spacing after sections */
.level3 p:has(strong) {
margin-bottom: 0.5em;
}
/* Format Args and Returns sections */
p:has(code) {
line-height: 1.6;
}
/* Function signatures */
.sourceCode {
margin-bottom: 1.5em;
}
/* Parameter tables */
.doc-section-parameters table,
.doc-section-returns table {
margin-top: 1em;
margin-bottom: 1.5em;
}
/* Make parameter and returns headers smaller */
h2.anchored[data-anchor-id="parameters"],
h2.anchored[data-anchor-id="returns"],
.doc-section-parameters h4,
.doc-section-returns h4 {
font-size: 1.25rem;
margin-top: 2rem;
margin-bottom: 1rem;
color: var(--lime);
border-bottom: 1px solid var(--lime);
padding-bottom: 0.3rem;
font-family: var(--font-body);
font-weight: 500;
letter-spacing: normal;
}
/* Style documentation tables */
table {
width: 100%;
margin-bottom: 1.5rem;
border-collapse: collapse;
}
table th {
background-color: #1a1a1a;
padding: 0.5rem 1rem;
border-bottom: 2px solid var(--greige-600);
text-align: left;
}
table td {
padding: 0.5rem 1rem;
border-bottom: 1px solid var(--greige-600);
}
/* Code in table cells */
table td code {
background-color: transparent !important;
padding: 0;
}
/* Improve spacing in parameter and return tables */
.doc-section-parameters,
.doc-section-returns {
margin-top: 1rem;
}