Compare commits
4 Commits
5b7e688fc5
...
pre-commit
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
156fede4f7 | ||
|
|
dcbbd7af79 | ||
|
|
21bac7ce1a | ||
|
|
aaa4571826 |
4
.gitignore
vendored
4
.gitignore
vendored
@@ -181,10 +181,6 @@ prepared-datasets/
|
|||||||
submit.sh
|
submit.sh
|
||||||
*.out*
|
*.out*
|
||||||
|
|
||||||
# Quartodoc generated files
|
|
||||||
objects.json
|
|
||||||
site_libs/
|
|
||||||
|
|
||||||
typings/
|
typings/
|
||||||
out/
|
out/
|
||||||
|
|
||||||
|
|||||||
@@ -97,7 +97,6 @@ That's it! Check out our [Getting Started Guide](https://axolotl-ai-cloud.github
|
|||||||
- [Multi-GPU Training](https://axolotl-ai-cloud.github.io/axolotl/docs/multi-gpu.html)
|
- [Multi-GPU Training](https://axolotl-ai-cloud.github.io/axolotl/docs/multi-gpu.html)
|
||||||
- [Multi-Node Training](https://axolotl-ai-cloud.github.io/axolotl/docs/multi-node.html)
|
- [Multi-Node Training](https://axolotl-ai-cloud.github.io/axolotl/docs/multi-node.html)
|
||||||
- [Multipacking](https://axolotl-ai-cloud.github.io/axolotl/docs/multipack.html)
|
- [Multipacking](https://axolotl-ai-cloud.github.io/axolotl/docs/multipack.html)
|
||||||
- [API Reference](https://axolotl-ai-cloud.github.io/axolotl/api/) - Auto-generated code documentation
|
|
||||||
- [FAQ](https://axolotl-ai-cloud.github.io/axolotl/docs/faq.html) - Frequently asked questions
|
- [FAQ](https://axolotl-ai-cloud.github.io/axolotl/docs/faq.html) - Frequently asked questions
|
||||||
|
|
||||||
## 🤝 Getting Help
|
## 🤝 Getting Help
|
||||||
|
|||||||
185
_quarto.yml
185
_quarto.yml
@@ -1,166 +1,6 @@
|
|||||||
project:
|
project:
|
||||||
type: website
|
type: website
|
||||||
|
|
||||||
quartodoc:
|
|
||||||
dir: docs/api
|
|
||||||
package: axolotl
|
|
||||||
title: API Reference
|
|
||||||
parser: google
|
|
||||||
|
|
||||||
sections:
|
|
||||||
- title: Core
|
|
||||||
desc: Core functionality for training
|
|
||||||
contents:
|
|
||||||
- train
|
|
||||||
- evaluate
|
|
||||||
- datasets
|
|
||||||
- convert
|
|
||||||
- prompt_tokenizers
|
|
||||||
- logging_config
|
|
||||||
- core.trainer_builder
|
|
||||||
- core.training_args
|
|
||||||
- core.chat.messages
|
|
||||||
- core.chat.format.chatml
|
|
||||||
- core.chat.format.llama3x
|
|
||||||
- core.chat.format.shared
|
|
||||||
- core.datasets.chat
|
|
||||||
- core.datasets.transforms.chat_builder
|
|
||||||
- title: CLI
|
|
||||||
desc: Command-line interface
|
|
||||||
contents:
|
|
||||||
- cli.main
|
|
||||||
- cli.train
|
|
||||||
- cli.evaluate
|
|
||||||
- cli.args
|
|
||||||
- cli.checks
|
|
||||||
- cli.config
|
|
||||||
- cli.inference
|
|
||||||
- cli.merge_lora
|
|
||||||
- cli.merge_sharded_fsdp_weights
|
|
||||||
- cli.preprocess
|
|
||||||
- cli.sweeps
|
|
||||||
- cli.utils
|
|
||||||
- cli.cloud.base
|
|
||||||
- cli.cloud.modal_
|
|
||||||
- title: Trainers
|
|
||||||
desc: Training implementations
|
|
||||||
contents:
|
|
||||||
- core.trainers.base
|
|
||||||
- core.trainers.trl
|
|
||||||
- core.trainers.dpo.trainer
|
|
||||||
- core.trainers.grpo.trainer
|
|
||||||
- title: Prompt Strategies
|
|
||||||
desc: Prompt formatting strategies
|
|
||||||
contents:
|
|
||||||
- prompt_strategies.base
|
|
||||||
- prompt_strategies.chat_template
|
|
||||||
- prompt_strategies.alpaca_chat
|
|
||||||
- prompt_strategies.alpaca_instruct
|
|
||||||
- prompt_strategies.alpaca_w_system
|
|
||||||
- prompt_strategies.user_defined
|
|
||||||
- prompt_strategies.llama2_chat
|
|
||||||
- prompt_strategies.completion
|
|
||||||
- prompt_strategies.input_output
|
|
||||||
- prompt_strategies.stepwise_supervised
|
|
||||||
- prompt_strategies.metharme
|
|
||||||
- prompt_strategies.orcamini
|
|
||||||
- prompt_strategies.pygmalion
|
|
||||||
- prompt_strategies.messages.chat
|
|
||||||
- prompt_strategies.dpo.chat_template
|
|
||||||
- prompt_strategies.dpo.llama3
|
|
||||||
- prompt_strategies.dpo.chatml
|
|
||||||
- prompt_strategies.dpo.zephyr
|
|
||||||
- prompt_strategies.dpo.user_defined
|
|
||||||
- prompt_strategies.dpo.passthrough
|
|
||||||
- prompt_strategies.kto.llama3
|
|
||||||
- prompt_strategies.kto.chatml
|
|
||||||
- prompt_strategies.kto.user_defined
|
|
||||||
- prompt_strategies.orpo.chat_template
|
|
||||||
- prompt_strategies.bradley_terry.llama3
|
|
||||||
- title: Kernels
|
|
||||||
desc: Low-level performance optimizations
|
|
||||||
contents:
|
|
||||||
- kernels.lora
|
|
||||||
- kernels.geglu
|
|
||||||
- kernels.swiglu
|
|
||||||
- kernels.quantize
|
|
||||||
- kernels.utils
|
|
||||||
- title: MonkeyPatches
|
|
||||||
desc: Runtime patches for model optimizations
|
|
||||||
contents:
|
|
||||||
- monkeypatch.llama_attn_hijack_flash
|
|
||||||
- monkeypatch.llama_attn_hijack_xformers
|
|
||||||
- monkeypatch.mistral_attn_hijack_flash
|
|
||||||
- monkeypatch.multipack
|
|
||||||
- monkeypatch.relora
|
|
||||||
- monkeypatch.llama_expand_mask
|
|
||||||
- monkeypatch.lora_kernels
|
|
||||||
- monkeypatch.utils
|
|
||||||
- monkeypatch.btlm_attn_hijack_flash
|
|
||||||
- monkeypatch.llama_patch_multipack
|
|
||||||
- monkeypatch.stablelm_attn_hijack_flash
|
|
||||||
- monkeypatch.trainer_fsdp_optim
|
|
||||||
- monkeypatch.transformers_fa_utils
|
|
||||||
- monkeypatch.unsloth_
|
|
||||||
- monkeypatch.attention.mllama
|
|
||||||
- monkeypatch.data.batch_dataset_fetcher
|
|
||||||
- monkeypatch.mixtral
|
|
||||||
- title: Utils
|
|
||||||
desc: Utility functions
|
|
||||||
contents:
|
|
||||||
- utils.models
|
|
||||||
- utils.tokenization
|
|
||||||
- utils.chat_templates
|
|
||||||
- utils.lora
|
|
||||||
- utils.lora_embeddings
|
|
||||||
- utils.model_shard_quant
|
|
||||||
- utils.bench
|
|
||||||
- utils.freeze
|
|
||||||
- utils.trainer
|
|
||||||
- utils.schedulers
|
|
||||||
- utils.distributed
|
|
||||||
- utils.dict
|
|
||||||
- utils.optimizers.adopt
|
|
||||||
- utils.data.pretraining
|
|
||||||
- utils.data.sft
|
|
||||||
- utils.gradient_checkpointing.unsloth
|
|
||||||
- title: Integrations
|
|
||||||
desc: Third-party integrations and extensions
|
|
||||||
contents:
|
|
||||||
- integrations.base
|
|
||||||
- integrations.cut_cross_entropy.args
|
|
||||||
- integrations.grokfast.optimizer
|
|
||||||
- integrations.kd.trainer
|
|
||||||
- integrations.liger.args
|
|
||||||
- integrations.lm_eval.args
|
|
||||||
- integrations.spectrum.args
|
|
||||||
- title: Common
|
|
||||||
desc: Common utilities and shared functionality
|
|
||||||
contents:
|
|
||||||
- common.architectures
|
|
||||||
- common.const
|
|
||||||
- common.datasets
|
|
||||||
- title: Models
|
|
||||||
desc: Custom model implementations
|
|
||||||
contents:
|
|
||||||
- models.mamba.modeling_mamba
|
|
||||||
- title: Data Processing
|
|
||||||
desc: Data processing utilities
|
|
||||||
contents:
|
|
||||||
- utils.collators.core
|
|
||||||
- utils.collators.batching
|
|
||||||
- utils.collators.mamba
|
|
||||||
- utils.collators.mm_chat
|
|
||||||
- utils.samplers.multipack
|
|
||||||
- title: Callbacks
|
|
||||||
desc: Training callbacks
|
|
||||||
contents:
|
|
||||||
- utils.callbacks.perplexity
|
|
||||||
- utils.callbacks.profiler
|
|
||||||
- utils.callbacks.lisa
|
|
||||||
- utils.callbacks.mlflow_
|
|
||||||
- utils.callbacks.comet_
|
|
||||||
|
|
||||||
website:
|
website:
|
||||||
title: "Axolotl"
|
title: "Axolotl"
|
||||||
description: "We make fine-tuning accessible, scalable, and fun"
|
description: "We make fine-tuning accessible, scalable, and fun"
|
||||||
@@ -196,12 +36,6 @@ website:
|
|||||||
- docs/cli.qmd
|
- docs/cli.qmd
|
||||||
- docs/config.qmd
|
- docs/config.qmd
|
||||||
|
|
||||||
- section: "Reference"
|
|
||||||
contents:
|
|
||||||
- docs/config.qmd
|
|
||||||
- text: "API Reference"
|
|
||||||
href: docs/api/index.qmd
|
|
||||||
|
|
||||||
- section: "Dataset Formats"
|
- section: "Dataset Formats"
|
||||||
contents: docs/dataset-formats/*
|
contents: docs/dataset-formats/*
|
||||||
|
|
||||||
@@ -246,22 +80,3 @@ format:
|
|||||||
theme: darkly
|
theme: darkly
|
||||||
css: styles.css
|
css: styles.css
|
||||||
toc: true
|
toc: true
|
||||||
# Enable better handling of line breaks in markdown
|
|
||||||
preserve-tabs: true
|
|
||||||
html-math-method: mathjax
|
|
||||||
# Improved markdown processing options
|
|
||||||
md-extensions:
|
|
||||||
- markdown_it
|
|
||||||
- def_list
|
|
||||||
- attr_list
|
|
||||||
- fenced_divs
|
|
||||||
- tables
|
|
||||||
- html_admonition
|
|
||||||
- lineblocks
|
|
||||||
- fancy_lists
|
|
||||||
# Control whitespace handling
|
|
||||||
whitespace: preserve
|
|
||||||
# Process newlines in paragraphs
|
|
||||||
wrap: preserve
|
|
||||||
# Better line break handling
|
|
||||||
preserve-linebreaks: true
|
|
||||||
|
|||||||
2
docs/.gitignore
vendored
2
docs/.gitignore
vendored
@@ -1,4 +1,2 @@
|
|||||||
/.quarto/
|
/.quarto/
|
||||||
_site/
|
_site/
|
||||||
/api/*.qmd
|
|
||||||
/api/*.html
|
|
||||||
|
|||||||
@@ -1,5 +1,5 @@
|
|||||||
---
|
---
|
||||||
title: "Command Line Interface (CLI)"
|
title: "CLI Reference"
|
||||||
format:
|
format:
|
||||||
html:
|
html:
|
||||||
toc: true
|
toc: true
|
||||||
|
|||||||
@@ -6,7 +6,7 @@ description: How datasets are processed
|
|||||||
## Overview
|
## Overview
|
||||||
|
|
||||||
Dataset pre-processing is the step where Axolotl takes each dataset you've configured alongside
|
Dataset pre-processing is the step where Axolotl takes each dataset you've configured alongside
|
||||||
the [dataset format](dataset-formats) and prompt strategies to:
|
the [dataset format](docs/dataset-formats) and prompt strategies to:
|
||||||
|
|
||||||
- parse the dataset based on the *dataset format*
|
- parse the dataset based on the *dataset format*
|
||||||
- transform the dataset to how you would interact with the model based on the *prompt strategy*
|
- transform the dataset to how you would interact with the model based on the *prompt strategy*
|
||||||
|
|||||||
@@ -8,8 +8,6 @@ from typing import Dict, Optional
|
|||||||
|
|
||||||
import torch
|
import torch
|
||||||
from accelerate.logging import get_logger
|
from accelerate.logging import get_logger
|
||||||
from datasets import Dataset
|
|
||||||
from transformers.trainer import Trainer
|
|
||||||
|
|
||||||
from axolotl.logging_config import configure_logging
|
from axolotl.logging_config import configure_logging
|
||||||
from axolotl.train import TrainDatasetMeta
|
from axolotl.train import TrainDatasetMeta
|
||||||
@@ -27,18 +25,18 @@ LOG = get_logger("axolotl.evaluate")
|
|||||||
|
|
||||||
|
|
||||||
def evaluate_dataset(
|
def evaluate_dataset(
|
||||||
trainer: Trainer, dataset: Dataset, dataset_type: str, flash_optimum: bool = False
|
trainer, dataset, dataset_type: str, flash_optimum: bool = False
|
||||||
) -> Optional[Dict[str, float]]:
|
) -> Optional[Dict[str, float]]:
|
||||||
"""Helper function to evaluate a single dataset.
|
"""Helper function to evaluate a single dataset safely.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
trainer: The trainer instance.
|
trainer: The trainer instance
|
||||||
dataset: Dataset to evaluate.
|
dataset: Dataset to evaluate
|
||||||
dataset_type: Type of dataset ('train' or 'eval').
|
dataset_type: Type of dataset ('train' or 'eval')
|
||||||
flash_optimum: Whether to use flash optimum.
|
flash_optimum: Whether to use flash optimum
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
Dictionary of metrics or None if dataset is None.
|
Dictionary of metrics or None if dataset is None
|
||||||
"""
|
"""
|
||||||
if dataset is None:
|
if dataset is None:
|
||||||
return None
|
return None
|
||||||
@@ -65,14 +63,17 @@ def evaluate_dataset(
|
|||||||
|
|
||||||
def evaluate(*, cfg: DictDefault, dataset_meta: TrainDatasetMeta) -> Dict[str, float]:
|
def evaluate(*, cfg: DictDefault, dataset_meta: TrainDatasetMeta) -> Dict[str, float]:
|
||||||
"""
|
"""
|
||||||
Evaluate a model on training and validation datasets.
|
Evaluate a model on training and validation datasets
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
cfg: Dictionary mapping `axolotl` config keys to values.
|
cfg: Dictionary mapping `axolotl` config keys to values.
|
||||||
dataset_meta: Dataset metadata containing training and evaluation datasets.
|
dataset_meta: Dataset metadata containing training and evaluation datasets.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
Dictionary mapping metric names to their values.
|
Tuple containing:
|
||||||
|
- The model (either PeftModel or PreTrainedModel)
|
||||||
|
- The tokenizer
|
||||||
|
- Dictionary of evaluation metrics
|
||||||
"""
|
"""
|
||||||
# pylint: disable=duplicate-code
|
# pylint: disable=duplicate-code
|
||||||
# Enable expandable segments for cuda allocation to improve VRAM usage
|
# Enable expandable segments for cuda allocation to improve VRAM usage
|
||||||
|
|||||||
84
styles.css
84
styles.css
@@ -191,87 +191,3 @@ code span.er {
|
|||||||
color: #5cb85c !important;
|
color: #5cb85c !important;
|
||||||
text-decoration: none !important;
|
text-decoration: none !important;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* API Documentation Styling */
|
|
||||||
|
|
||||||
/* Improve docstring section rendering */
|
|
||||||
.level3 p {
|
|
||||||
white-space: pre-line !important;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Format docstring sections */
|
|
||||||
.level3 p strong {
|
|
||||||
display: block;
|
|
||||||
margin-top: 1em;
|
|
||||||
font-weight: bold;
|
|
||||||
color: var(--cyan);
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Add spacing after sections */
|
|
||||||
.level3 p:has(strong) {
|
|
||||||
margin-bottom: 0.5em;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Format Args and Returns sections */
|
|
||||||
p:has(code) {
|
|
||||||
line-height: 1.6;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Function signatures */
|
|
||||||
.sourceCode {
|
|
||||||
margin-bottom: 1.5em;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Parameter tables */
|
|
||||||
.doc-section-parameters table,
|
|
||||||
.doc-section-returns table {
|
|
||||||
margin-top: 1em;
|
|
||||||
margin-bottom: 1.5em;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Make parameter and returns headers smaller */
|
|
||||||
h2.anchored[data-anchor-id="parameters"],
|
|
||||||
h2.anchored[data-anchor-id="returns"],
|
|
||||||
.doc-section-parameters h4,
|
|
||||||
.doc-section-returns h4 {
|
|
||||||
font-size: 1.25rem;
|
|
||||||
margin-top: 2rem;
|
|
||||||
margin-bottom: 1rem;
|
|
||||||
color: var(--lime);
|
|
||||||
border-bottom: 1px solid var(--lime);
|
|
||||||
padding-bottom: 0.3rem;
|
|
||||||
font-family: var(--font-body);
|
|
||||||
font-weight: 500;
|
|
||||||
letter-spacing: normal;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Style documentation tables */
|
|
||||||
table {
|
|
||||||
width: 100%;
|
|
||||||
margin-bottom: 1.5rem;
|
|
||||||
border-collapse: collapse;
|
|
||||||
}
|
|
||||||
|
|
||||||
table th {
|
|
||||||
background-color: #1a1a1a;
|
|
||||||
padding: 0.5rem 1rem;
|
|
||||||
border-bottom: 2px solid var(--greige-600);
|
|
||||||
text-align: left;
|
|
||||||
}
|
|
||||||
|
|
||||||
table td {
|
|
||||||
padding: 0.5rem 1rem;
|
|
||||||
border-bottom: 1px solid var(--greige-600);
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Code in table cells */
|
|
||||||
table td code {
|
|
||||||
background-color: transparent !important;
|
|
||||||
padding: 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Improve spacing in parameter and return tables */
|
|
||||||
.doc-section-parameters,
|
|
||||||
.doc-section-returns {
|
|
||||||
margin-top: 1rem;
|
|
||||||
}
|
|
||||||
|
|||||||
Reference in New Issue
Block a user