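# Quarto configuration for the Axolotl documentation site.
# The quartodoc block drives API reference generation from the axolotl
# package's Google-style docstrings (parser: google); the remaining blocks
# configure the website itself.
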
project:
  type: website

quartodoc:
  dir: docs/api
  package: axolotl
  title: API Reference
  parser: google
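
  # Each title below becomes a grouped section of the API reference;
  # the contents entries are Python module paths within the axolotl package.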
  sections:
    - title: Core
      desc: Core functionality for training
      contents:
        - train
        - evaluate
        - datasets
        - convert
        - prompt_tokenizers
        - logging_config
        - core.trainer_builder
        - core.training_args
        - core.chat.messages
        - core.chat.format.chatml
        - core.chat.format.llama3x
        - core.chat.format.shared
        - core.datasets.chat
        - core.datasets.transforms.chat_builder
    - title: CLI
      desc: Command-line interface
      contents:
        - cli.main
        - cli.train
        - cli.evaluate
        - cli.args
        - cli.checks
        - cli.config
        - cli.inference
        - cli.merge_lora
        - cli.merge_sharded_fsdp_weights
        - cli.preprocess
        - cli.sweeps
        - cli.utils
        - cli.vllm_serve
        - cli.cloud.base
        - cli.cloud.modal_
    - title: Trainers
      desc: Training implementations
      contents:
        - core.trainers.base
        - core.trainers.trl
        - core.trainers.dpo.trainer
        - core.trainers.grpo.trainer
    - title: Prompt Strategies
      desc: Prompt formatting strategies
      contents:
        - prompt_strategies.base
        - prompt_strategies.chat_template
        - prompt_strategies.alpaca_chat
        - prompt_strategies.alpaca_instruct
        - prompt_strategies.alpaca_w_system
        - prompt_strategies.user_defined
        - prompt_strategies.llama2_chat
        - prompt_strategies.completion
        - prompt_strategies.input_output
        - prompt_strategies.stepwise_supervised
        - prompt_strategies.metharme
        - prompt_strategies.orcamini
        - prompt_strategies.pygmalion
        - prompt_strategies.messages.chat
        - prompt_strategies.dpo.chat_template
        - prompt_strategies.dpo.llama3
        - prompt_strategies.dpo.chatml
        - prompt_strategies.dpo.zephyr
        - prompt_strategies.dpo.user_defined
        - prompt_strategies.dpo.passthrough
        - prompt_strategies.kto.llama3
        - prompt_strategies.kto.chatml
        - prompt_strategies.kto.user_defined
        - prompt_strategies.orpo.chat_template
        - prompt_strategies.bradley_terry.llama3
    - title: Kernels
      desc: Low-level performance optimizations
      contents:
        - kernels.lora
        - kernels.geglu
        - kernels.swiglu
        - kernels.quantize
        - kernels.utils
    - title: MonkeyPatches
      desc: Runtime patches for model optimizations
      contents:
        - monkeypatch.llama_attn_hijack_flash
        - monkeypatch.llama_attn_hijack_xformers
        - monkeypatch.mistral_attn_hijack_flash
        - monkeypatch.multipack
        - monkeypatch.relora
        - monkeypatch.llama_expand_mask
        - monkeypatch.lora_kernels
        - monkeypatch.utils
        - monkeypatch.btlm_attn_hijack_flash
        - monkeypatch.llama_patch_multipack
        - monkeypatch.stablelm_attn_hijack_flash
        - monkeypatch.trainer_fsdp_optim
        - monkeypatch.transformers_fa_utils
        - monkeypatch.unsloth_
        - monkeypatch.attention.mllama
        - monkeypatch.data.batch_dataset_fetcher
        - monkeypatch.mixtral
    - title: Utils
      desc: Utility functions
      contents:
        - utils.models
        - utils.tokenization
        - utils.chat_templates
        - utils.lora
        - utils.lora_embeddings
        - utils.model_shard_quant
        - utils.bench
        - utils.freeze
        - utils.trainer
        - utils.schedulers
        - utils.distributed
        - utils.dict
        - utils.optimizers.adopt
        - utils.data.pretraining
        - utils.data.sft
        - utils.gradient_checkpointing.unsloth
    - title: Schemas
      desc: Pydantic data models for Axolotl config
      contents:
        - utils.schemas.config
        - utils.schemas.model
        - utils.schemas.training
        - utils.schemas.datasets
        - utils.schemas.peft
        - utils.schemas.trl
        - utils.schemas.multimodal
        - utils.schemas.integrations
        - utils.schemas.enums
        - utils.schemas.utils
    - title: Integrations
      desc: Third-party integrations and extensions
      contents:
        - integrations.base
        - integrations.cut_cross_entropy.args
        - integrations.grokfast.optimizer
        - integrations.kd.trainer
        - integrations.liger.args
        - integrations.lm_eval.args
        - integrations.spectrum.args
    - title: Common
      desc: Common utilities and shared functionality
      contents:
        - common.architectures
        - common.const
        - common.datasets
    - title: Models
      desc: Custom model implementations
      contents:
        - models.mamba.modeling_mamba
    - title: Data Processing
      desc: Data processing utilities
      contents:
        - utils.collators.core
        - utils.collators.batching
        - utils.collators.mamba
        - utils.collators.mm_chat
        - utils.samplers.multipack
    - title: Callbacks
      desc: Training callbacks
      contents:
        - utils.callbacks.perplexity
        - utils.callbacks.profiler
        - utils.callbacks.lisa
        - utils.callbacks.mlflow_
        - utils.callbacks.comet_
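
# Site chrome: top navbar with social links, plus the docked sidebar
# navigation defined further below.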
website:
  title: "Axolotl"
  description: "We make fine-tuning accessible, scalable, and fun"
  favicon: favicon.jpg

  navbar:
    logo: image/axolotl_logo_digital_white.svg
    title: false
    background: dark
    pinned: false
    collapse: false
    tools:
      - icon: twitter
        href: https://twitter.com/axolotl_ai
      - icon: github
        href: https://github.com/axolotl-ai-cloud/axolotl/
      - icon: discord
        href: https://discord.gg/7m9sfhzaf3
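
  # Sidebar sections mirror the docs/ source tree; collapse-level: 2 keeps
  # the sidebar expanded two levels deep when a page loads.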
  sidebar:
    pinned: true
    collapse-level: 2
    style: docked
    contents:
      - text: Home
        href: index.qmd

      - section: "Getting Started"
        contents:
          - docs/getting-started.qmd
          - docs/installation.qmd
          - docs/inference.qmd
          - docs/cli.qmd
          - docs/config.qmd
          - text: "API Reference"
            href: docs/api

      - section: "Dataset Formats"
        contents: docs/dataset-formats/*

      - section: "Deployments"
        contents:
          - docs/docker.qmd
          - docs/multi-gpu.qmd
          - docs/multi-node.qmd
          - docs/ray-integration.qmd
          - docs/amd_hpc.qmd
          - docs/mac.qmd

      - section: "How To Guides"
        contents:
          - docs/multimodal.qmd
          - docs/rlhf.qmd
          - docs/reward_modelling.qmd
          - docs/lr_groups.qmd
          - docs/lora_optims.qmd

      - section: "Core Concepts"
        contents:
          - docs/batch_vs_grad.qmd
          - docs/dataset_preprocessing.qmd
          - docs/multipack.qmd

      - section: "Advanced Features"
        contents:
          - docs/fsdp_qlora.qmd
          - docs/unsloth.qmd
          - docs/torchao.qmd
          - docs/custom_integrations.qmd
          - docs/sequence_parallelism.qmd

      - section: "Troubleshooting"
        contents:
          - docs/faq.qmd
          - docs/debugging.qmd
          - docs/nccl.qmd

format:
  html:
    theme: darkly
    css: styles.css
    toc: true
    # Enable better handling of line breaks in markdown
    preserve-tabs: true
    html-math-method: mathjax
    # Improved markdown processing options
    md-extensions:
      - markdown_it
      - def_list
      - attr_list
      - fenced_divs
      - tables
      - html_admonition
      - lineblocks
      - fancy_lists
    # Control whitespace handling
    whitespace: preserve
    # Process newlines in paragraphs
    wrap: preserve
    # Better line break handling
    preserve-linebreaks: true
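
# To build and preview locally (assuming quarto and quartodoc are installed):
#   quartodoc build    # generate the API reference pages under docs/api
#   quarto preview     # render the site and serve it with live reload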