make it easier for agents to discover docs (#3579) [skip ci]

* make it easier for agents to discover docs

* fixup pr comments
This commit is contained in:
Wing Lian
2026-04-06 10:00:55 -07:00
committed by GitHub
parent 900eec7988
commit 6f15da4cac
6 changed files with 255 additions and 2 deletions

View File

@@ -220,6 +220,16 @@ jobs:
run: |
axolotl --help
- name: Verify agent docs are discoverable
run: |
# Agent docs live in docs/agents/ (source of truth) and are resolved
# at runtime from the repo checkout or via `axolotl fetch docs`
axolotl agent-docs --list
axolotl agent-docs | grep -q "Fine-tuning framework"
axolotl agent-docs grpo | grep -q "GRPO"
axolotl agent-docs sft | grep -q "SFT"
python -c "from axolotl.cli.agent_docs import get_doc, list_topics; assert len(list_topics()) >= 5; assert 'GRPO' in get_doc('grpo')"
- name: Show HF cache
run: hf cache ls

View File

@@ -16,6 +16,9 @@ axolotl inference config.yaml # Interactive inference
axolotl merge-lora config.yaml # Merge LoRA adapter into base model
axolotl vllm-serve config.yaml # Start vLLM server for GRPO/EBFT training
axolotl fetch examples # Download example configs
axolotl agent-docs # Show agent-optimized docs (bundled with pip package)
axolotl agent-docs grpo # Topic-specific agent reference
axolotl config-schema # Dump config JSON schema
```
## Training Methods

View File

@@ -3,4 +3,6 @@ include README.md
include LICENSE
include src/setuptools_axolotl_dynamic_dependencies.py
include src/axolotl/utils/chat_templates/templates/*.jinja
include AGENTS.md
recursive-include docs/agents *.md
recursive-include axolotl *.py

View File

@@ -157,6 +157,29 @@ That's it! Check out our [Getting Started Guide](https://docs.axolotl.ai/docs/ge
- [API Reference](https://docs.axolotl.ai/docs/api/) - Auto-generated code documentation
- [FAQ](https://docs.axolotl.ai/docs/faq.html) - Frequently asked questions
## AI Agent Support
Axolotl ships with built-in documentation optimized for AI coding agents (Claude Code, Cursor, Copilot, etc.). These docs are bundled with the pip package — no repo clone needed.
```bash
# Show overview and available training methods
axolotl agent-docs
# Topic-specific references
axolotl agent-docs sft # supervised fine-tuning
axolotl agent-docs grpo # GRPO online RL
axolotl agent-docs preference_tuning # DPO, KTO, ORPO, SimPO
axolotl agent-docs reward_modelling # outcome and process reward models
axolotl agent-docs pretraining # continual pretraining
axolotl agent-docs --list # list all topics
# Dump config schema for programmatic use
axolotl config-schema
axolotl config-schema --field adapter
```
If you're working with the source repo, agent docs are also available at `docs/agents/` and the project overview is in `AGENTS.md`.
## 🤝 Getting Help
- Join our [Discord community](https://discord.gg/HhrNrHJPRb) for support

View File

@@ -0,0 +1,106 @@
"""Bundled agent documentation for axolotl.
These docs are optimized for consumption by AI coding agents.
The source of truth is docs/agents/*.md and AGENTS.md in the repo root.
This module resolves those paths at runtime — no files are duplicated
into the package.
For pip-only installs (no repo checkout), run `axolotl fetch docs` first
to download the docs locally.
"""
from pathlib import Path
# Topic name -> (filename in docs/agents/, fallback filename for AGENTS.md)
TOPICS = {
"overview": "AGENTS.md",
"sft": "docs/agents/sft.md",
"grpo": "docs/agents/grpo.md",
"preference_tuning": "docs/agents/preference_tuning.md",
"reward_modelling": "docs/agents/reward_modelling.md",
"pretraining": "docs/agents/pretraining.md",
}
def _find_repo_root() -> Path | None:
"""Walk up from this file to find the repo root (contains AGENTS.md)."""
# In an editable install or repo checkout, walk up from
# src/axolotl/cli/agent_docs/ to find the repo root
current = Path(__file__).resolve().parent
while current != current.parent:
if (current / "AGENTS.md").exists() and (current / "docs" / "agents").is_dir():
return current
current = current.parent
return None
def _find_docs_dir() -> Path | None:
"""Find a fetched docs directory (from `axolotl fetch docs`)."""
# axolotl fetch docs --dest defaults to ./docs/ in cwd
cwd_docs = Path.cwd() / "docs" / "agents"
if cwd_docs.is_dir():
return Path.cwd()
return None
def _resolve_path(topic: str) -> Path:
"""Resolve a topic name to the actual file path."""
if topic not in TOPICS:
available = ", ".join(sorted(TOPICS.keys()))
raise FileNotFoundError(f"Unknown topic: {topic!r}. Available: {available}")
relative_path = TOPICS[topic]
# Try repo root first (editable install / repo checkout)
repo_root = _find_repo_root()
if repo_root:
candidate = repo_root / relative_path
if candidate.exists():
return candidate
# Try cwd (fetched docs via `axolotl fetch docs`)
docs_root = _find_docs_dir()
if docs_root:
candidate = docs_root / relative_path
if candidate.exists():
return candidate
# Also check cwd directly for AGENTS.md
if topic == "overview":
cwd_agents = Path.cwd() / "AGENTS.md"
if cwd_agents.exists():
return cwd_agents
raise FileNotFoundError(
f"Could not find {relative_path!r}. "
f"If you installed axolotl via pip, run `axolotl fetch docs` first "
f"to download the documentation."
)
def get_doc(topic: str = "overview") -> str:
"""Return the content of an agent doc by topic name.
Args:
topic: One of the keys in TOPICS, or "overview" (default).
Returns:
The markdown content of the doc.
Raises:
FileNotFoundError: If the topic can't be found.
"""
return _resolve_path(topic).read_text()
def list_topics() -> dict[str, str]:
"""Return a dict of topic name -> first line (title) of each doc."""
result = {}
for topic in sorted(TOPICS.keys()):
try:
path = _resolve_path(topic)
first_line = path.read_text().split("\n", 1)[0].lstrip("# ").strip()
result[topic] = first_line
except FileNotFoundError:
result[topic] = "(not found — run `axolotl fetch docs`)"
return result

View File

@@ -294,7 +294,9 @@ def merge_lora(config: str, **kwargs):
@cli.command()
@click.argument("directory", type=click.Choice(["examples", "deepspeed_configs"]))
@click.argument(
"directory", type=click.Choice(["examples", "deepspeed_configs", "docs"])
)
@click.option("--dest", help="Destination directory")
def fetch(directory: str, dest: Optional[str]):
"""
@@ -303,9 +305,10 @@ def fetch(directory: str, dest: Optional[str]):
Available directories:
- examples: Example configuration files
- deepspeed_configs: DeepSpeed configuration files
- docs: Full documentation (Quarto markdown files)
Args:
directory: One of `examples`, `deepspeed_configs`.
directory: One of `examples`, `deepspeed_configs`, `docs`.
dest: Optional destination directory.
"""
fetch_from_github(f"{directory}/", dest)
@@ -340,6 +343,112 @@ def delinearize_llama4(model: str, output: str):
do_delinearize_llama4(model, output)
@cli.command("agent-docs")
@click.argument("topic", required=False, default=None)
@click.option("--list", "list_topics", is_flag=True, help="List available topics")
def agent_docs(topic: Optional[str], list_topics: bool):
"""Show agent-optimized documentation.
Prints reference docs designed for AI coding agents.
These docs are bundled with the package — no network access needed.
\b
Examples:
axolotl agent-docs # overview (start here)
axolotl agent-docs grpo # GRPO reference
axolotl agent-docs sft # SFT reference
axolotl agent-docs --list # list all topics
"""
from axolotl.cli.agent_docs import get_doc, list_topics as _list_topics
if list_topics:
for name, title in _list_topics().items():
click.echo(f" {name:25s} {title}")
return
if topic is None:
topic = "overview"
try:
click.echo(get_doc(topic))
except FileNotFoundError as exc:
raise click.BadParameter(str(exc)) from exc
@cli.command("config-schema")
@click.option(
"--format",
"output_format",
type=click.Choice(["json", "yaml"]),
default="json",
help="Output format (default: json)",
)
@click.option("--field", help="Show schema for a specific field only")
def config_schema(output_format: str, field: Optional[str]):
"""Dump the full config JSON schema.
Useful for AI agents and tooling to discover all available config options,
their types, defaults, and descriptions.
\b
Examples:
axolotl config-schema # full JSON schema
axolotl config-schema --format yaml # YAML format
axolotl config-schema --field adapter # single field
"""
import json
try:
schema = AxolotlInputConfig.model_json_schema()
except (TypeError, ValueError, AttributeError) as exc:
# Fallback: dump field names, types, and defaults when full schema
# generation fails (e.g. torch.dtype not JSON-serializable)
LOG.warning(
"Full JSON schema generation failed, using simplified fallback: %s", exc
)
fields = {}
for name, field_info in AxolotlInputConfig.model_fields.items():
entry = {}
if field_info.description:
entry["description"] = field_info.description
if field_info.default is not None:
try:
json.dumps(field_info.default)
entry["default"] = field_info.default
except (TypeError, ValueError):
entry["default"] = str(field_info.default)
annotation = field_info.annotation
if annotation is not None:
entry["type"] = str(annotation)
fields[name] = entry
schema = {
"properties": fields,
"_note": "simplified schema (full generation failed)",
}
if field:
props = schema.get("properties", {})
if field not in props:
# Try case-insensitive match
matches = [k for k in props if k.lower() == field.lower()]
if matches:
field = matches[0]
else:
raise click.BadParameter(
f"Unknown field: {field!r}. "
f"Omit --field to dump the full schema, "
f"or pipe to jq: axolotl config-schema | jq '.properties | keys'"
)
schema = {field: props[field]}
if output_format == "yaml":
import yaml # pylint: disable=import-outside-toplevel
click.echo(yaml.dump(schema, default_flow_style=False, sort_keys=False))
else:
click.echo(json.dumps(schema, indent=2))
cli.add_command(lm_eval)