make it easier for agents to discover docs (#3579) [skip ci]
* make it easier for agents to discover docs * fixup pr comments
This commit is contained in:
10
.github/workflows/tests.yml
vendored
10
.github/workflows/tests.yml
vendored
@@ -220,6 +220,16 @@ jobs:
|
||||
run: |
|
||||
axolotl --help
|
||||
|
||||
- name: Verify agent docs are discoverable
|
||||
run: |
|
||||
# Agent docs live in docs/agents/ (source of truth) and are resolved
|
||||
# at runtime from the repo checkout or via `axolotl fetch docs`
|
||||
axolotl agent-docs --list
|
||||
axolotl agent-docs | grep -q "Fine-tuning framework"
|
||||
axolotl agent-docs grpo | grep -q "GRPO"
|
||||
axolotl agent-docs sft | grep -q "SFT"
|
||||
python -c "from axolotl.cli.agent_docs import get_doc, list_topics; assert len(list_topics()) >= 5; assert 'GRPO' in get_doc('grpo')"
|
||||
|
||||
- name: Show HF cache
|
||||
run: hf cache ls
|
||||
|
||||
|
||||
@@ -16,6 +16,9 @@ axolotl inference config.yaml # Interactive inference
|
||||
axolotl merge-lora config.yaml # Merge LoRA adapter into base model
|
||||
axolotl vllm-serve config.yaml # Start vLLM server for GRPO/EBFT training
|
||||
axolotl fetch examples # Download example configs
|
||||
axolotl agent-docs # Show agent-optimized docs (pip-only installs: run `axolotl fetch docs` first)
|
||||
axolotl agent-docs grpo # Topic-specific agent reference
|
||||
axolotl config-schema # Dump config JSON schema
|
||||
```
|
||||
|
||||
## Training Methods
|
||||
|
||||
@@ -3,4 +3,6 @@ include README.md
|
||||
include LICENSE
|
||||
include src/setuptools_axolotl_dynamic_dependencies.py
|
||||
include src/axolotl/utils/chat_templates/templates/*.jinja
|
||||
include AGENTS.md
|
||||
recursive-include docs/agents *.md
|
||||
recursive-include axolotl *.py
|
||||
|
||||
23
README.md
23
README.md
@@ -157,6 +157,29 @@ That's it! Check out our [Getting Started Guide](https://docs.axolotl.ai/docs/ge
|
||||
- [API Reference](https://docs.axolotl.ai/docs/api/) - Auto-generated code documentation
|
||||
- [FAQ](https://docs.axolotl.ai/docs/faq.html) - Frequently asked questions
|
||||
|
||||
## AI Agent Support
|
||||
|
||||
Axolotl provides documentation optimized for AI coding agents (Claude Code, Cursor, Copilot, etc.). These docs are read from a repo checkout or editable install; on a pip-only install, run `axolotl fetch docs` first to download them.
|
||||
|
||||
```bash
|
||||
# Show overview and available training methods
|
||||
axolotl agent-docs
|
||||
|
||||
# Topic-specific references
|
||||
axolotl agent-docs sft # supervised fine-tuning
|
||||
axolotl agent-docs grpo # GRPO online RL
|
||||
axolotl agent-docs preference_tuning # DPO, KTO, ORPO, SimPO
|
||||
axolotl agent-docs reward_modelling # outcome and process reward models
|
||||
axolotl agent-docs pretraining # continual pretraining
|
||||
axolotl agent-docs --list # list all topics
|
||||
|
||||
# Dump config schema for programmatic use
|
||||
axolotl config-schema
|
||||
axolotl config-schema --field adapter
|
||||
```
|
||||
|
||||
If you're working with the source repo, agent docs are also available at `docs/agents/` and the project overview is in `AGENTS.md`.
|
||||
|
||||
## 🤝 Getting Help
|
||||
|
||||
- Join our [Discord community](https://discord.gg/HhrNrHJPRb) for support
|
||||
|
||||
106
src/axolotl/cli/agent_docs/__init__.py
Normal file
106
src/axolotl/cli/agent_docs/__init__.py
Normal file
@@ -0,0 +1,106 @@
|
||||
"""Bundled agent documentation for axolotl.
|
||||
|
||||
These docs are optimized for consumption by AI coding agents.
|
||||
The source of truth is docs/agents/*.md and AGENTS.md in the repo root.
|
||||
This module resolves those paths at runtime — no files are duplicated
|
||||
into the package.
|
||||
|
||||
For pip-only installs (no repo checkout), run `axolotl fetch docs` first
|
||||
to download the docs locally.
|
||||
"""
|
||||
|
||||
from pathlib import Path
|
||||
|
||||
# Topic name -> path of the markdown file, relative to whichever root the
# resolver finds (a repo checkout, or the directory holding fetched docs).
# Every topic lives under docs/agents/ except "overview", which maps to the
# top-level AGENTS.md.
TOPICS: dict[str, str] = {
    "overview": "AGENTS.md",
    "sft": "docs/agents/sft.md",
    "grpo": "docs/agents/grpo.md",
    "preference_tuning": "docs/agents/preference_tuning.md",
    "reward_modelling": "docs/agents/reward_modelling.md",
    "pretraining": "docs/agents/pretraining.md",
}
|
||||
|
||||
|
||||
def _find_repo_root() -> Path | None:
|
||||
"""Walk up from this file to find the repo root (contains AGENTS.md)."""
|
||||
# In an editable install or repo checkout, walk up from
|
||||
# src/axolotl/cli/agent_docs/ to find the repo root
|
||||
current = Path(__file__).resolve().parent
|
||||
while current != current.parent:
|
||||
if (current / "AGENTS.md").exists() and (current / "docs" / "agents").is_dir():
|
||||
return current
|
||||
current = current.parent
|
||||
return None
|
||||
|
||||
|
||||
def _find_docs_dir() -> Path | None:
|
||||
"""Find a fetched docs directory (from `axolotl fetch docs`)."""
|
||||
# axolotl fetch docs --dest defaults to ./docs/ in cwd
|
||||
cwd_docs = Path.cwd() / "docs" / "agents"
|
||||
if cwd_docs.is_dir():
|
||||
return Path.cwd()
|
||||
return None
|
||||
|
||||
|
||||
def _resolve_path(topic: str) -> Path:
    """Resolve a topic name to the markdown file that documents it.

    Lookup order:
      1. the root returned by ``_find_repo_root()``,
      2. the root returned by ``_find_docs_dir()``,
      3. for ``"overview"`` only, ``AGENTS.md`` directly in the cwd.

    Args:
        topic: One of the keys in ``TOPICS``.

    Returns:
        Path to an existing regular file for the topic.

    Raises:
        FileNotFoundError: If ``topic`` is not a key of ``TOPICS``, or if no
            candidate location contains the file.
    """
    if topic not in TOPICS:
        available = ", ".join(sorted(TOPICS))
        raise FileNotFoundError(f"Unknown topic: {topic!r}. Available: {available}")

    relative_path = TOPICS[topic]

    # Locators are called lazily, in priority order, so the cwd is only
    # probed when the repo-root lookup did not yield the file.
    for locate in (_find_repo_root, _find_docs_dir):
        root = locate()
        if root is None:
            continue
        candidate = root / relative_path
        # is_file() rather than exists(): a directory with the same name
        # would otherwise be returned and fail later in read_text().
        if candidate.is_file():
            return candidate

    # AGENTS.md may sit in the cwd even when there is no docs/agents/ there.
    if topic == "overview":
        cwd_agents = Path.cwd() / "AGENTS.md"
        if cwd_agents.is_file():
            return cwd_agents

    raise FileNotFoundError(
        f"Could not find {relative_path!r}. "
        "If you installed axolotl via pip, run `axolotl fetch docs` first "
        "to download the documentation."
    )
|
||||
|
||||
|
||||
def get_doc(topic: str = "overview") -> str:
    """Return the content of an agent doc by topic name.

    Args:
        topic: One of the keys in TOPICS; defaults to "overview".

    Returns:
        The markdown content of the doc.

    Raises:
        FileNotFoundError: If the topic can't be found.
    """
    # Decode explicitly as UTF-8: the docs contain non-ASCII characters and
    # the locale's default encoding is not guaranteed to be UTF-8.
    return _resolve_path(topic).read_text(encoding="utf-8")
|
||||
|
||||
|
||||
def list_topics() -> dict[str, str]:
    """Return a dict of topic name -> title (first line) of each doc.

    Topics are listed in sorted order. The title is the doc's first line with
    leading ``#`` / space characters and surrounding whitespace removed.
    Topics whose file cannot be resolved get a placeholder string instead of
    raising.
    """
    result = {}
    for topic in sorted(TOPICS):
        try:
            path = _resolve_path(topic)
            # Only the first line is needed, so avoid reading the whole file;
            # decode as UTF-8 regardless of the locale's default encoding.
            with path.open(encoding="utf-8") as handle:
                first_line = handle.readline()
        except FileNotFoundError:
            result[topic] = "(not found — run `axolotl fetch docs`)"
        else:
            result[topic] = first_line.lstrip("# ").strip()
    return result
|
||||
@@ -294,7 +294,9 @@ def merge_lora(config: str, **kwargs):
|
||||
|
||||
|
||||
@cli.command()
|
||||
@click.argument("directory", type=click.Choice(["examples", "deepspeed_configs"]))
|
||||
@click.argument(
|
||||
"directory", type=click.Choice(["examples", "deepspeed_configs", "docs"])
|
||||
)
|
||||
@click.option("--dest", help="Destination directory")
|
||||
def fetch(directory: str, dest: Optional[str]):
|
||||
"""
|
||||
@@ -303,9 +305,10 @@ def fetch(directory: str, dest: Optional[str]):
|
||||
Available directories:
|
||||
- examples: Example configuration files
|
||||
- deepspeed_configs: DeepSpeed configuration files
|
||||
- docs: Full documentation (Quarto markdown files)
|
||||
|
||||
Args:
|
||||
directory: One of `examples`, `deepspeed_configs`.
|
||||
directory: One of `examples`, `deepspeed_configs`, `docs`.
|
||||
dest: Optional destination directory.
|
||||
"""
|
||||
fetch_from_github(f"{directory}/", dest)
|
||||
@@ -340,6 +343,112 @@ def delinearize_llama4(model: str, output: str):
|
||||
do_delinearize_llama4(model, output)
|
||||
|
||||
|
||||
@cli.command("agent-docs")
@click.argument("topic", required=False, default=None)
@click.option("--list", "list_topics", is_flag=True, help="List available topics")
def agent_docs(topic: Optional[str], list_topics: bool):
    """Show agent-optimized documentation.

    Prints reference docs designed for AI coding agents.
    The docs are read from a repo checkout (or editable install); on a
    pip-only install, run `axolotl fetch docs` first to download them.

    \b
    Examples:
      axolotl agent-docs           # overview (start here)
      axolotl agent-docs grpo      # GRPO reference
      axolotl agent-docs sft       # SFT reference
      axolotl agent-docs --list    # list all topics
    """
    # Aliased on import because the `--list` flag is bound to a parameter
    # that is itself named `list_topics`.
    from axolotl.cli.agent_docs import get_doc, list_topics as _list_topics

    if list_topics:
        for name, title in _list_topics().items():
            click.echo(f" {name:25s} {title}")
        return

    if topic is None:
        topic = "overview"

    try:
        click.echo(get_doc(topic))
    except FileNotFoundError as exc:
        # Covers both an unknown topic and a doc file that is missing on disk.
        raise click.BadParameter(str(exc)) from exc
|
||||
|
||||
|
||||
@cli.command("config-schema")
@click.option(
    "--format",
    "output_format",
    type=click.Choice(["json", "yaml"]),
    default="json",
    help="Output format (default: json)",
)
@click.option("--field", help="Show schema for a specific field only")
def config_schema(output_format: str, field: Optional[str]):
    """Dump the full config JSON schema.

    Useful for AI agents and tooling to discover all available config options,
    their types, defaults, and descriptions.

    \b
    Examples:
      axolotl config-schema                   # full JSON schema
      axolotl config-schema --format yaml     # YAML format
      axolotl config-schema --field adapter   # single field
    """
    import json

    try:
        schema = AxolotlInputConfig.model_json_schema()
    except (TypeError, ValueError, AttributeError) as exc:
        # Full schema generation can fail (e.g. torch.dtype is not
        # JSON-serializable); degrade to a per-field summary of description,
        # default and type instead of aborting.
        LOG.warning(
            "Full JSON schema generation failed, using simplified fallback: %s", exc
        )
        simplified = {}
        for name, info in AxolotlInputConfig.model_fields.items():
            summary = {}
            if info.description:
                summary["description"] = info.description
            default = info.default
            if default is not None:
                # Keep the default as-is when it serializes, else stringify it.
                try:
                    json.dumps(default)
                except (TypeError, ValueError):
                    default = str(default)
                summary["default"] = default
            if info.annotation is not None:
                summary["type"] = str(info.annotation)
            simplified[name] = summary
        schema = {
            "properties": simplified,
            "_note": "simplified schema (full generation failed)",
        }

    if field:
        props = schema.get("properties", {})
        if field not in props:
            # Exact lookup failed: fall back to the first case-insensitive match.
            wanted = field.lower()
            match = next((key for key in props if key.lower() == wanted), None)
            if match is None:
                raise click.BadParameter(
                    f"Unknown field: {field!r}. "
                    f"Omit --field to dump the full schema, "
                    f"or pipe to jq: axolotl config-schema | jq '.properties | keys'"
                )
            field = match
        schema = {field: props[field]}

    if output_format == "yaml":
        import yaml  # pylint: disable=import-outside-toplevel

        rendered = yaml.dump(schema, default_flow_style=False, sort_keys=False)
    else:
        rendered = json.dumps(schema, indent=2)
    click.echo(rendered)
|
||||
|
||||
|
||||
cli.add_command(lm_eval)
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user