make it easier for agents to discover docs (#3579) [skip ci]

* make it easier for agents to discover docs * fixup pr comments
2026-04-06 10:00:55 -07:00
parent 900eec7988
commit 6f15da4cac
6 changed files with 255 additions and 2 deletions
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -220,6 +220,16 @@ jobs:
        run: |
          axolotl --help

+      - name: Verify agent docs are discoverable
+        run: |
+          # Agent docs live in docs/agents/ (source of truth) and are resolved
+          # at runtime from the repo checkout or via `axolotl fetch docs`
+          axolotl agent-docs --list
+          axolotl agent-docs | grep -q "Fine-tuning framework"
+          axolotl agent-docs grpo | grep -q "GRPO"
+          axolotl agent-docs sft | grep -q "SFT"
+          python -c "from axolotl.cli.agent_docs import get_doc, list_topics; assert len(list_topics()) >= 5; assert 'GRPO' in get_doc('grpo')"
+
      - name: Show HF cache
        run: hf cache ls

--- a/AGENTS.md
+++ b/AGENTS.md
@@ -16,6 +16,9 @@ axolotl inference config.yaml          # Interactive inference
 axolotl merge-lora config.yaml         # Merge LoRA adapter into base model
 axolotl vllm-serve config.yaml         # Start vLLM server for GRPO/EBFT training
 axolotl fetch examples                 # Download example configs
+axolotl agent-docs                     # Show agent-optimized docs (needs a repo checkout or "axolotl fetch docs")
+axolotl agent-docs grpo                # Topic-specific agent reference
+axolotl config-schema                  # Dump config JSON schema
 ```

 ## Training Methods
--- a/MANIFEST.in
+++ b/MANIFEST.in
@@ -3,4 +3,6 @@ include README.md
 include LICENSE
 include src/setuptools_axolotl_dynamic_dependencies.py
 include src/axolotl/utils/chat_templates/templates/*.jinja
+include AGENTS.md
+recursive-include docs/agents *.md
 recursive-include axolotl *.py
--- a/README.md
+++ b/README.md
@@ -157,6 +157,29 @@ That's it! Check out our [Getting Started Guide](https://docs.axolotl.ai/docs/ge
 - [API Reference](https://docs.axolotl.ai/docs/api/) - Auto-generated code documentation
 - [FAQ](https://docs.axolotl.ai/docs/faq.html) - Frequently asked questions

+## AI Agent Support
+
+Axolotl ships with documentation optimized for AI coding agents (Claude Code, Cursor, Copilot, etc.). The docs are read from the repo checkout at runtime; for pip-only installs, run `axolotl fetch docs` first to download them.
+
+```bash
+# Show overview and available training methods
+axolotl agent-docs
+
+# Topic-specific references
+axolotl agent-docs sft                 # supervised fine-tuning
+axolotl agent-docs grpo                # GRPO online RL
+axolotl agent-docs preference_tuning   # DPO, KTO, ORPO, SimPO
+axolotl agent-docs reward_modelling    # outcome and process reward models
+axolotl agent-docs pretraining         # continual pretraining
+axolotl agent-docs --list              # list all topics
+
+# Dump config schema for programmatic use
+axolotl config-schema
+axolotl config-schema --field adapter
+```
+
+If you're working with the source repo, agent docs are also available at `docs/agents/` and the project overview is in `AGENTS.md`.
+
 ## 🤝 Getting Help

 - Join our [Discord community](https://discord.gg/HhrNrHJPRb) for support
--- a/src/axolotl/cli/agent_docs/__init__.py
+++ b/src/axolotl/cli/agent_docs/__init__.py
@@ -0,0 +1,106 @@
+"""Agent documentation lookup for axolotl.
+
+These docs are optimized for consumption by AI coding agents.
+The source of truth is docs/agents/*.md and AGENTS.md in the repo root.
+This module resolves those paths at runtime — no files are duplicated
+into the package.
+
+For pip-only installs (no repo checkout), run `axolotl fetch docs` first
+to download the docs locally.
+"""
+
+from pathlib import Path
+
+# Topic name -> path of the doc file, relative to the repo root
+TOPICS = {
+    "overview": "AGENTS.md",
+    "sft": "docs/agents/sft.md",
+    "grpo": "docs/agents/grpo.md",
+    "preference_tuning": "docs/agents/preference_tuning.md",
+    "reward_modelling": "docs/agents/reward_modelling.md",
+    "pretraining": "docs/agents/pretraining.md",
+}
+
+
+def _find_repo_root() -> Path | None:
+    """Climb from this file's directory to the first ancestor that is the repo root."""
+    # A directory is the root when it holds both AGENTS.md and docs/agents/;
+    # the filesystem root itself is never tested.
+    candidate = Path(__file__).resolve().parent
+    while candidate.parent != candidate:
+        has_overview = (candidate / "AGENTS.md").exists()
+        if has_overview and (candidate / "docs" / "agents").is_dir():
+            return candidate
+        candidate = candidate.parent
+    return None
+
+
+def _find_docs_dir() -> Path | None:
+    """Return the cwd when it contains a docs/agents/ directory, else None."""
+    # Without --dest, `axolotl fetch docs` is expected to write to ./docs/ in cwd
+    here = Path.cwd()
+    if (here / "docs" / "agents").is_dir():
+        return here
+    return None
+
+
+def _resolve_path(topic: str) -> Path:
+    """Map a topic name to an existing doc file on disk.
+
+    Lookup order: the repo root found from this file's location, then the
+    cwd when it contains docs/agents/, then (overview only) ./AGENTS.md.
+    Raises FileNotFoundError for unknown topics and for missing files.
+    """
+    if topic not in TOPICS:
+        known = ", ".join(sorted(TOPICS))
+        raise FileNotFoundError(f"Unknown topic: {topic!r}. Available: {known}")
+
+    relative_path = TOPICS[topic]
+
+    # Repo checkout / editable install first, then docs fetched into the cwd
+    for find_root in (_find_repo_root, _find_docs_dir):
+        root = find_root()
+        if root is None:
+            continue
+        candidate = root / relative_path
+        if candidate.exists():
+            return candidate
+
+    # The overview may also sit directly in the cwd without a docs/ tree
+    if topic == "overview":
+        cwd_overview = Path.cwd() / "AGENTS.md"
+        if cwd_overview.exists():
+            return cwd_overview
+
+    raise FileNotFoundError(
+        f"Could not find {relative_path!r}. "
+        "If you installed axolotl via pip, run `axolotl fetch docs` first "
+        "to download the documentation."
+    )
+
+
+def get_doc(topic: str = "overview") -> str:
+    """Return the content of an agent doc by topic name.
+
+    Args:
+        topic: One of the keys in TOPICS; defaults to "overview".
+
+    Returns:
+        The markdown content of the doc, decoded as UTF-8.
+
+    Raises:
+        FileNotFoundError: If the topic is unknown or its file is missing.
+    """
+    return _resolve_path(topic).read_text(encoding="utf-8")
+
+
+def list_topics() -> dict[str, str]:
+    """Return a dict of topic name -> title (first line, leading '#' stripped)."""
+    result = {}
+    for topic in sorted(TOPICS):
+        try:
+            # Decode as UTF-8 explicitly; read_text() otherwise uses the locale encoding
+            text = _resolve_path(topic).read_text(encoding="utf-8")
+            result[topic] = text.split("\n", 1)[0].lstrip("# ").strip()
+        except FileNotFoundError:
+            result[topic] = "(not found — run `axolotl fetch docs`)"
+    return result
--- a/src/axolotl/cli/main.py
+++ b/src/axolotl/cli/main.py
@@ -294,7 +294,9 @@ def merge_lora(config: str, **kwargs):


@cli.command()
-@click.argument("directory", type=click.Choice(["examples", "deepspeed_configs"]))
+@click.argument(
+    "directory", type=click.Choice(["examples", "deepspeed_configs", "docs"])
+)
@click.option("--dest", help="Destination directory")
 def fetch(directory: str, dest: Optional[str]):
    """
@@ -303,9 +305,10 @@ def fetch(directory: str, dest: Optional[str]):
    Available directories:
    - examples: Example configuration files
    - deepspeed_configs: DeepSpeed configuration files
+    - docs: Full documentation (Quarto markdown files)

    Args:
-        directory: One of `examples`, `deepspeed_configs`.
+        directory: One of `examples`, `deepspeed_configs`, `docs`.
        dest: Optional destination directory.
    """
    fetch_from_github(f"{directory}/", dest)
@@ -340,6 +343,112 @@ def delinearize_llama4(model: str, output: str):
    do_delinearize_llama4(model, output)


+@cli.command("agent-docs")
+@click.argument("topic", required=False, default=None)
+@click.option("--list", "list_topics", is_flag=True, help="List available topics")
+def agent_docs(topic: Optional[str], list_topics: bool):
+    """Show agent-optimized documentation.
+
+    Prints reference docs designed for AI coding agents.
+    Docs are read from the repo checkout or from `axolotl fetch docs` output.
+
+    \b
+    Examples:
+        axolotl agent-docs              # overview (start here)
+        axolotl agent-docs grpo         # GRPO reference
+        axolotl agent-docs sft          # SFT reference
+        axolotl agent-docs --list       # list all topics
+    """
+    from axolotl.cli.agent_docs import get_doc, list_topics as _list_topics
+
+    if list_topics:
+        for name, title in _list_topics().items():
+            click.echo(f"  {name:25s} {title}")
+        return
+
+    # No topic given -> overview. Lookup failures (unknown topic, docs not
+    # on disk) arrive as FileNotFoundError and are re-raised as a click usage
+    # error (click.BadParameter) carrying the same message.
+    try:
+        click.echo(get_doc("overview" if topic is None else topic))
+    except FileNotFoundError as exc:
+        raise click.BadParameter(str(exc)) from exc
+
+
+@cli.command("config-schema")
+@click.option(
+    "--format",
+    "output_format",
+    type=click.Choice(["json", "yaml"]),
+    default="json",
+    help="Output format (default: json)",
+)
+@click.option("--field", help="Show schema for a specific field only")
+def config_schema(output_format: str, field: Optional[str]) -> None:
+    """Dump the full config JSON schema.
+
+    Useful for AI agents and tooling to discover all available config options,
+    their types, defaults, and descriptions.
+
+    \b
+    Examples:
+        axolotl config-schema                    # full JSON schema
+        axolotl config-schema --format yaml      # YAML format
+        axolotl config-schema --field adapter     # single field
+    """
+    import json
+
+    try:
+        schema = AxolotlInputConfig.model_json_schema()
+    except (TypeError, ValueError, AttributeError) as exc:
+        # Fallback: build a simplified schema (description, JSON-safe default,
+        # str(annotation)) when full generation fails, e.g. on torch.dtype
+        LOG.warning(
+            "Full JSON schema generation failed, using simplified fallback: %s", exc
+        )
+        fields = {}
+        for name, field_info in AxolotlInputConfig.model_fields.items():
+            entry = {}
+            if field_info.description:
+                entry["description"] = field_info.description
+            if field_info.default is not None:
+                try:
+                    json.dumps(field_info.default)
+                    entry["default"] = field_info.default
+                except (TypeError, ValueError):
+                    entry["default"] = str(field_info.default)
+            annotation = field_info.annotation
+            if annotation is not None:
+                entry["type"] = str(annotation)
+            fields[name] = entry
+        schema = {
+            "properties": fields,
+            "_note": "simplified schema (full generation failed)",
+        }
+
+    if field:
+        props = schema.get("properties", {})
+        if field not in props:
+            # Retry with a case-insensitive match; the first hit wins
+            matches = [k for k in props if k.lower() == field.lower()]
+            if matches:
+                field = matches[0]
+            else:
+                raise click.BadParameter(
+                    f"Unknown field: {field!r}. "
+                    f"Omit --field to dump the full schema, "
+                    f"or pipe to jq: axolotl config-schema | jq '.properties | keys'"
+                )
+        schema = {field: props[field]}
+
+    if output_format == "yaml":
+        import yaml  # pylint: disable=import-outside-toplevel
+
+        click.echo(yaml.dump(schema, default_flow_style=False, sort_keys=False))
+    else:
+        click.echo(json.dumps(schema, indent=2))
+
+
 cli.add_command(lm_eval)