From 6f15da4cac71c96ff541ac76c0d32e4cc7aede8e Mon Sep 17 00:00:00 2001 From: Wing Lian Date: Mon, 6 Apr 2026 10:00:55 -0700 Subject: [PATCH] make it easier for agents to discover docs (#3579) [skip ci] * make it easier for agents to discover docs * fixup pr comments --- .github/workflows/tests.yml | 10 +++ AGENTS.md | 3 + MANIFEST.in | 2 + README.md | 23 +++++ src/axolotl/cli/agent_docs/__init__.py | 106 +++++++++++++++++++++++ src/axolotl/cli/main.py | 113 ++++++++++++++++++++++++- 6 files changed, 255 insertions(+), 2 deletions(-) create mode 100644 src/axolotl/cli/agent_docs/__init__.py diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index d753afe01..b1e9c718e 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -220,6 +220,16 @@ jobs: run: | axolotl --help + - name: Verify agent docs are discoverable + run: | + # Agent docs live in docs/agents/ (source of truth) and are resolved + # at runtime from the repo checkout or via `axolotl fetch docs` + axolotl agent-docs --list + axolotl agent-docs | grep -q "Fine-tuning framework" + axolotl agent-docs grpo | grep -q "GRPO" + axolotl agent-docs sft | grep -q "SFT" + python -c "from axolotl.cli.agent_docs import get_doc, list_topics; assert len(list_topics()) >= 5; assert 'GRPO' in get_doc('grpo')" + - name: Show HF cache run: hf cache ls diff --git a/AGENTS.md b/AGENTS.md index b81904e43..6fb81e506 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -16,6 +16,9 @@ axolotl inference config.yaml # Interactive inference axolotl merge-lora config.yaml # Merge LoRA adapter into base model axolotl vllm-serve config.yaml # Start vLLM server for GRPO/EBFT training axolotl fetch examples # Download example configs +axolotl agent-docs # Show agent-optimized docs (bundled with pip package) +axolotl agent-docs grpo # Topic-specific agent reference +axolotl config-schema # Dump config JSON schema ``` ## Training Methods diff --git a/MANIFEST.in b/MANIFEST.in index 3fbb0edca..30cd07242 100644 
--- a/MANIFEST.in +++ b/MANIFEST.in @@ -3,4 +3,6 @@ include README.md include LICENSE include src/setuptools_axolotl_dynamic_dependencies.py include src/axolotl/utils/chat_templates/templates/*.jinja +include AGENTS.md +recursive-include docs/agents *.md recursive-include axolotl *.py diff --git a/README.md b/README.md index e353d20ad..3d0710f0f 100644 --- a/README.md +++ b/README.md @@ -157,6 +157,29 @@ That's it! Check out our [Getting Started Guide](https://docs.axolotl.ai/docs/ge - [API Reference](https://docs.axolotl.ai/docs/api/) - Auto-generated code documentation - [FAQ](https://docs.axolotl.ai/docs/faq.html) - Frequently asked questions +## AI Agent Support + +Axolotl ships with built-in documentation optimized for AI coding agents (Claude Code, Cursor, Copilot, etc.). These docs are bundled with the pip package — no repo clone needed. + +```bash +# Show overview and available training methods +axolotl agent-docs + +# Topic-specific references +axolotl agent-docs sft # supervised fine-tuning +axolotl agent-docs grpo # GRPO online RL +axolotl agent-docs preference_tuning # DPO, KTO, ORPO, SimPO +axolotl agent-docs reward_modelling # outcome and process reward models +axolotl agent-docs pretraining # continual pretraining +axolotl agent-docs --list # list all topics + +# Dump config schema for programmatic use +axolotl config-schema +axolotl config-schema --field adapter +``` + +If you're working with the source repo, agent docs are also available at `docs/agents/` and the project overview is in `AGENTS.md`. + ## 🤝 Getting Help - Join our [Discord community](https://discord.gg/HhrNrHJPRb) for support diff --git a/src/axolotl/cli/agent_docs/__init__.py b/src/axolotl/cli/agent_docs/__init__.py new file mode 100644 index 000000000..d229184c0 --- /dev/null +++ b/src/axolotl/cli/agent_docs/__init__.py @@ -0,0 +1,106 @@ +"""Bundled agent documentation for axolotl. + +These docs are optimized for consumption by AI coding agents. 
+The source of truth is docs/agents/*.md and AGENTS.md in the repo root. +This module resolves those paths at runtime — no files are duplicated +into the package. + +For pip-only installs (no repo checkout), run `axolotl fetch docs` first +to download the docs locally. +""" + +from pathlib import Path + +# Topic name -> (filename in docs/agents/, fallback filename for AGENTS.md) +TOPICS = { + "overview": "AGENTS.md", + "sft": "docs/agents/sft.md", + "grpo": "docs/agents/grpo.md", + "preference_tuning": "docs/agents/preference_tuning.md", + "reward_modelling": "docs/agents/reward_modelling.md", + "pretraining": "docs/agents/pretraining.md", +} + + +def _find_repo_root() -> Path | None: + """Walk up from this file to find the repo root (contains AGENTS.md).""" + # In an editable install or repo checkout, walk up from + # src/axolotl/cli/agent_docs/ to find the repo root + current = Path(__file__).resolve().parent + while current != current.parent: + if (current / "AGENTS.md").exists() and (current / "docs" / "agents").is_dir(): + return current + current = current.parent + return None + + +def _find_docs_dir() -> Path | None: + """Find a fetched docs directory (from `axolotl fetch docs`).""" + # axolotl fetch docs --dest defaults to ./docs/ in cwd + cwd_docs = Path.cwd() / "docs" / "agents" + if cwd_docs.is_dir(): + return Path.cwd() + return None + + +def _resolve_path(topic: str) -> Path: + """Resolve a topic name to the actual file path.""" + if topic not in TOPICS: + available = ", ".join(sorted(TOPICS.keys())) + raise FileNotFoundError(f"Unknown topic: {topic!r}. 
Available: {available}") + + relative_path = TOPICS[topic] + + # Try repo root first (editable install / repo checkout) + repo_root = _find_repo_root() + if repo_root: + candidate = repo_root / relative_path + if candidate.exists(): + return candidate + + # Try cwd (fetched docs via `axolotl fetch docs`) + docs_root = _find_docs_dir() + if docs_root: + candidate = docs_root / relative_path + if candidate.exists(): + return candidate + + # Also check cwd directly for AGENTS.md + if topic == "overview": + cwd_agents = Path.cwd() / "AGENTS.md" + if cwd_agents.exists(): + return cwd_agents + + raise FileNotFoundError( + f"Could not find {relative_path!r}. " + f"If you installed axolotl via pip, run `axolotl fetch docs` first " + f"to download the documentation." + ) + + +def get_doc(topic: str = "overview") -> str: + """Return the content of an agent doc by topic name. + + Args: + topic: One of the keys in TOPICS, or "overview" (default). + + Returns: + The markdown content of the doc. + + Raises: + FileNotFoundError: If the topic can't be found. 
+ """ + return _resolve_path(topic).read_text() + + +def list_topics() -> dict[str, str]: + """Return a dict of topic name -> first line (title) of each doc.""" + result = {} + for topic in sorted(TOPICS.keys()): + try: + path = _resolve_path(topic) + first_line = path.read_text().split("\n", 1)[0].lstrip("# ").strip() + result[topic] = first_line + except FileNotFoundError: + result[topic] = "(not found — run `axolotl fetch docs`)" + return result diff --git a/src/axolotl/cli/main.py b/src/axolotl/cli/main.py index c0ac32050..cca6481e6 100644 --- a/src/axolotl/cli/main.py +++ b/src/axolotl/cli/main.py @@ -294,7 +294,9 @@ def merge_lora(config: str, **kwargs): @cli.command() -@click.argument("directory", type=click.Choice(["examples", "deepspeed_configs"])) +@click.argument( + "directory", type=click.Choice(["examples", "deepspeed_configs", "docs"]) +) @click.option("--dest", help="Destination directory") def fetch(directory: str, dest: Optional[str]): """ @@ -303,9 +305,10 @@ def fetch(directory: str, dest: Optional[str]): Available directories: - examples: Example configuration files - deepspeed_configs: DeepSpeed configuration files + - docs: Full documentation (Quarto markdown files) Args: - directory: One of `examples`, `deepspeed_configs`. + directory: One of `examples`, `deepspeed_configs`, `docs`. dest: Optional destination directory. """ fetch_from_github(f"{directory}/", dest) @@ -340,6 +343,112 @@ def delinearize_llama4(model: str, output: str): do_delinearize_llama4(model, output) +@cli.command("agent-docs") +@click.argument("topic", required=False, default=None) +@click.option("--list", "list_topics", is_flag=True, help="List available topics") +def agent_docs(topic: Optional[str], list_topics: bool): + """Show agent-optimized documentation. + + Prints reference docs designed for AI coding agents. + These docs are bundled with the package — no network access needed. 
def _simplified_properties(model) -> dict:
    """Describe each field of a pydantic model without full schema generation.

    Used as a fallback when ``model_json_schema()`` fails. Each entry may
    carry ``description``, ``default`` and ``type`` keys.

    Args:
        model: A pydantic model class exposing ``model_fields``.

    Returns:
        Mapping of field name -> simplified description dict.
    """
    import json

    properties = {}
    for name, field_info in model.model_fields.items():
        entry = {}
        if field_info.description:
            entry["description"] = field_info.description

        # Required fields and default_factory fields hold pydantic's
        # "undefined" sentinel in `.default`; emitting it would advertise
        # the string "PydanticUndefined" as a real default value.
        has_static_default = (
            not field_info.is_required() and field_info.default_factory is None
        )
        if has_static_default and field_info.default is not None:
            default = field_info.default
            try:
                json.dumps(default)
            except (TypeError, ValueError):
                # Not JSON-serializable: fall back to its string form.
                default = str(default)
            entry["default"] = default

        if field_info.annotation is not None:
            entry["type"] = str(field_info.annotation)
        properties[name] = entry
    return properties


def _select_field_schema(schema: dict, field: str) -> dict:
    """Return ``{name: schema}`` for a single property of *schema*.

    An exact name match wins; otherwise a case-insensitive match is tried.

    Raises:
        click.BadParameter: If no property matches *field*.
    """
    props = schema.get("properties", {})
    if field not in props:
        # Try case-insensitive match
        matches = [key for key in props if key.lower() == field.lower()]
        if not matches:
            raise click.BadParameter(
                f"Unknown field: {field!r}. "
                f"Omit --field to dump the full schema, "
                f"or pipe to jq: axolotl config-schema | jq '.properties | keys'"
            )
        field = matches[0]
    return {field: props[field]}


@cli.command("agent-docs")
@click.argument("topic", required=False, default=None)
@click.option("--list", "list_topics", is_flag=True, help="List available topics")
def agent_docs(topic: Optional[str], list_topics: bool):
    """Show agent-optimized documentation.

    Prints reference docs designed for AI coding agents. Docs are read from
    a repo checkout (or editable install), or from the current directory
    after running `axolotl fetch docs` (required for pip-only installs).

    \b
    Examples:
        axolotl agent-docs           # overview (start here)
        axolotl agent-docs grpo      # GRPO reference
        axolotl agent-docs sft       # SFT reference
        axolotl agent-docs --list    # list all topics
    """
    from axolotl.cli.agent_docs import get_doc, list_topics as _list_topics

    if list_topics:
        for name, title in _list_topics().items():
            click.echo(f" {name:25s} {title}")
        return

    if topic is None:
        topic = "overview"

    try:
        click.echo(get_doc(topic))
    except FileNotFoundError as exc:
        # get_doc raises FileNotFoundError both for unknown topics and for
        # docs that are missing on disk; surface either as a usage error.
        raise click.BadParameter(str(exc)) from exc


@cli.command("config-schema")
@click.option(
    "--format",
    "output_format",
    type=click.Choice(["json", "yaml"]),
    default="json",
    help="Output format (default: json)",
)
@click.option("--field", help="Show schema for a specific field only")
def config_schema(output_format: str, field: Optional[str]):
    """Dump the full config JSON schema.

    Useful for AI agents and tooling to discover all available config options,
    their types, defaults, and descriptions.

    \b
    Examples:
        axolotl config-schema                  # full JSON schema
        axolotl config-schema --format yaml    # YAML format
        axolotl config-schema --field adapter  # single field
    """
    import json

    try:
        schema = AxolotlInputConfig.model_json_schema()
    except (TypeError, ValueError, AttributeError) as exc:
        # Fallback: dump field names, types, and defaults when full schema
        # generation fails (e.g. torch.dtype not JSON-serializable)
        LOG.warning(
            "Full JSON schema generation failed, using simplified fallback: %s", exc
        )
        schema = {
            "properties": _simplified_properties(AxolotlInputConfig),
            "_note": "simplified schema (full generation failed)",
        }

    if field:
        schema = _select_field_schema(schema, field)

    if output_format == "yaml":
        import yaml  # pylint: disable=import-outside-toplevel

        click.echo(yaml.dump(schema, default_flow_style=False, sort_keys=False))
    else:
        click.echo(json.dumps(schema, indent=2))


cli.add_command(lm_eval)