CLI: add --launcher option, support launcher args, cleanup, refactor (#2924)

* add --launcher option; explicit True/False bool args; small cleanup * refactor * add torchrun, accelerate cli args * add rdzv arg default + tests * update _quarto * coderabbit * fix * we can't set rdvz_id independently across nodes * coderabbit * fix tests
2025-07-30 15:46:56 -04:00
parent 22810c97b7
commit bb1cae1a20
31 changed files with 1417 additions and 541 deletions
--- a/tests/cli/test_cli_base.py
+++ b/tests/cli/test_cli_base.py
@@ -17,16 +17,23 @@ class BaseCliTest:
            command: Command to test (train/evaluate)
        """
        # Test missing config file
-        result = cli_runner.invoke(cli, [command, "--no-accelerate"])
+        result = cli_runner.invoke(cli, [command, "--launcher", "python"])
        assert result.exit_code != 0

        # Test non-existent config file
-        result = cli_runner.invoke(cli, [command, "nonexistent.yml", "--no-accelerate"])
+        result = cli_runner.invoke(
+            cli, [command, "nonexistent.yml", "--launcher", "python"]
+        )
        assert result.exit_code != 0
        assert "Error: Invalid value for 'CONFIG'" in result.output

    def _test_basic_execution(
-        self, cli_runner, tmp_path: Path, valid_test_config: str, command: str
+        self,
+        cli_runner,
+        tmp_path: Path,
+        valid_test_config: str,
+        command: str,
+        train: bool = True,
    ):
        """Test basic execution with accelerate.

@@ -35,6 +42,7 @@ class BaseCliTest:
            tmp_path: Temporary path fixture
            valid_test_config: Valid config fixture
            command: Command to test (train/evaluate)
+            train: Whether to test training (default) or evaluation
        """
        config_path = tmp_path / "config.yml"
        config_path.write_text(valid_test_config)
@@ -43,15 +51,21 @@ class BaseCliTest:
            result = cli_runner.invoke(cli, [command, str(config_path)])

            assert mock.called
-            assert mock.call_args.args[0] == [
+
+            expected = [
                "accelerate",
                "launch",
                "-m",
                f"axolotl.cli.{command}",
                str(config_path),
-                "--debug-num-examples",
-                "0",
+                "--debug=False",
+                "--debug-text-only=False",
+                "--debug-num-examples=0",
            ]
+            if train:
+                expected.append("--shard=False")
+
+            assert mock.call_args.args[0] == expected
            assert mock.call_args.kwargs == {"check": True}
            assert result.exit_code == 0