diff --git a/examples/gpt-oss/README.md b/examples/gpt-oss/README.md index 98f3ea892..0aa04a71c 100644 --- a/examples/gpt-oss/README.md +++ b/examples/gpt-oss/README.md @@ -67,9 +67,23 @@ mv ./outputs/gpt-oss-out/merged/* ./outputs/gpt-oss-out/ ### Inferencing your fine-tuned model +#### vLLM + GPT-OSS support in vLLM does not exist in a stable release yet. See https://x.com/MaziyarPanahi/status/1955741905515323425 for more information about using a special vllm-openai docker image for inferencing with vLLM. +Optionally, vLLM can be installed from nightly: + +```bash +pip install --no-build-isolation --pre -U vllm --extra-index-url https://wheels.vllm.ai/nightly +``` +and the vLLM server can be started with the following command (modify `--tensor-parallel-size 8` to match your environment): +```bash +vllm serve ./outputs/gpt-oss-out/ --served-model-name axolotl/gpt-oss-20b --host 0.0.0.0 --port 8888 --tensor-parallel-size 8 +``` + +#### SGLang + SGLang has 0-day support in main, see https://github.com/sgl-project/sglang/issues/8833 for information on installing SGLang from source. 
Once you've installed SGLang, run the following command to launch a SGLang server: diff --git a/examples/gpt-oss/gpt-oss-20b-fft-fsdp2-offload.yaml b/examples/gpt-oss/gpt-oss-20b-fft-fsdp2-offload.yaml index a6ba83433..1b142b6c3 100644 --- a/examples/gpt-oss/gpt-oss-20b-fft-fsdp2-offload.yaml +++ b/examples/gpt-oss/gpt-oss-20b-fft-fsdp2-offload.yaml @@ -15,7 +15,7 @@ datasets: field_thinking: thinking template_thinking_key: thinking -dataset_prepared_path: last_run_prepared +dataset_prepared_path: ./outputs/last_run_prepared val_set_size: 0 output_dir: ./outputs/gpt-oss-out/ diff --git a/examples/gpt-oss/gpt-oss-20b-fft-fsdp2.yaml b/examples/gpt-oss/gpt-oss-20b-fft-fsdp2.yaml index aa658c863..bdbb70fae 100644 --- a/examples/gpt-oss/gpt-oss-20b-fft-fsdp2.yaml +++ b/examples/gpt-oss/gpt-oss-20b-fft-fsdp2.yaml @@ -15,7 +15,7 @@ datasets: field_thinking: thinking template_thinking_key: thinking -dataset_prepared_path: last_run_prepared +dataset_prepared_path: ./outputs/last_run_prepared val_set_size: 0 output_dir: ./outputs/gpt-oss-out/ diff --git a/src/axolotl/cli/cloud/modal_.py b/src/axolotl/cli/cloud/modal_.py index 240c6d894..0509cba69 100644 --- a/src/axolotl/cli/cloud/modal_.py +++ b/src/axolotl/cli/cloud/modal_.py @@ -82,7 +82,7 @@ class ModalCloud(Cloud): return res def get_image(self): - docker_tag = "main-py3.11-cu124-2.6.0" + docker_tag = "main-py3.11-cu126-2.7.1" if self.config.docker_tag: docker_tag = self.config.docker_tag docker_image = f"axolotlai/axolotl:{docker_tag}" diff --git a/src/axolotl/cli/inference.py b/src/axolotl/cli/inference.py index 83b567b64..d03a91bc7 100644 --- a/src/axolotl/cli/inference.py +++ b/src/axolotl/cli/inference.py @@ -64,7 +64,7 @@ def do_inference( importlib.import_module("axolotl.prompters"), prompter ) elif cfg.chat_template: - chat_template_str = get_chat_template(cfg.chat_template) + chat_template_str = get_chat_template(cfg.chat_template, tokenizer=tokenizer) elif cfg.datasets[0].type == "chat_template": 
chat_template_str = get_chat_template_from_config( cfg=cfg, ds_cfg=cfg.datasets[0], tokenizer=tokenizer diff --git a/src/axolotl/cli/preprocess.py b/src/axolotl/cli/preprocess.py index 5d692c315..4120062d8 100644 --- a/src/axolotl/cli/preprocess.py +++ b/src/axolotl/cli/preprocess.py @@ -97,7 +97,8 @@ def do_cli( """ # pylint: disable=duplicate-code os.environ["AXOLOTL_IS_PREPROCESS"] = "1" - parsed_cfg = load_cfg(config, **kwargs) + is_preprocess = kwargs.pop("is_preprocess", True) + parsed_cfg = load_cfg(config, is_preprocess=is_preprocess, **kwargs) parsed_cfg.is_preprocess = True parser = transformers.HfArgumentParser(PreprocessCliArgs) parsed_cli_args, _ = parser.parse_args_into_dataclasses(