feat: add doc for expert quantization, glm45 air example configs, and update readme for release (#3452) [skip ci]

* chore: rename without period

* feat: add glm45 air

* feat: add doc on expert quantization

* feat: update base readme with new changes

* chore: cleanup

* chore: cleanup

* chore: cleanup

* fix: disable quantize_moe_expert on merge per comment

* chore: add kernel info to optimizations doc
This commit is contained in:
NanoCode012
2026-03-05 21:58:09 +07:00
committed by GitHub
parent b6b8db805a
commit 753906cfc7
13 changed files with 248 additions and 29 deletions

View File

@@ -71,6 +71,7 @@ def do_cli(config: Union[Path, str] = Path("examples/"), **kwargs) -> None:
merge_lora=True,
load_in_8bit=False,
load_in_4bit=False,
quantize_moe_experts=False,
flash_attention=False,
context_parallel_size=None,
deepspeed=None,

View File

@@ -37,6 +37,7 @@ SUPPORTED_MULTIPACK_MODEL_TYPES = [
"deepseek_v3",
"glm",
"glm4",
"glm4_moe",
"smollm3",
"granite",
"granitemoe",