Migrate QAT API; fix axolotl quantize for QAT-ed models; add NVFP4 (#3107)

2025-09-12 10:55:50 +01:00
parent 0401a15888
commit 58d67bf98d
16 changed files with 554 additions and 339 deletions
--- a/tests/e2e/test_qat.py
+++ b/tests/e2e/test_qat.py
@@ -43,7 +43,7 @@ class TestQATLlama:
                "qat": {
                    "quantize_embedding": True,
                    "activation_dtype": "int8",
-                    "weight_dtype": "int8",
+                    "weight_dtype": "int4",
                    "group_size": 8,
                },
                "num_epochs": 1,
@@ -111,7 +111,7 @@ class TestQATLlama:
                "qat": {
                    "quantize_embedding": True,
                    "activation_dtype": "int8",
-                    "weight_dtype": "int8",
+                    "weight_dtype": "int4",
                    "group_size": 8,
                },
                "save_first_step": False,