prepare does all this already for qlora?

Wing Lian
2023-05-23 11:33:41 -04:00
parent 3b4d055edd
commit b9d07aa95a


@@ -204,17 +204,17 @@ def load_model(
     """### Post-processing on the model
     Finally, we need to apply some post-processing on the 8-bit model to enable training, let's freeze all our layers, and cast the layer-norm in `float32` for stability. We also cast the output of the last layer in `float32` for the same reasons.
     """
-    if cfg.adapter == "qlora":
-        for param in model.parameters():
-            param.requires_grad = False  # freeze the model - train adapters later
-            if param.ndim == 1:
-                # cast the small parameters (e.g. layernorm) to fp32 for stability
-                param.data = param.data.to(torch.float32)
-        class CastOutputToFloat(nn.Sequential):
-            def forward(self, x):
-                return super().forward(x).to(torch.float32)
-        model.lm_head = CastOutputToFloat(model.lm_head)
+    # if cfg.adapter == "qlora":
+    #     for param in model.parameters():
+    #         param.requires_grad = False  # freeze the model - train adapters later
+    #         if param.ndim == 1:
+    #             # cast the small parameters (e.g. layernorm) to fp32 for stability
+    #             param.data = param.data.to(torch.float32)
+    #     class CastOutputToFloat(nn.Linear):
+    #         def forward(self, x):
+    #             return super().forward(x).to(torch.float32)
+    #
+    #     model.lm_head = CastOutputToFloat(model.lm_head.in_features, model.lm_head.out_features, model.lm_head.bias)
     if not tokenizer:
         try:
@@ -255,7 +255,7 @@ def load_model(
     embeddings_len = math.ceil(len(tokenizer) / 32) * 32
     model.resize_token_embeddings(embeddings_len)
-    if cfg.adapter and load_in_8bit and not cfg.load_4bit:
+    if ((cfg.adapter == "lora" and load_in_8bit) or cfg.adapter == "qlora") and not cfg.load_4bit:
         logging.info("converting PEFT model w/ prepare_model_for_int8_training")
         model = prepare_model_for_int8_training(model)
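
The question in the commit title is whether PEFT's prepare_model_for_int8_training already performs the post-processing that the now commented-out block did by hand. For reference, a minimal sketch of those hand-rolled steps is shown below; it simply mirrors the deleted block, the function name is made up for illustration, and the real PEFT helper additionally enables gradient checkpointing and input-embedding gradients, with details that vary by PEFT version.

import torch
from torch import nn


def prepare_for_int8_training_sketch(model):
    # freeze the base model; trainable adapter weights are added later by PEFT
    for param in model.parameters():
        param.requires_grad = False
        # keep small 1-d parameters (e.g. layer-norm weights) in fp32 for stability
        if param.ndim == 1:
            param.data = param.data.to(torch.float32)

    # cast the output of the final projection to fp32 so the loss is computed in full precision
    class CastOutputToFloat(nn.Sequential):
        def forward(self, x):
            return super().forward(x).to(torch.float32)

    model.lm_head = CastOutputToFloat(model.lm_head)
    return model

With the condition change above, both a LoRA adapter on an 8-bit model and a QLoRA adapter now flow through prepare_model_for_int8_training, which is why the hand-rolled block is left commented out rather than restored.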