From 94219f6ee8a4e4b6d28d768c94379406392efe24 Mon Sep 17 00:00:00 2001
From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com>
Date: Mon, 2 Jun 2025 15:54:29 -0700
Subject: [PATCH] chore: update pre-commit hooks (#2745)

* chore: update pre-commit hooks

* trigger linter when pre commit hooks are updated

* fix type checks from upgraded pre-commit

---------

Co-authored-by: djsaunde <1245942+djsaunde@users.noreply.github.com>
Co-authored-by: Wing Lian
---
 .github/workflows/lint.yml  |  1 +
 .pre-commit-config.yaml     |  2 +-
 src/axolotl/kernels/lora.py | 10 +++++-----
 3 files changed, 7 insertions(+), 6 deletions(-)

diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml
index d85892b43..d04450428 100644
--- a/.github/workflows/lint.yml
+++ b/.github/workflows/lint.yml
@@ -9,6 +9,7 @@ on:
       - '.github/workflows/*.yml'
       - "*.[q]md"
       - "examples/**/*.y[a]?ml"
+      - ".pre-commit-config.yaml"
   workflow_dispatch:

 jobs:

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index be78d0d3e..195746d2d 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -27,7 +27,7 @@ repos:
     hooks:
       - id: pylint
   - repo: https://github.com/pre-commit/mirrors-mypy
-    rev: v1.15.0
+    rev: v1.16.0
    hooks:
       - id: mypy
         additional_dependencies:

diff --git a/src/axolotl/kernels/lora.py b/src/axolotl/kernels/lora.py
index 03fca6df4..63c9e57bd 100644
--- a/src/axolotl/kernels/lora.py
+++ b/src/axolotl/kernels/lora.py
@@ -280,19 +280,19 @@ class LoRA_MLP(torch.autograd.Function):

         # Initialize and compute LoRA gradients
         d_down_A = d_down_B = d_up_A = d_up_B = d_gate_A = d_gate_B = None
-        if down_A is not None:
+        if down_A is not None and down_B is not None:
             d_down_A = h.t() @ (grad_output @ down_B.t())
             d_down_B = (down_A.t() @ h.t()) @ grad_output
             d_down_A *= down_scale
             d_down_B *= down_scale

-        if up_A is not None:
+        if up_A is not None and up_B is not None:
             d_up_A = X.t() @ (grad_up @ up_B.t())
             d_up_B = (up_A.t() @ X.t()) @ grad_up
             d_up_A *= up_scale
             d_up_B *= up_scale

-        if gate_A is not None:
+        if gate_A is not None and gate_B is not None:
             d_gate_A = X.t() @ (grad_gate @ gate_B.t())
             d_gate_B = (gate_A.t() @ X.t()) @ grad_gate
             d_gate_A *= gate_scale
@@ -311,7 +311,7 @@ class LoRA_MLP(torch.autograd.Function):
             del up_weight

         # Note the .to(dtype) only where mixing LoRA with base weights
-        if up_A is not None:
+        if up_A is not None and up_B is not None:
             dX += grad_up @ up_B.to(dtype).t() @ (up_scale * up_A.to(dtype).t())

         # Gate projection gradients
@@ -319,7 +319,7 @@ class LoRA_MLP(torch.autograd.Function):
             dX += grad_gate @ gate_weight.t()
             del gate_weight

-        if gate_A is not None:
+        if gate_A is not None and gate_B is not None:
             dX += (
                 grad_gate
                 @ gate_B.to(dtype).t()
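
The lora.py hunks pair each `X_A is not None` check with the matching `X_B is not None` check. The commit message attributes this to type-check failures after the mypy bump, which is consistent with mypy narrowing Optional types only for names that are explicitly checked: guarding on `down_A` alone does not make `down_B.t()` well-typed if `down_B` is also optional. Below is a minimal sketch of that narrowing pattern; the `pair_product` helper and the tensors are illustrative only, not part of the repository.

# Minimal sketch (not part of the patch) of the Optional-narrowing pattern.
from typing import Optional

import torch


def pair_product(
    a: Optional[torch.Tensor], b: Optional[torch.Tensor]
) -> Optional[torch.Tensor]:
    # Checking both names narrows both Optionals, so mypy accepts `b.t()`.
    # With `if a is not None:` alone, `b` would still be Optional here.
    if a is not None and b is not None:
        return a @ b.t()
    return None


if __name__ == "__main__":
    x = torch.randn(2, 3)
    y = torch.randn(4, 3)
    result = pair_product(x, y)
    if result is not None:
        print(result.shape)  # torch.Size([2, 4])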