chore: update pre-commit hooks (#2745)

* chore: update pre-commit hooks

* trigger linter when pre-commit hooks are updated

* fix type checks from upgraded pre-commit

---------

Co-authored-by: djsaunde <1245942+djsaunde@users.noreply.github.com>
Co-authored-by: Wing Lian <wing@axolotl.ai>
This commit is contained in:
github-actions[bot]
2025-06-02 15:54:29 -07:00
committed by GitHub
parent ecc719f5c7
commit 94219f6ee8
3 changed files with 7 additions and 6 deletions

View File

@@ -9,6 +9,7 @@ on:
- '.github/workflows/*.yml'
- "*.[q]md"
- "examples/**/*.y[a]?ml"
- ".pre-commit-config.yaml"
workflow_dispatch:
jobs:

View File

@@ -27,7 +27,7 @@ repos:
hooks:
- id: pylint
- repo: https://github.com/pre-commit/mirrors-mypy
rev: v1.15.0
rev: v1.16.0
hooks:
- id: mypy
additional_dependencies:

View File

@@ -280,19 +280,19 @@ class LoRA_MLP(torch.autograd.Function):
# Initialize and compute LoRA gradients
d_down_A = d_down_B = d_up_A = d_up_B = d_gate_A = d_gate_B = None
if down_A is not None:
if down_A is not None and down_B is not None:
d_down_A = h.t() @ (grad_output @ down_B.t())
d_down_B = (down_A.t() @ h.t()) @ grad_output
d_down_A *= down_scale
d_down_B *= down_scale
if up_A is not None:
if up_A is not None and up_B is not None:
d_up_A = X.t() @ (grad_up @ up_B.t())
d_up_B = (up_A.t() @ X.t()) @ grad_up
d_up_A *= up_scale
d_up_B *= up_scale
if gate_A is not None:
if gate_A is not None and gate_B is not None:
d_gate_A = X.t() @ (grad_gate @ gate_B.t())
d_gate_B = (gate_A.t() @ X.t()) @ grad_gate
d_gate_A *= gate_scale
@@ -311,7 +311,7 @@ class LoRA_MLP(torch.autograd.Function):
del up_weight
# Note the .to(dtype) only where mixing LoRA with base weights
if up_A is not None:
if up_A is not None and up_B is not None:
dX += grad_up @ up_B.to(dtype).t() @ (up_scale * up_A.to(dtype).t())
# Gate projection gradients
@@ -319,7 +319,7 @@ class LoRA_MLP(torch.autograd.Function):
dX += grad_gate @ gate_weight.t()
del gate_weight
if gate_A is not None:
if gate_A is not None and gate_B is not None:
dX += (
grad_gate
@ gate_B.to(dtype).t()