Bump DeepSpeed to pick up the fix for gradient-norm computation placing tensors on different devices (#1699)
This commit is contained in:
@@ -5,7 +5,7 @@ transformers==4.41.1
|
||||
tokenizers==0.19.1
|
||||
bitsandbytes==0.43.1
|
||||
accelerate==0.30.1
|
||||
deepspeed==0.14.2
|
||||
deepspeed @ git+https://github.com/microsoft/DeepSpeed.git@bc48371c5e1fb8fd70fc79285e66201dbb65679b
|
||||
pydantic==2.6.3
|
||||
addict
|
||||
fire
|
||||
|
||||
2
setup.py
2
setup.py
@@ -83,7 +83,7 @@ setup(
|
||||
"fused-dense-lib @ git+https://github.com/Dao-AILab/flash-attention@v2.5.8#subdirectory=csrc/fused_dense_lib",
|
||||
],
|
||||
"deepspeed": [
|
||||
"deepspeed==0.14.2",
|
||||
"deepspeed @ git+https://github.com/microsoft/DeepSpeed.git@bc48371c5e1fb8fd70fc79285e66201dbb65679b",
|
||||
"deepspeed-kernels",
|
||||
],
|
||||
"mamba-ssm": [
|
||||
|
||||
Reference in New Issue
Block a user