From 851ccb123745df742888f4199d04277da51d5e09 Mon Sep 17 00:00:00 2001 From: Wing Lian Date: Sun, 9 Jun 2024 17:13:28 -0400 Subject: [PATCH] bump deepspeed for fix for grad norm compute putting tensors on different devices (#1699) --- requirements.txt | 2 +- setup.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/requirements.txt b/requirements.txt index b5114bbf6..52f98042c 100644 --- a/requirements.txt +++ b/requirements.txt @@ -5,7 +5,7 @@ transformers==4.41.1 tokenizers==0.19.1 bitsandbytes==0.43.1 accelerate==0.30.1 -deepspeed==0.14.2 +deepspeed @ git+https://github.com/microsoft/DeepSpeed.git@bc48371c5e1fb8fd70fc79285e66201dbb65679b pydantic==2.6.3 addict fire diff --git a/setup.py b/setup.py index 3d1537edf..c7b4e15de 100644 --- a/setup.py +++ b/setup.py @@ -83,7 +83,7 @@ setup( "fused-dense-lib @ git+https://github.com/Dao-AILab/flash-attention@v2.5.8#subdirectory=csrc/fused_dense_lib", ], "deepspeed": [ - "deepspeed==0.14.2", + "deepspeed @ git+https://github.com/microsoft/DeepSpeed.git@bc48371c5e1fb8fd70fc79285e66201dbb65679b", "deepspeed-kernels", ], "mamba-ssm": [