simplify logic (#1856)

This commit is contained in:
Wing Lian
2024-08-23 20:23:08 -04:00
committed by GitHub
parent 77a4b9cda2
commit 22f4eafa55

View File

@@ -589,19 +589,12 @@ def load_model(
# sample packing uses a custom FlashAttention-2 (FA2) patch
if cfg.flash_attention:
if not cfg.sample_packing:
if cfg.s2_attention:
pass
# most other models support flash attention; we can define exceptions as they come up
model_kwargs["attn_implementation"] = "flash_attention_2"
model_config._attn_implementation = ( # pylint: disable=protected-access
"flash_attention_2"
)
else:
model_kwargs["attn_implementation"] = "flash_attention_2"
model_config._attn_implementation = ( # pylint: disable=protected-access
"flash_attention_2"
)
if not cfg.sample_packing and cfg.s2_attention:
pass
model_kwargs["attn_implementation"] = "flash_attention_2"
model_config._attn_implementation = ( # pylint: disable=protected-access
"flash_attention_2"
)
elif cfg.sdp_attention:
model_kwargs["attn_implementation"] = "sdpa"
model_config._attn_implementation = "sdpa" # pylint: disable=protected-access