don't use mask expansion for inference (#392)
This commit is contained in:
@@ -138,8 +138,10 @@ def load_model(
         LOG.info("patching with xpos rope")
         replace_llama_rope_with_xpos_rope()
 
-    if cfg.is_llama_derived_model and (
-        cfg.max_packed_sequence_len or cfg.sample_packing
+    if (
+        cfg.is_llama_derived_model
+        and (cfg.max_packed_sequence_len or cfg.sample_packing)
+        and not cfg.inference
     ):
         from axolotl.monkeypatch.llama_expand_mask import hijack_expand_mask
 
Reference in New Issue
Block a user