fix: return proper attention for llama4 lora kernel and fsdp2 llama4 example fix (#2943)

* fix: return proper attention for llama4 lora optim

* fix: update fsdp2 llama4 config
Author: NanoCode012
Date: 2025-07-20 00:54:43 +07:00
Committed by: GitHub
Parent: e5734e5cf0
Commit: b986f7c7cb

@@ -74,7 +74,7 @@ fsdp:
 fsdp_config:
   fsdp_version: 2
   fsdp_offload_params: false
-  fsdp_cpu_ram_efficient_loading: true
+  # fsdp_cpu_ram_efficient_loading: true # does not work with load_in_8bit/4bit
   fsdp_auto_wrap_policy: TRANSFORMER_BASED_WRAP
   fsdp_transformer_layer_cls_to_wrap: Llama4TextDecoderLayer
   fsdp_state_dict_type: SHARDED_STATE_DICT
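
For context, the commented-out key interacts with quantized loading: fsdp_cpu_ram_efficient_loading cannot be combined with load_in_8bit/load_in_4bit, so the example disables it. Below is a minimal sketch of how the relevant part of an FSDP2 Llama 4 example config might read after this change; the base_model, load_in_4bit, and adapter lines are assumptions added for illustration and are not part of the diff above:

# Hypothetical excerpt of the FSDP2 Llama 4 example config after this fix.
base_model: meta-llama/Llama-4-Scout-17B-16E  # illustrative model id, not from the diff
load_in_4bit: true   # quantized base weights; conflicts with the flag commented out below
adapter: qlora       # illustrative: LoRA on a 4-bit base

fsdp_config:
  fsdp_version: 2
  fsdp_offload_params: false
  # fsdp_cpu_ram_efficient_loading: true # does not work with load_in_8bit/4bit
  fsdp_auto_wrap_policy: TRANSFORMER_BASED_WRAP
  fsdp_transformer_layer_cls_to_wrap: Llama4TextDecoderLayer
  fsdp_state_dict_type: SHARDED_STATE_DICT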