fix duplicate attn_implementation in gpt-oss yamls and flaky caplog tests
This commit is contained in:
@@ -47,7 +47,6 @@ learning_rate: 2e-5
|
||||
bf16: true
|
||||
tf32: true
|
||||
|
||||
attn_implementation: flash_attention_2
|
||||
attn_implementation: kernels-community/vllm-flash-attn3 # this is not needed if using flash_attn >= 2.8.3
|
||||
|
||||
gradient_checkpointing: true
|
||||
|
||||
@@ -43,7 +43,6 @@ learning_rate: 2e-5
|
||||
bf16: true
|
||||
tf32: true
|
||||
|
||||
attn_implementation: flash_attention_2
|
||||
attn_implementation: kernels-community/vllm-flash-attn3 # this is not needed if using flash_attn >= 2.8.3
|
||||
|
||||
gradient_checkpointing: true
|
||||
|
||||
@@ -44,7 +44,6 @@ learning_rate: 2e-5
|
||||
bf16: true
|
||||
tf32: true
|
||||
|
||||
attn_implementation: flash_attention_2
|
||||
attn_implementation: kernels-community/vllm-flash-attn3 # this is not needed if using flash_attn >= 2.8.3
|
||||
|
||||
gradient_checkpointing: true
|
||||
|
||||
@@ -43,7 +43,6 @@ learning_rate: 2e-5
|
||||
bf16: true
|
||||
tf32: true
|
||||
|
||||
attn_implementation: flash_attention_2
|
||||
attn_implementation: kernels-community/vllm-flash-attn3 # this is not needed if using flash_attn >= 2.8.3
|
||||
|
||||
gradient_checkpointing: true
|
||||
|
||||
@@ -56,7 +56,6 @@ learning_rate: 2e-4
|
||||
bf16: true
|
||||
tf32: true
|
||||
|
||||
attn_implementation: flash_attention_2
|
||||
attn_implementation: kernels-community/vllm-flash-attn3 # this is not needed if using flash_attn >= 2.8.3
|
||||
|
||||
gradient_checkpointing: true
|
||||
|
||||
@@ -56,7 +56,6 @@ learning_rate: 2e-4
|
||||
bf16: true
|
||||
tf32: true
|
||||
|
||||
attn_implementation: flash_attention_2
|
||||
attn_implementation: kernels-community/vllm-flash-attn3 # this is not needed if using flash_attn >= 2.8.3
|
||||
|
||||
gradient_checkpointing: true
|
||||
|
||||
Reference in New Issue
Block a user