migrate example configs to canonical attn_implementation
This commit is contained in:
@@ -24,7 +24,7 @@ output_dir: ./outputs/out_gemma/
|
||||
|
||||
sequence_len: 8096
|
||||
sample_packing: true
|
||||
flash_attention: true
|
||||
attn_implementation: flash_attention_2
|
||||
|
||||
wandb_entity:
|
||||
wandb_watch:
|
||||
|
||||
@@ -24,7 +24,7 @@ output_dir: ./outputs/qat_out_gemma/
|
||||
|
||||
sequence_len: 8096
|
||||
sample_packing: true
|
||||
flash_attention: true
|
||||
attn_implementation: flash_attention_2
|
||||
|
||||
qat:
|
||||
activation_dtype: nvfp4
|
||||
|
||||
@@ -24,7 +24,7 @@ output_dir: ./outputs/out_math_gemma/
|
||||
|
||||
sequence_len: 4096
|
||||
sample_packing: true
|
||||
flash_attention: true
|
||||
attn_implementation: flash_attention_2
|
||||
|
||||
wandb_entity:
|
||||
wandb_watch:
|
||||
|
||||
@@ -24,7 +24,7 @@ output_dir: ./outputs/qat_out_math_gemma/
|
||||
|
||||
sequence_len: 4096
|
||||
sample_packing: true
|
||||
flash_attention: true
|
||||
attn_implementation: flash_attention_2
|
||||
|
||||
qat:
|
||||
activation_dtype: nvfp4
|
||||
|
||||
@@ -24,7 +24,7 @@ output_dir: ./outputs/out_math_gemma27/
|
||||
|
||||
sequence_len: 4096
|
||||
sample_packing: true
|
||||
flash_attention: true
|
||||
attn_implementation: flash_attention_2
|
||||
|
||||
wandb_entity:
|
||||
wandb_watch:
|
||||
|
||||
@@ -24,7 +24,7 @@ output_dir: ./outputs/qat_out_math_gemma27/
|
||||
|
||||
sequence_len: 4096
|
||||
sample_packing: true
|
||||
flash_attention: true
|
||||
attn_implementation: flash_attention_2
|
||||
|
||||
qat:
|
||||
activation_dtype: nvfp4
|
||||
|
||||
@@ -24,7 +24,7 @@ output_dir: ./outputs/out_math_72b/
|
||||
|
||||
sequence_len: 4096
|
||||
sample_packing: true
|
||||
flash_attention: true
|
||||
attn_implementation: flash_attention_2
|
||||
|
||||
wandb_entity:
|
||||
wandb_watch:
|
||||
|
||||
@@ -24,7 +24,7 @@ output_dir: ./outputs/qat_out_math_72b/
|
||||
|
||||
sequence_len: 4096
|
||||
sample_packing: true
|
||||
flash_attention: true
|
||||
attn_implementation: flash_attention_2
|
||||
|
||||
qat:
|
||||
activation_dtype: nvfp4
|
||||
|
||||
@@ -24,7 +24,7 @@ output_dir: ./outputs/out_qwen72b/
|
||||
|
||||
sequence_len: 8096
|
||||
sample_packing: true
|
||||
flash_attention: true
|
||||
attn_implementation: flash_attention_2
|
||||
|
||||
wandb_entity:
|
||||
wandb_watch:
|
||||
|
||||
@@ -24,7 +24,7 @@ output_dir: ./outputs/qat_out_qwen72b/
|
||||
|
||||
sequence_len: 8096
|
||||
sample_packing: true
|
||||
flash_attention: true
|
||||
attn_implementation: flash_attention_2
|
||||
|
||||
qat:
|
||||
activation_dtype: nvfp4
|
||||
|
||||
Reference in New Issue
Block a user