Built site for gh-pages

This commit is contained in:
Quarto GHA Workflow Runner
2024-04-12 13:03:34 +00:00
parent b4f51e2d13
commit a98fe5469e
4 changed files with 30 additions and 28 deletions

View File

@@ -65,12 +65,14 @@ deepspeed:
weight_decay: 0.0
# `fsdp` holds a two-item list of option strings (flush block-sequence style).
# NOTE(review): presumably these select the sharding strategy and enable
# automatic module wrapping — confirm against the consuming tool's docs.
fsdp:
- full_shard
- auto_wrap
# Detailed settings that accompany the `fsdp` list.
# NOTE(review): in this view every `fsdp_*` key sits at the same column as
# `fsdp_config:`, so a YAML parser would read `fsdp_config` as null and the
# keys below as top-level siblings. Presumably they are nested children in the
# real file and the indentation was lost in extraction — confirm.
fsdp_config:
fsdp_limit_all_gathers: true
fsdp_sync_module_states: true
fsdp_offload_params: true
fsdp_use_orig_params: false
fsdp_cpu_ram_efficient_loading: true
fsdp_auto_wrap_policy: TRANSFORMER_BASED_WRAP
# NOTE(review): presumably the layer class that the wrap policy above is
# applied to — verify.
fsdp_transformer_layer_cls_to_wrap: LlamaDecoderLayer
# NOTE(review): `fsdp_state_dict_type` is listed twice. Duplicate keys are
# invalid YAML, and most parsers silently keep the last value
# (FULL_STATE_DICT). This looks like the removed/added pair of a diff whose
# -/+ markers were stripped — confirm which value is intended.
fsdp_state_dict_type: SHARDED_STATE_DICT
fsdp_state_dict_type: FULL_STATE_DICT
special_tokens: