differential flash attention 2; cleanup

This commit is contained in:
Dan Saunders
2024-12-17 18:44:47 +00:00
parent 41ebd93158
commit bda1eed59e
8 changed files with 268 additions and 106 deletions

View File

@@ -0,0 +1,6 @@
metric,training,validation
loss,1.8773103952407837,1.915901780128479
model_preparation_time,0.0051,0.0051
runtime,89.7635,8.9565
samples_per_second,20.053,22.33
steps_per_second,20.053,22.33
1 metric training validation
2 loss 1.8773103952407837 1.915901780128479
3 model_preparation_time 0.0051 0.0051
4 runtime 89.7635 8.9565
5 samples_per_second 20.053 22.33
6 steps_per_second 20.053 22.33