framework,version,device,op_name,kernel_source,mla_dtype,kv_cache_dtype,num_heads,batch_size,isl,tp_size,step,latency
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,64,1,1,1,0,0.012166400253772736
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,32,1,1,2,0,0.011689600348472596
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,16,1,1,4,0,0.011561600118875503
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,8,1,1,8,0,0.011615999788045884
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,4,1,1,16,0,0.01151840016245842
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,2,1,1,32,0,0.01146719977259636
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,1,1,1,64,0,0.011604800075292587
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,1,1,1,0,0.017982399463653563
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,1,1,2,0,0.017316800355911256
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,1,1,4,0,0.01724800020456314
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,1,1,8,0,0.017377600073814392
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,1,1,16,0,0.017284800112247468
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,1,1,32,0,0.017078399658203125
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,1,1,64,0,0.01730719953775406
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,64,1,16,1,0,0.013283200562000275
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,32,1,16,2,0,0.013076800107955932
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,16,1,16,4,0,0.01297920048236847
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,8,1,16,8,0,0.012988799810409546
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,4,1,16,16,0,0.012646399438381195
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,2,1,16,32,0,0.012174399942159653
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,1,1,16,64,0,0.01207519993185997
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,1,16,1,0,0.019116799533367156
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,1,16,2,0,0.01892800033092499
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,1,16,4,0,0.018727999925613404
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,1,16,8,0,0.01870879977941513
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,1,16,16,0,0.01860000044107437
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,1,16,32,0,0.018129600584506987
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,1,16,64,0,0.017880000174045563
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,64,1,32,1,0,0.014054399728775025
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,32,1,32,2,0,0.01395999938249588
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,16,1,32,4,0,0.013326400518417358
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,8,1,32,8,0,0.013206399977207184
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,4,1,32,16,0,0.013019199669361114
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,2,1,32,32,0,0.012689599394798278
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,1,1,32,64,0,0.01223199963569641
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,1,32,1,0,0.01989919990301132
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,1,32,2,0,0.01918399930000305
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,1,32,4,0,0.018916800618171692
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,1,32,8,0,0.01879040002822876
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,1,32,16,0,0.018700799345970152
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,1,32,32,0,0.018481600284576415
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,1,32,64,0,0.01783519983291626
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,64,1,64,1,0,0.014126400649547576
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,32,1,64,2,0,0.014033600687980652
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,16,1,64,4,0,0.013439999520778656
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,8,1,64,8,0,0.01329759955406189
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,4,1,64,16,0,0.013251200318336487
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,2,1,64,32,0,0.013254399597644805
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,1,1,64,64,0,0.01295360028743744
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,1,64,1,0,0.020937600731849672
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,1,64,2,0,0.0200655996799469
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,1,64,4,0,0.019099199771881105
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,1,64,8,0,0.018964800238609313
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,1,64,16,0,0.0188960000872612
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,1,64,32,0,0.018721599876880646
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,1,64,64,0,0.018500800430774688
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,64,1,128,1,0,0.016638399660587312
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,32,1,128,2,0,0.016089600324630738
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,16,1,128,4,0,0.01564960032701492
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,8,1,128,8,0,0.015360000729560851
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,4,1,128,16,0,0.01488959938287735
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,2,1,128,32,0,0.014672000706195832
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,1,1,128,64,0,0.013760000467300415
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,1,128,1,0,0.022465600073337554
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,1,128,2,0,0.02206239998340607
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,1,128,4,0,0.021902400255203246
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,1,128,8,0,0.021678400039672852
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,1,128,16,0,0.020771199464797975
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,1,128,32,0,0.020681600272655486
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,1,128,64,0,0.019463999569416045
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,64,1,256,1,0,0.028016000986099243
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,32,1,256,2,0,0.019696000218391418
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,16,1,256,4,0,0.019270400702953338
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,8,1,256,8,0,0.018848000466823576
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,4,1,256,16,0,0.01828960031270981
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,2,1,256,32,0,0.0182559996843338
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,1,1,256,64,0,0.017481599748134614
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,1,256,1,0,0.03622559905052185
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,1,256,2,0,0.026214399933815004
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,1,256,4,0,0.02580159902572632
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,1,256,8,0,0.025241601467132568
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,1,256,16,0,0.02515200078487396
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,1,256,32,0,0.02423200011253357
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,1,256,64,0,0.023153600096702576
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,64,1,512,1,0,0.06632959842681885
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,32,1,512,2,0,0.0348800003528595
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,16,1,512,4,0,0.026542401313781737
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,8,1,512,8,0,0.033180800080299375
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,4,1,512,16,0,0.02704159915447235
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,2,1,512,32,0,0.02211039960384369
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,1,1,512,64,0,0.02131360024213791
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,1,512,1,0,0.0762112021446228
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,1,512,2,0,0.04265759885311127
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,1,512,4,0,0.03340800106525421
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,1,512,8,0,0.04033440053462982
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,1,512,16,0,0.033606401085853575
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,1,512,32,0,0.02868480086326599
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,1,512,64,0,0.027993598580360414
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,64,1,1024,1,0,0.1710863947868347
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,32,1,1024,2,0,0.0910095989704132
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,16,1,1024,4,0,0.048230400681495665
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,8,1,1024,8,0,0.04020639955997467
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,4,1,1024,16,0,0.04084480106830597
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,2,1,1024,32,0,0.03064799904823303
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,1,1,1024,64,0,0.025857600569725036
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,1,1024,1,0,0.18035199642181396
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,1,1024,2,0,0.10048960447311402
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,1,1024,4,0,0.05718880295753479
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,1,1024,8,0,0.04755200147628784
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,1,1024,16,0,0.05054240226745606
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,1,1024,32,0,0.03841759860515594
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,1,1024,64,0,0.03292160034179688
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,64,1,1536,1,0,0.32365920543670657
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,32,1,1536,2,0,0.16808160543441772
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,16,1,1536,4,0,0.09374719858169556
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,8,1,1536,8,0,0.05458719730377197
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,4,1,1536,16,0,0.05729280114173889
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,2,1,1536,32,0,0.040703999996185306
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,1,1,1536,64,0,0.032364800572395325
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,1,1536,1,0,0.33290719985961914
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,1,1536,2,0,0.17792160511016847
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,1,1536,4,0,0.10393760204315186
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,1,1536,8,0,0.0634223997592926
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,1,1536,16,0,0.06830400228500366
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,1,1536,32,0,0.050526398420333865
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,1,1536,64,0,0.04123519957065582
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,64,1,2048,1,0,0.5216527938842773
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,32,1,2048,2,0,0.26687839031219485
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,16,1,2048,4,0,0.14008959531784057
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,8,1,2048,8,0,0.07543200254440308
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,4,1,2048,16,0,0.06807199716567994
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,2,1,2048,32,0,0.05623679757118225
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,1,1,2048,64,0,0.04014880061149597
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,1,2048,1,0,0.5293568134307861
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,1,2048,2,0,0.2778271913528442
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,1,2048,4,0,0.15194560289382936
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,1,2048,8,0,0.0876688003540039
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,1,2048,16,0,0.07767999768257142
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,1,2048,32,0,0.06872320175170898
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,1,2048,64,0,0.05087199807167053
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,64,1,3072,1,0,1.0609600067138671
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,32,1,3072,2,0,0.554099178314209
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,16,1,3072,4,0,0.2848880052566528
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,8,1,3072,8,0,0.164137601852417
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,4,1,3072,16,0,0.0998960018157959
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,2,1,3072,32,0,0.08162559866905213
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,1,1,3072,64,0,0.0600928008556366
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,1,3072,1,0,1.0791184425354003
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,1,3072,2,0,0.5532400131225585
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,1,3072,4,0,0.2920464038848877
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,1,3072,8,0,0.17332160472869873
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,1,3072,16,0,0.10883040428161621
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,1,3072,32,0,0.09366400241851806
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,1,3072,64,0,0.07229120135307313
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,64,1,4096,1,0,1.821945571899414
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,32,1,4096,2,0,0.9626432418823242
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,16,1,4096,4,0,0.49179677963256835
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,8,1,4096,8,0,0.2574575901031494
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,4,1,4096,16,0,0.13929920196533202
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,2,1,4096,32,0,0.13186559677124024
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,1,1,4096,64,0,0.08889920115470887
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,1,4096,1,0,1.8151023864746094
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,1,4096,2,0,0.9355055809020996
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,1,4096,4,0,0.4826176166534424
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,1,4096,8,0,0.2611151933670044
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,1,4096,16,0,0.149017596244812
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,1,4096,32,0,0.1380527973175049
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,1,4096,64,0,0.10426559448242187
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,64,1,6144,1,0,4.121192169189453
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,32,1,6144,2,0,2.159828758239746
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,16,1,6144,4,0,1.06527681350708
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,8,1,6144,8,0,0.5505119800567627
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,4,1,6144,16,0,0.30639359951019285
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,2,1,6144,32,0,0.19183679819107055
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,1,1,6144,64,0,0.13237600326538085
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,1,6144,1,0,4.0733600616455075
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,1,6144,2,0,2.0325775146484375
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,1,6144,4,0,1.0440223693847657
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,1,6144,8,0,0.530467176437378
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,1,6144,16,0,0.31227679252624513
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,1,6144,32,0,0.20739519596099854
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,1,6144,64,0,0.14888960123062134
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,64,1,8192,1,0,7.526723480224609
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,32,1,8192,2,0,3.787113571166992
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,16,1,8192,4,0,1.8935440063476563
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,8,1,8192,8,0,0.9931952476501464
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,4,1,8192,16,0,0.5053103923797607
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,2,1,8192,32,0,0.262993597984314
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,1,1,8192,64,0,0.25811679363250734
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,1,8192,1,0,7.161830139160156
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,1,8192,2,0,3.5885265350341795
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,1,8192,4,0,1.8074735641479491
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,1,8192,8,0,0.914680004119873
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,1,8192,16,0,0.48394079208374025
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,1,8192,32,0,0.2731071949005127
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,1,8192,64,0,0.26288800239562987
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,64,1,10240,1,0,11.677828979492187
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,32,1,10240,2,0,5.9130912780761715
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,16,1,10240,4,0,2.992857551574707
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,8,1,10240,8,0,1.4788064002990722
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,4,1,10240,16,0,0.6966815948486328
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,2,1,10240,32,0,0.3961359977722168
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,1,1,10240,64,0,0.29951679706573486
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,1,10240,1,0,11.209950256347657
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,1,10240,2,0,5.548419189453125
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,1,10240,4,0,2.770948791503906
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,1,10240,8,0,1.418729591369629
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,1,10240,16,0,0.7419248104095459
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,1,10240,32,0,0.43316001892089845
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,1,10240,64,0,0.33229119777679444
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,64,1,12288,1,0,17.217803955078125
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,32,1,12288,2,0,8.453521728515625
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,16,1,12288,4,0,4.167726516723633
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,8,1,12288,8,0,2.091433525085449
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,4,1,12288,16,0,1.0476991653442382
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,2,1,12288,32,0,0.6231247901916503
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,1,1,12288,64,0,0.4010591983795166
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,1,12288,1,0,16.016342163085938
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,1,12288,2,0,7.990335845947266
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,1,12288,4,0,4.0369823455810545
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,1,12288,8,0,2.076375961303711
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,1,12288,16,0,1.054745578765869
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,1,12288,32,0,0.5970751762390136
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,1,12288,64,0,0.3902656078338623
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,64,1,16384,1,0,29.73143615722656
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,32,1,16384,2,0,15.099131774902343
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,16,1,16384,4,0,7.434742736816406
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,8,1,16384,8,0,3.6839344024658205
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,4,1,16384,16,0,1.8433216094970704
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,2,1,16384,32,0,0.9360159873962403
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,1,1,16384,64,0,0.5158624172210693
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,1,16384,1,0,28.54522705078125
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,1,16384,2,0,14.30102081298828
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,1,16384,4,0,7.167041778564453
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,1,16384,8,0,3.598428726196289
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,1,16384,16,0,1.8113775253295898
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,1,16384,32,0,0.9513392448425293
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,1,16384,64,0,0.5167488098144531
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,64,1,32768,1,0,117.8539794921875
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,32,1,32768,2,0,59.09578857421875
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,16,1,32768,4,0,29.622369384765626
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,8,1,32768,8,0,14.972724914550781
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,4,1,32768,16,0,7.52044677734375
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,2,1,32768,32,0,3.7355121612548827
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,1,1,32768,64,0,1.9211280822753907
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,1,32768,1,0,112.42208251953124
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,1,32768,2,0,56.4250244140625
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,1,32768,4,0,28.426132202148438
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,1,32768,8,0,14.264187622070313
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,1,32768,16,0,7.179055786132812
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,1,32768,32,0,3.560782241821289
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,1,32768,64,0,1.8750368118286134
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,64,2,1,1,0,0.013305599987506866
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,32,2,1,2,0,0.012863999605178833
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,16,2,1,4,0,0.01260959953069687
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,8,2,1,8,0,0.01247519999742508
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,4,2,1,16,0,0.012363199889659882
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,2,2,1,32,0,0.012300799787044524
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,1,2,1,64,0,0.012332800030708312
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,2,1,1,0,0.01884160041809082
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,2,1,2,0,0.01868959963321686
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,2,1,4,0,0.01844799965620041
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,2,1,8,0,0.01815840005874634
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,2,1,16,0,0.018190400302410127
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,2,1,32,0,0.018169599771499633
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,2,1,64,0,0.017961600422859193
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,64,2,16,1,0,0.01456640064716339
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,32,2,16,2,0,0.013425600528717042
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,16,2,16,4,0,0.013441599905490875
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,8,2,16,8,0,0.013446399569511413
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,4,2,16,16,0,0.013100799918174744
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,2,2,16,32,0,0.01249760016798973
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,1,2,16,64,0,0.012305600196123123
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,2,16,1,0,0.0202224001288414
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,2,16,2,0,0.0191103994846344
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,2,16,4,0,0.018886399269104005
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,2,16,8,0,0.018873600661754607
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,2,16,16,0,0.018542400002479552
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,2,16,32,0,0.01823360025882721
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,2,16,64,0,0.01801439970731735
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,64,2,32,1,0,0.014478400349617004
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,32,2,32,2,0,0.014169600605964661
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,16,2,32,4,0,0.013299199938774108
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,8,2,32,8,0,0.01327199935913086
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,4,2,32,16,0,0.01329279989004135
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,2,2,32,32,0,0.012980799376964568
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,1,2,32,64,0,0.012593600153923034
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,2,32,1,0,0.021238400042057036
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,2,32,2,0,0.020319999754428865
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,2,32,4,0,0.019303999841213226
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,2,32,8,0,0.019089600443840025
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,2,32,16,0,0.018991999328136444
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,2,32,32,0,0.018607999384403228
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,2,32,64,0,0.018329599499702455
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,64,2,64,1,0,0.015291200578212738
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,32,2,64,2,0,0.014776000380516052
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,16,2,64,4,0,0.014369599521160126
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,8,2,64,8,0,0.013519999384880067
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,4,2,64,16,0,0.013340799510478974
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,2,2,64,32,0,0.013300800323486328
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,1,2,64,64,0,0.013064000010490417
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,2,64,1,0,0.021568000316619873
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,2,64,2,0,0.0212351992726326
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,2,64,4,0,0.020448000729084016
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,2,64,8,0,0.019244800508022308
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,2,64,16,0,0.018849599361419677
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,2,64,32,0,0.018915200233459474
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,2,64,64,0,0.01897599995136261
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,64,2,128,1,0,0.02542720139026642
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,32,2,128,2,0,0.016681599617004394
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,16,2,128,4,0,0.01650079935789108
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,8,2,128,8,0,0.01605760008096695
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,4,2,128,16,0,0.015492799878120422
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,2,2,128,32,0,0.015107199549674988
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,1,2,128,64,0,0.01438400000333786
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,2,128,1,0,0.03270399868488312
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,2,128,2,0,0.02353599965572357
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,2,128,4,0,0.022430400550365447
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,2,128,8,0,0.022383999824523926
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,2,128,16,0,0.022227199375629426
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,2,128,32,0,0.021135999262332915
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,2,128,64,0,0.02007199972867966
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,64,2,256,1,0,0.053523200750350955
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,32,2,256,2,0,0.02834559977054596
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,16,2,256,4,0,0.020691199600696562
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,8,2,256,8,0,0.02005600035190582
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,4,2,256,16,0,0.019980800151824952
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,2,2,256,32,0,0.019363200664520262
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,1,2,256,64,0,0.01833280026912689
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,2,256,1,0,0.06307839751243591
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,2,256,2,0,0.03569119870662689
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,2,256,4,0,0.027323201298713684
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,2,256,8,0,0.026848000288009644
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,2,256,16,0,0.026705598831176756
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,2,256,32,0,0.025619199872016905
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,2,256,64,0,0.024033600091934205
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,64,2,512,1,0,0.12050399780273438
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,32,2,512,2,0,0.0660207986831665
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,16,2,512,4,0,0.03595199882984161
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,8,2,512,8,0,0.03086720108985901
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,4,2,512,16,0,0.03480319976806641
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,2,2,512,32,0,0.02860639989376068
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,1,2,512,64,0,0.023531199991703035
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,2,512,1,0,0.1309056043624878
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,2,512,2,0,0.07607839703559875
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,2,512,4,0,0.04467200040817261
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,2,512,8,0,0.03809599876403809
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,2,512,16,0,0.04366880059242249
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,2,512,32,0,0.03590880036354065
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,2,512,64,0,0.030531200766563415
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,64,2,1024,1,0,0.32663679122924805
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,32,2,1024,2,0,0.17050559520721437
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,16,2,1024,4,0,0.09192000031471252
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,8,2,1024,8,0,0.05145760178565979
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,4,2,1024,16,0,0.046331200003623965
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,2,2,1024,32,0,0.043961599469184875
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,1,2,1024,64,0,0.033764800429344176
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,2,1024,1,0,0.3358479976654053
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,2,1024,2,0,0.18158559799194335
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,2,1024,4,0,0.10346399545669556
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,2,1024,8,0,0.06361759901046753
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,2,1024,16,0,0.05592319965362549
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,2,1024,32,0,0.05610079765319824
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,2,1024,64,0,0.04437119960784912
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,64,2,1536,1,0,0.6284175872802734
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,32,2,1536,2,0,0.32150719165802
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,16,2,1536,4,0,0.16951680183410645
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,8,2,1536,8,0,0.0966319978237152
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,4,2,1536,16,0,0.0636896014213562
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,2,2,1536,32,0,0.07014080286026
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,1,2,1536,64,0,0.04651359915733337
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,2,1536,1,0,0.6428512096405029
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,2,1536,2,0,0.33544800281524656
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,2,1536,4,0,0.183569598197937
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,2,1536,8,0,0.11128480434417724
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,2,1536,16,0,0.07654399871826172
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,2,1536,32,0,0.0839631974697113
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,2,1536,64,0,0.059950399398803714
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,64,2,2048,1,0,1.0270591735839845
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,32,2,2048,2,0,0.5370319843292236
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,16,2,2048,4,0,0.2756351947784424
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,8,2,2048,8,0,0.1491935968399048
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,4,2,2048,16,0,0.0835632026195526
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,2,2,2048,32,0,0.07831519842147827
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,1,2,2048,64,0,0.06383839845657349
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,2,2048,1,0,1.0489168167114258
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,2,2048,2,0,0.5411871910095215
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,2,2048,4,0,0.2880687952041626
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,2,2048,8,0,0.16421120166778563
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,2,2048,16,0,0.09911680221557617
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,2,2048,32,0,0.09122719764709472
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,2,2048,64,0,0.07972639799118042
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,64,2,3072,1,0,2.160206413269043
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,32,2,3072,2,0,1.1152640342712403
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,16,2,3072,4,0,0.580452823638916
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,8,2,3072,8,0,0.3011951923370361
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,4,2,3072,16,0,0.1743888020515442
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,2,2,3072,32,0,0.11528160572052001
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,1,2,3072,64,0,0.10433599948883057
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,2,3072,1,0,2.1569263458251955
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,2,3072,2,0,1.0879023551940918
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,2,3072,4,0,0.5657839775085449
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,2,3072,8,0,0.3082767963409424
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,2,3072,16,0,0.18910720348358154
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,2,3072,32,0,0.1329327940940857
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,2,3072,64,0,0.12076159715652465
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,64,2,4096,1,0,3.6820449829101562
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,32,2,4096,2,0,1.9384191513061524
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,16,2,4096,4,0,1.0080816268920898
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,8,2,4096,8,0,0.5112783908843994
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,4,2,4096,16,0,0.27211039066314696
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,2,2,4096,32,0,0.1501423954963684
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,1,2,4096,64,0,0.14432320594787598
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,2,4096,1,0,3.762607955932617
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,2,4096,2,0,1.9208768844604491
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,2,4096,4,0,0.973414421081543
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,2,4096,8,0,0.5142032146453858
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,2,4096,16,0,0.2829008102416992
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,2,4096,32,0,0.17073119878768922
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,2,4096,64,0,0.16614880561828613
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,64,2,6144,1,0,8.711505889892578
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,32,2,6144,2,0,4.350888061523437
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,16,2,6144,4,0,2.211454391479492
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,8,2,6144,8,0,1.175879955291748
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,4,2,6144,16,0,0.60207200050354
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,2,2,6144,32,0,0.3337455987930298
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,1,2,6144,64,0,0.22673120498657226
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,2,6144,1,0,8.327372741699218
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,2,6144,2,0,4.164023971557617
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,2,6144,4,0,2.1450624465942383
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,2,6144,8,0,1.1270480155944824
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,2,6144,16,0,0.5754479885101318
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,2,6144,32,0,0.3485104084014893
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,2,6144,64,0,0.24447200298309327
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,64,2,8192,1,0,15.33604736328125
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,32,2,8192,2,0,7.836262512207031
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,16,2,8192,4,0,3.788995361328125
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,8,2,8192,8,0,1.9066688537597656
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,4,2,8192,16,0,0.9814751625061036
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,2,2,8192,32,0,0.5116384029388428
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,1,2,8192,64,0,0.28583199977874757
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,2,8192,1,0,14.564349365234374
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,2,8192,2,0,7.3751983642578125
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,2,8192,4,0,3.7172927856445312
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,2,8192,8,0,1.8696928024291992
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,2,8192,16,0,0.9715359687805176
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,2,8192,32,0,0.5244336128234863
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,2,8192,64,0,0.3098000049591064
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,64,2,10240,1,0,23.75969696044922
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,32,2,10240,2,0,12.076102447509765
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,16,2,10240,4,0,5.964616012573242
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,8,2,10240,8,0,2.9548784255981446
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,4,2,10240,16,0,1.5113792419433594
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,2,2,10240,32,0,0.7980527877807617
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,1,2,10240,64,0,0.4742527961730957
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,2,10240,1,0,22.71399688720703
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,2,10240,2,0,11.477937316894531
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,2,10240,4,0,5.703137588500977
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,2,10240,8,0,2.836702346801758
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,2,10240,16,0,1.4670576095581054
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,2,10240,32,0,0.800926399230957
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,2,10240,64,0,0.47361440658569337
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,64,2,12288,1,0,34.25322265625
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,32,2,12288,2,0,16.98118896484375
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,16,2,12288,4,0,8.624375915527343
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,8,2,12288,8,0,4.270958328247071
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,4,2,12288,16,0,2.1163936614990235
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,2,2,12288,32,0,1.1077168464660645
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,1,2,12288,64,0,0.6634751796722412
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,2,12288,1,0,32.597409057617185
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,2,12288,2,0,16.1760498046875
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,2,12288,4,0,8.179727935791016
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,2,12288,8,0,4.089102554321289
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,2,12288,16,0,2.0270559310913088
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,2,12288,32,0,1.1004128456115723
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,2,12288,64,0,0.6702960014343262
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,64,2,16384,1,0,59.93113403320312
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,32,2,16384,2,0,30.23319091796875
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,16,2,16384,4,0,15.174385070800781
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,8,2,16384,8,0,7.545315551757812
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,4,2,16384,16,0,3.7933345794677735
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,2,2,16384,32,0,1.910825538635254
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,1,2,16384,64,0,0.9990431785583496
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,2,16384,1,0,57.25491943359375
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,2,16384,2,0,28.808004760742186
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,2,16384,4,0,14.515792846679688
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,2,16384,8,0,7.327114868164062
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,2,16384,16,0,3.679966354370117
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,2,16384,32,0,1.8619247436523438
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,2,16384,64,0,1.06245756149292
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,64,2,32768,1,0,235.8708740234375
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,32,2,32768,2,0,118.2137939453125
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,16,2,32768,4,0,59.41688232421875
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,8,2,32768,8,0,29.88634338378906
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,4,2,32768,16,0,14.951336669921876
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,2,2,32768,32,0,7.537022399902344
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,1,2,32768,64,0,3.81343994140625
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,2,32768,1,0,224.4922119140625
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,2,32768,2,0,112.81829833984375
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,2,32768,4,0,56.609716796875
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,2,32768,8,0,27.94322814941406
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,2,32768,16,0,15.16949920654297
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,2,32768,32,0,7.165822601318359
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,2,32768,64,0,3.67431526184082
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,64,4,1,1,0,0.013147200644016265
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,32,4,1,2,0,0.01263359934091568
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,16,4,1,4,0,0.012028799951076507
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,8,4,1,8,0,0.011865600198507308
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,4,4,1,16,0,0.011777599900960922
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,2,4,1,32,0,0.012062399834394454
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,1,4,1,64,0,0.011982399970293045
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,4,1,1,0,0.01855359971523285
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,4,1,2,0,0.01762399971485138
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,4,1,4,0,0.01748960018157959
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,4,1,8,0,0.017263999581336974
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,4,1,16,0,0.017188799381256104
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,4,1,32,0,0.0171984001994133
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,4,1,64,0,0.017350399494171144
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,64,4,16,1,0,0.01499519944190979
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,32,4,16,2,0,0.014299200475215912
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,16,4,16,4,0,0.013732799887657165
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,8,4,16,8,0,0.013460800051689148
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,4,4,16,16,0,0.013044799864292144
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,2,4,16,32,0,0.012427199631929398
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,1,4,16,64,0,0.012100800126791
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,4,16,1,0,0.021063999831676485
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,4,16,2,0,0.020824000239372253
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,4,16,4,0,0.01921759992837906
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,4,16,8,0,0.019201600551605226
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,4,16,16,0,0.018726399540901183
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,4,16,32,0,0.018251200020313264
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,4,16,64,0,0.017880000174045563
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,64,4,32,1,0,0.015107199549674988
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,32,4,32,2,0,0.014742399752140044
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,16,4,32,4,0,0.014319999516010285
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,8,4,32,8,0,0.013628800213336945
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,4,4,32,16,0,0.013515199720859527
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,2,4,32,32,0,0.013092799484729767
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,1,4,32,64,0,0.012433599680662155
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,4,32,1,0,0.02160319983959198
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,4,32,2,0,0.02134400010108948
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,4,32,4,0,0.020233599841594695
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,4,32,8,0,0.019393600523471832
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,4,32,16,0,0.019118399918079378
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,4,32,32,0,0.018934400379657747
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,4,32,64,0,0.01828960031270981
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,64,4,64,1,0,0.023932799696922302
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,32,4,64,2,0,0.015464000403881073
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,16,4,64,4,0,0.014475199580192565
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,8,4,64,8,0,0.01451520025730133
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,4,4,64,16,0,0.014388799667358398
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,2,4,64,32,0,0.013724799454212188
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,1,4,64,64,0,0.013601599633693695
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,4,64,1,0,0.03208479881286621
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,4,64,2,0,0.02211360037326813
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,4,64,4,0,0.021063999831676485
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,4,64,8,0,0.020891200006008147
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,4,64,16,0,0.020737600326538087
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,4,64,32,0,0.019524799287319185
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,4,64,64,0,0.019174399971961974
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,64,4,128,1,0,0.04735040068626404
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,32,4,128,2,0,0.026307201385498045
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,16,4,128,4,0,0.017694400250911714
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,8,4,128,8,0,0.017150400578975676
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,4,4,128,16,0,0.017140799760818483
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,2,4,128,32,0,0.016310399770736693
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,1,4,128,64,0,0.015211200714111328
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,4,128,1,0,0.0572816014289856
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,4,128,2,0,0.033583998680114746
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,4,128,4,0,0.02417439967393875
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,4,128,8,0,0.0235727995634079
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,4,128,16,0,0.023217600584030152
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,4,128,32,0,0.02264160066843033
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,4,128,64,0,0.021092799305915833
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,64,4,256,1,0,0.09575999975204467
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,32,4,256,2,0,0.054692798852920534
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,16,4,256,4,0,0.03231039941310883
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,8,4,256,8,0,0.022124800086021423
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,4,4,256,16,0,0.021803200244903564
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,2,4,256,32,0,0.021828800439834595
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,1,4,256,64,0,0.020295999944210052
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,4,256,1,0,0.10528320074081421
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,4,256,2,0,0.06479679942131042
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,4,256,4,0,0.04103040099143982
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,4,256,8,0,0.029414400458335876
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,4,256,16,0,0.028705599904060363
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,4,256,32,0,0.02861439883708954
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,4,256,64,0,0.026868799328804018
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,64,4,512,1,0,0.22494080066680908
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,32,4,512,2,0,0.12075519561767578
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,16,4,512,4,0,0.06829919815063476
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,8,4,512,8,0,0.050670397281646726
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,4,4,512,16,0,0.03391839861869812
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,2,4,512,32,0,0.03780960142612457
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,1,4,512,64,0,0.031630399823188785
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,4,512,1,0,0.23654720783233643
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,4,512,2,0,0.13265759944915773
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,4,512,4,0,0.08028159737586975
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,4,512,8,0,0.06366400122642517
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,4,512,16,0,0.04364640116691589
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,4,512,32,0,0.04868960082530975
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,4,512,64,0,0.04172320067882538
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,64,4,1024,1,0,0.6336575984954834
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,32,4,1024,2,0,0.3284816026687622
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,16,4,1024,4,0,0.17410720586776735
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,8,4,1024,8,0,0.09778559803962708
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,4,4,1024,16,0,0.07543359994888306
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,2,4,1024,32,0,0.05249919891357422
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,1,4,1024,64,0,0.05054879784584045
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,4,1024,1,0,0.6487664222717285
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,4,1024,2,0,0.3438944101333618
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,4,1024,4,0,0.1897104024887085
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,4,1024,8,0,0.1137727975845337
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,4,1024,16,0,0.0917568027973175
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,4,1024,32,0,0.06635040044784546
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,4,1024,64,0,0.06687039732933045
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,64,4,1536,1,0,1.2419648170471191
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,32,4,1536,2,0,0.6341839790344238
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,16,4,1536,4,0,0.3346368074417114
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,8,4,1536,8,0,0.18322880268096925
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,4,4,1536,16,0,0.11932320594787597
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,2,4,1536,32,0,0.08579519987106324
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,1,4,1536,64,0,0.08346239924430847
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,4,1536,1,0,1.2622927665710448
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,4,1536,2,0,0.6512080192565918
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,4,1536,4,0,0.3467456102371216
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,4,1536,8,0,0.19887839555740355
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,4,1536,16,0,0.13744640350341797
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,4,1536,32,0,0.10631680488586426
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,4,1536,64,0,0.10265120267868041
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,64,4,2048,1,0,2.0497600555419924
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,32,4,2048,2,0,1.0576576232910155
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,16,4,2048,4,0,0.5502816200256347
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,8,4,2048,8,0,0.29302239418029785
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,4,4,2048,16,0,0.1622223973274231
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,2,4,2048,32,0,0.12906880378723146
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,1,4,2048,64,0,0.09132959842681884
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,4,2048,1,0,2.07040958404541
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,4,2048,2,0,1.0898816108703613
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,4,2048,4,0,0.5715695858001709
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,4,2048,8,0,0.3172544002532959
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,4,2048,16,0,0.18552639484405517
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,4,2048,32,0,0.15095679759979247
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,4,2048,64,0,0.11546880006790161
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,64,4,3072,1,0,4.551889419555664
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,32,4,3072,2,0,2.14913272857666
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,16,4,3072,4,0,1.1247039794921876
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,8,4,3072,8,0,0.5749807834625245
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,4,4,3072,16,0,0.3141983985900879
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,2,4,3072,32,0,0.20743041038513182
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,1,4,3072,64,0,0.15069600343704223
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,4,3072,1,0,4.415172958374024
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,4,3072,2,0,2.163049507141113
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,4,3072,4,0,1.1130288124084473
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,4,3072,8,0,0.5870575904846191
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,4,3072,16,0,0.33988959789276124
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,4,3072,32,0,0.2367856025695801
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,4,3072,64,0,0.17989920377731322
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,64,4,4096,1,0,8.019760131835938
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,32,4,4096,2,0,3.998107147216797
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,16,4,4096,4,0,1.9986560821533204
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,8,4,4096,8,0,1.03570556640625
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,4,4,4096,16,0,0.5358128070831298
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,2,4,4096,32,0,0.2991296052932739
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,1,4,4096,64,0,0.23853919506072999
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,4,4096,1,0,7.669019317626953
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,4,4096,2,0,3.8230815887451173
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,4,4096,4,0,1.9334016799926759
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,4,4096,8,0,1.0581104278564453
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,4,4096,16,0,0.5873536109924317
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,4,4096,32,0,0.3334736108779907
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,4,4096,64,0,0.27470400333404543
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,64,4,6144,1,0,17.702987670898438
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,32,4,6144,2,0,8.770256042480469
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,16,4,6144,4,0,4.375960159301758
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,8,4,6144,8,0,2.2491952896118166
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,4,4,6144,16,0,1.1237327575683593
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,2,4,6144,32,0,0.6236176013946533
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,1,4,6144,64,0,0.4015679836273193
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,4,6144,1,0,16.72490234375
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,4,6144,2,0,8.344452667236329
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,4,6144,4,0,4.233113479614258
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,4,6144,8,0,2.152471923828125
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,4,6144,16,0,1.1313183784484864
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,4,6144,32,0,0.6552351951599121
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,4,6144,64,0,0.4428368091583252
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,64,4,8192,1,0,30.82652587890625
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,32,4,8192,2,0,15.306877136230469
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,16,4,8192,4,0,7.7593025207519535
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,8,4,8192,8,0,3.8564079284667967
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,4,4,8192,16,0,1.9172304153442383
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,2,4,8192,32,0,1.0332960128784179
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,1,4,8192,64,0,0.5547103881835938
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,4,8192,1,0,29.364437866210938
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,4,8192,2,0,14.852525329589843
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,4,8192,4,0,7.402758026123047
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,4,8192,8,0,3.70184326171875
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,4,8192,16,0,1.912468719482422
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,4,8192,32,0,1.0541215896606446
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,4,8192,64,0,0.600486421585083
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,64,4,10240,1,0,47.79214172363281
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,32,4,10240,2,0,23.71280517578125
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,16,4,10240,4,0,12.164119720458984
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,8,4,10240,8,0,5.92260971069336
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,4,4,10240,16,0,2.9545583724975586
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,2,4,10240,32,0,1.552830410003662
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,1,4,10240,64,0,0.8771568298339844
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,4,10240,1,0,45.5423095703125
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,4,10240,2,0,22.343675231933595
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,4,10240,4,0,11.223702239990235
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,4,10240,8,0,6.477521514892578
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,4,10240,16,0,2.88720645904541
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,4,10240,32,0,1.6196544647216797
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,4,10240,64,0,0.9373231887817383
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,64,4,12288,1,0,68.19428100585938
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,32,4,12288,2,0,33.826089477539064
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,16,4,12288,4,0,17.28460693359375
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,8,4,12288,8,0,8.478952026367187
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,4,4,12288,16,0,4.278555297851563
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,2,4,12288,32,0,2.131875228881836
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,1,4,12288,64,0,1.1993056297302247
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,4,12288,1,0,64.67413330078125
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,4,12288,2,0,32.67896423339844
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,4,12288,4,0,16.442092895507812
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,4,12288,8,0,8.219142150878906
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,4,12288,16,0,4.193601608276367
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,4,12288,32,0,2.1909248352050783
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,4,12288,64,0,1.2617199897766114
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,64,4,16384,1,0,119.59986572265625
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,32,4,16384,2,0,60.0433349609375
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,16,4,16384,4,0,30.46766357421875
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,8,4,16384,8,0,15.024217224121093
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,4,4,16384,16,0,7.6098480224609375
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,2,4,16384,32,0,3.890729522705078
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,1,4,16384,64,0,2.031417655944824
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,4,16384,1,0,114.07652587890625
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,4,16384,2,0,57.2738037109375
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,4,16384,4,0,29.020248413085938
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,4,16384,8,0,14.490031433105468
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,4,16384,16,0,7.364340972900391
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,4,16384,32,0,3.773748779296875
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,4,16384,64,0,2.041756820678711
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,64,8,1,1,0,0.013276800513267517
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,32,8,1,2,0,0.012654399871826172
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,16,8,1,4,0,0.01268800050020218
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,8,8,1,8,0,0.012336000055074691
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,4,8,1,16,0,0.012683199346065521
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,2,8,1,32,0,0.012481600046157837
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,1,8,1,64,0,0.012292800098657608
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,8,1,1,0,0.018804800510406495
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,8,1,2,0,0.01836639940738678
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,8,1,4,0,0.01812639981508255
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,8,1,8,0,0.018174399435520173
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,8,1,16,0,0.01809920072555542
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,8,1,32,0,0.018025599420070648
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,8,1,64,0,0.017899200320243835
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,64,8,16,1,0,0.015699200332164764
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,32,8,16,2,0,0.014971199631690978
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,16,8,16,4,0,0.014697599411010741
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,8,8,16,8,0,0.014209599792957306
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,4,8,16,16,0,0.013811199367046357
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,2,8,16,32,0,0.013191999495029449
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,1,8,16,64,0,0.012993599474430084
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,8,16,1,0,0.022191999852657317
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,8,16,2,0,0.02144159972667694
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,8,16,4,0,0.020577600598335265
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,8,16,8,0,0.019659200310707094
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,8,16,16,0,0.01926400065422058
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,8,16,32,0,0.018777599930763243
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,8,16,64,0,0.018603199720382692
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,64,8,32,1,0,0.023904000222682954
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,32,8,32,2,0,0.015526400506496429
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,16,8,32,4,0,0.01478559970855713
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,8,8,32,8,0,0.014684799313545226
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,4,8,32,16,0,0.014148800075054169
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,2,8,32,32,0,0.013767999410629273
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,1,8,32,64,0,0.013228799402713775
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,8,32,1,0,0.03136000037193298
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,8,32,2,0,0.02247200012207031
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,8,32,4,0,0.021273599565029146
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,8,32,8,0,0.02128320038318634
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,8,32,16,0,0.020635199546813966
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,8,32,32,0,0.019539199769496918
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,8,32,64,0,0.01918880045413971
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,64,8,64,1,0,0.04518559873104096
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,32,8,64,2,0,0.024134400486946105
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,16,8,64,4,0,0.016545599699020384
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,8,8,64,8,0,0.015643200278282164
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,4,8,64,16,0,0.01560479998588562
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,2,8,64,32,0,0.015036800503730774
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,1,8,64,64,0,0.014767999947071075
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,8,64,1,0,0.05481119751930237
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,8,64,2,0,0.031113600730895995
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,8,64,4,0,0.02327519953250885
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,8,64,8,0,0.02266400009393692
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,8,64,16,0,0.022228799760341644
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,8,64,32,0,0.021134400367736818
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,8,64,64,0,0.020572799444198608
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,64,8,128,1,0,0.08405439853668213
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,32,8,128,2,0,0.04927839934825897
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,16,8,128,4,0,0.028191998600959778
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,8,8,128,8,0,0.019236800074577332
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,4,8,128,16,0,0.018702399730682374
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,2,8,128,32,0,0.01897120028734207
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,1,8,128,64,0,0.017532800137996674
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,8,128,1,0,0.0936896026134491
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,8,128,2,0,0.05922880172729492
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,8,128,4,0,0.036606401205062866
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,8,128,8,0,0.026289600133895873
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,8,128,16,0,0.025526401400566102
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,8,128,32,0,0.025736001133918763
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,8,128,64,0,0.023763200640678404
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,64,8,256,1,0,0.17663199901580812
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,32,8,256,2,0,0.09724479913711548
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,16,8,256,4,0,0.05751360058784485
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,8,8,256,8,0,0.03661920130252838
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,4,8,256,16,0,0.025228801369667053
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,2,8,256,32,0,0.025139200687408447
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,1,8,256,64,0,0.024908800423145295
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,8,256,1,0,0.18778239488601683
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,8,256,2,0,0.10951679944992065
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,8,256,4,0,0.06951040029525757
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,8,256,8,0,0.04981440007686615
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,8,256,16,0,0.035464000701904294
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,8,256,32,0,0.03457120060920715
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,8,256,64,0,0.03372960090637207
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,64,8,512,1,0,0.4374271869659424
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,32,8,512,2,0,0.231331205368042
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,16,8,512,4,0,0.12594720125198364
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,8,8,512,8,0,0.0891439974308014
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,4,8,512,16,0,0.06008800268173218
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,2,8,512,32,0,0.040320000052452086
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,1,8,512,64,0,0.0444815993309021
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,8,512,1,0,0.45194239616394044
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,8,512,2,0,0.24690399169921876
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,8,512,4,0,0.1420192003250122
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,8,512,8,0,0.10580320358276367
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,8,512,16,0,0.07703520059585571
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,8,512,32,0,0.05419039726257324
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,8,512,64,0,0.06061440110206604
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,64,8,1024,1,0,1.251535987854004
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,32,8,1024,2,0,0.6530896186828613
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,16,8,1024,4,0,0.340449595451355
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,8,8,1024,8,0,0.18940320014953613
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,4,8,1024,16,0,0.13388160467147828
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,2,8,1024,32,0,0.0940064013004303
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,1,8,1024,64,0,0.06644160151481629
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,8,1024,1,0,1.2810192108154297
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,8,1024,2,0,0.6702112197875977
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,8,1024,4,0,0.3604111909866333
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,8,1024,8,0,0.21113600730895996
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,8,1024,16,0,0.1561776041984558
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,8,1024,32,0,0.11788159608840942
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,8,1024,64,0,0.09072319865226745
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,64,8,1536,1,0,2.535145568847656
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,32,8,1536,2,0,1.330190372467041
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,16,8,1536,4,0,0.6787168025970459
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,8,8,1536,8,0,0.3585871934890747
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,4,8,1536,16,0,0.2296992063522339
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,2,8,1536,32,0,0.1525424003601074
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,1,8,1536,64,0,0.1127392053604126
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,8,1536,1,0,2.5270383834838865
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,8,1536,2,0,1.3259424209594726
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,8,1536,4,0,0.692739200592041
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,8,1536,8,0,0.3815376043319702
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,8,1536,16,0,0.2546288013458252
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,8,1536,32,0,0.18246560096740722
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,8,1536,64,0,0.14439840316772462
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,64,8,2048,1,0,4.324307250976562
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,32,8,2048,2,0,2.2093584060668947
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,16,8,2048,4,0,1.1358464241027832
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,8,8,2048,8,0,0.5913424015045166
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,4,8,2048,16,0,0.32364161014556886
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,2,8,2048,32,0,0.22546560764312745
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,1,8,2048,64,0,0.16294560432434083
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,8,2048,1,0,4.262392044067383
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,8,2048,2,0,2.164227294921875
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,8,2048,4,0,1.118734359741211
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,8,2048,8,0,0.6107711791992188
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,8,2048,16,0,0.35308160781860354
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,8,2048,32,0,0.26131839752197267
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,8,2048,64,0,0.20006880760192872
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,64,8,3072,1,0,9.385244750976563
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,32,8,3072,2,0,4.725739288330078
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,16,8,3072,4,0,2.3045616149902344
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,8,8,3072,8,0,1.2145919799804688
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,4,8,3072,16,0,0.6519408226013184
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,2,8,3072,32,0,0.41298561096191405
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,1,8,3072,64,0,0.2717632055282593
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,8,3072,1,0,8.963294219970702
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,8,3072,2,0,4.5560047149658205
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,8,3072,4,0,2.3317743301391602
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,8,3072,8,0,1.2047792434692384
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,8,3072,16,0,0.6715055942535401
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,8,3072,32,0,0.44508957862854004
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,8,3072,64,0,0.32388958930969236
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,64,8,4096,1,0,16.179701232910155
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,32,8,4096,2,0,8.07793731689453
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,16,8,4096,4,0,4.0809776306152346
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,8,8,4096,8,0,2.114004707336426
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,4,8,4096,16,0,1.0092687606811523
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,2,8,4096,32,0,0.5721759796142578
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,1,8,4096,64,0,0.4034815788269043
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,8,4096,1,0,15.431718444824218
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,8,4096,2,0,7.687985229492187
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,8,4096,4,0,3.863068771362305
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,8,4096,8,0,2.022166442871094
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,8,4096,16,0,1.1459888458251952
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,8,4096,32,0,0.6683728218078613
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,8,4096,64,0,0.48134560585021974
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,64,8,6144,1,0,35.21388244628906
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,32,8,6144,2,0,17.662501525878906
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,16,8,6144,4,0,8.839766693115234
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,8,8,6144,8,0,4.419758224487305
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,4,8,6144,16,0,2.256572723388672
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,2,8,6144,32,0,1.2452207565307618
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,1,8,6144,64,0,0.7711535930633545
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,8,6144,1,0,33.71366577148437
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,8,6144,2,0,17.00814208984375
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,8,6144,4,0,8.516126251220703
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,8,6144,8,0,4.329423904418945
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,8,6144,16,0,2.2469167709350586
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,8,6144,32,0,1.3088047981262207
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,8,6144,64,0,0.828548812866211
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,64,8,8192,1,0,61.607843017578126
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,32,8,8192,2,0,30.97730407714844
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,16,8,8192,4,0,15.60101318359375
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,8,8,8192,8,0,7.825936126708984
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,4,8,8192,16,0,3.9653648376464843
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,2,8,8192,32,0,2.0240032196044924
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,1,8,8192,64,0,1.071793556213379
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,8,8192,1,0,58.803826904296876
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,8,8192,2,0,29.603756713867188
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,8,8192,4,0,15.014849853515624
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,8,8192,8,0,7.556690979003906
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,8,8192,16,0,3.8853759765625
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,8,8192,32,0,2.039619255065918
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,8,8192,64,0,1.20239200592041
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,64,16,1,1,0,0.013326400518417358
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,32,16,1,2,0,0.012433599680662155
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,16,16,1,4,0,0.011966399848461151
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,8,16,1,8,0,0.01199359968304634
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,4,16,1,16,0,0.011795199662446975
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,2,16,1,32,0,0.011923199892044068
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,1,16,1,64,0,0.011745599657297134
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,16,1,1,0,0.018489600718021394
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,16,1,2,0,0.017907199263572694
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,16,1,4,0,0.01777759939432144
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,16,1,8,0,0.017350399494171144
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,16,1,16,0,0.017688000202178956
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,16,1,32,0,0.017627200484275816
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,16,1,64,0,0.017534400522708892
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,64,16,16,1,0,0.024460799992084503
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,32,16,16,2,0,0.015515199303627015
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,16,16,16,4,0,0.014641599357128143
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,8,16,16,8,0,0.014747199416160584
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,4,16,16,16,0,0.013988800346851349
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,2,16,16,32,0,0.01358560025691986
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,1,16,16,64,0,0.013107199966907502
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,16,16,1,0,0.03201920092105866
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,16,16,2,0,0.02228800058364868
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,16,16,4,0,0.02131360024213791
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,16,16,8,0,0.021240000426769257
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,16,16,16,0,0.01990399956703186
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,16,16,32,0,0.0195375993847847
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,16,16,64,0,0.019526399672031403
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,64,16,32,1,0,0.04535999894142151
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,32,16,32,2,0,0.02415200024843216
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,16,16,32,4,0,0.016604800522327424
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,8,16,32,8,0,0.015590399503707886
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,4,16,32,16,0,0.015583999454975128
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,2,16,32,32,0,0.01459999978542328
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,1,16,32,64,0,0.01408960074186325
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,16,32,1,0,0.05443999767303467
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,16,32,2,0,0.031164801120758055
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,16,32,4,0,0.023054400086402894
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,16,32,8,0,0.02259040027856827
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,16,32,16,0,0.022006399929523468
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,16,32,32,0,0.020871999859809875
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,16,32,64,0,0.02021760046482086
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,64,16,64,1,0,0.07979360222816467
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,32,16,64,2,0,0.04643200039863586
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,16,16,64,4,0,0.025891199707984924
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,8,16,64,8,0,0.01803999990224838
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,4,16,64,16,0,0.01738400012254715
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,2,16,64,32,0,0.01748799979686737
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,1,16,64,64,0,0.016337600350379945
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,16,64,1,0,0.09030719995498657
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,16,64,2,0,0.0565343976020813
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,16,64,4,0,0.03442400097846985
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,16,64,8,0,0.025116801261901855
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,16,64,16,0,0.024395200610160827
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,16,64,32,0,0.024484799802303316
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,16,64,64,0,0.02341119945049286
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,64,16,128,1,0,0.15283520221710206
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,32,16,128,2,0,0.08541920185089111
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,16,16,128,4,0,0.05201759934425354
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,8,16,128,8,0,0.031615999341011045
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,4,16,128,16,0,0.022257600724697114
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,2,16,128,32,0,0.022193600237369538
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,1,16,128,64,0,0.021380800008773803
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,16,128,1,0,0.16411679983139038
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,16,128,2,0,0.09667360186576843
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,16,128,4,0,0.06413919925689697
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,16,128,8,0,0.044787201285362246
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,16,128,16,0,0.0317984014749527
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,16,128,32,0,0.031472000479698184
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,16,128,64,0,0.03059520125389099
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,64,16,256,1,0,0.3393183946609497
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,32,16,256,2,0,0.18314080238342284
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,16,16,256,4,0,0.1013200044631958
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,8,16,256,8,0,0.06429280042648315
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,4,16,256,16,0,0.043971198797225955
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,2,16,256,32,0,0.031692799925804135
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,1,16,256,64,0,0.03091199994087219
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,16,256,1,0,0.3543407917022705
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,16,256,2,0,0.1987712025642395
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,16,256,4,0,0.11763999462127686
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,16,256,8,0,0.08083840012550354
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,16,256,16,0,0.06110240221023559
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,16,256,32,0,0.0451088011264801
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,16,256,64,0,0.04368639886379242
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,64,16,512,1,0,0.8588768005371094
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,32,16,512,2,0,0.45179200172424316
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,16,16,512,4,0,0.24055039882659912
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,8,16,512,8,0,0.16474720239639282
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,4,16,512,16,0,0.10419520139694213
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,2,16,512,32,0,0.07459520101547241
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,1,16,512,64,0,0.05395200252532959
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,16,512,1,0,0.8826383590698242
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,16,512,2,0,0.4766560077667236
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,16,512,4,0,0.2640496015548706
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,16,512,8,0,0.18825759887695312
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,16,512,16,0,0.12836480140686035
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,16,512,32,0,0.09852160215377807
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,16,512,64,0,0.07841759920120239
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,64,16,1024,1,0,2.493996810913086
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,32,16,1024,2,0,1.3222975730895996
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,16,16,1024,4,0,0.6940976142883301
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,8,16,1024,8,0,0.37113280296325685
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,4,16,1024,16,0,0.250598406791687
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,2,16,1024,32,0,0.16336640119552612
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,1,16,1024,64,0,0.12116479873657227
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,16,1024,1,0,2.5439359664916994
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,16,1024,2,0,1.33985595703125
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,16,1024,4,0,0.7044511795043945
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,16,1024,8,0,0.4022928237915039
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,16,1024,16,0,0.28557600975036623
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,16,1024,32,0,0.20269598960876464
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,16,1024,64,0,0.1611296057701111
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,64,16,1536,1,0,5.262033462524414
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,32,16,1536,2,0,2.6198448181152343
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,16,16,1536,4,0,1.3446703910827638
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,8,16,1536,8,0,0.7228000164031982
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,4,16,1536,16,0,0.4392704010009766
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,2,16,1536,32,0,0.27765440940856934
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,1,16,1536,64,0,0.19588799476623536
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,16,1536,1,0,5.126934432983399
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,16,1536,2,0,2.5977487564086914
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,16,1536,4,0,1.3488832473754884
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,16,1536,8,0,0.742193603515625
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,16,1536,16,0,0.48284001350402833
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,16,1536,32,0,0.3301215887069702
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,16,1536,64,0,0.25155038833618165
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,64,16,2048,1,0,8.842537689208985
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,32,16,2048,2,0,4.417897415161133
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,16,16,2048,4,0,2.210523223876953
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,8,16,2048,8,0,1.1619183540344238
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,4,16,2048,16,0,0.6214320182800293
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,2,16,2048,32,0,0.4088143825531006
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,1,16,2048,64,0,0.2808448076248169
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,16,2048,1,0,8.561350250244141
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,16,2048,2,0,4.399065780639648
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,16,2048,4,0,2.2578863143920898
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,16,2048,8,0,1.1974111557006837
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,16,2048,16,0,0.686575984954834
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,16,2048,32,0,0.48273439407348634
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,16,2048,64,0,0.35536799430847166
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,64,16,3072,1,0,18.757147216796874
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,32,16,3072,2,0,9.399539184570312
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,16,16,3072,4,0,4.982006454467774
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,8,16,3072,8,0,2.5436304092407225
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,4,16,3072,16,0,1.317246437072754
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,2,16,3072,32,0,0.7581759929656983
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,1,16,3072,64,0,0.48839521408081055
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,16,3072,1,0,17.924253845214842
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,16,3072,2,0,9.063172912597656
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,16,3072,4,0,4.785295867919922
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,16,3072,8,0,2.529159927368164
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,16,3072,16,0,1.332798385620117
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,16,3072,32,0,0.8517359733581543
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,16,3072,64,0,0.5913599967956543
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,64,16,4096,1,0,32.20107421875
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,32,16,4096,2,0,16.353082275390626
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,16,16,4096,4,0,8.140789031982422
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,8,16,4096,8,0,4.128563308715821
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,4,16,4096,16,0,2.140737533569336
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,2,16,4096,32,0,1.1613375663757324
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,1,16,4096,64,0,0.7349711894989014
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,16,4096,1,0,31.30408935546875
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,16,4096,2,0,15.482612609863281
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,16,4096,4,0,7.82598876953125
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,16,4096,8,0,4.066323089599609
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,16,4096,16,0,2.178531265258789
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,16,4096,32,0,1.2582703590393067
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,16,4096,64,0,0.8658656120300293
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,64,32,1,1,0,0.01395999938249588
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,32,32,1,2,0,0.01287200003862381
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,16,32,1,4,0,0.012608000636100769
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,8,32,1,8,0,0.012590399384498597
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,4,32,1,16,0,0.012535999715328216
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,2,32,1,32,0,0.0123648002743721
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,1,32,1,64,0,0.012355200201272964
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,32,1,1,0,0.019065600633621217
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,32,1,2,0,0.01860959976911545
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,32,1,4,0,0.01852799952030182
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,32,1,8,0,0.01842239946126938
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,32,1,16,0,0.018196800351142885
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,32,1,32,0,0.018222400546073915
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,32,1,64,0,0.018385599553585052
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,64,32,16,1,0,0.0454255998134613
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,32,32,16,2,0,0.024371199309825897
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,16,32,16,4,0,0.016809600591659545
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,8,32,16,8,0,0.01631679981946945
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,4,32,16,16,0,0.015715199708938598
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,2,32,16,32,0,0.015004800260066986
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,1,32,16,64,0,0.014795200526714325
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,32,16,1,0,0.05560320019721985
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,32,16,2,0,0.03228319883346557
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,32,16,4,0,0.023553599417209626
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,32,16,8,0,0.02296479940414429
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,32,16,16,0,0.022291199862957002
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,32,16,32,0,0.021300800144672394
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,32,16,64,0,0.02096640020608902
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,64,32,32,1,0,0.08023679852485657
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,32,32,32,2,0,0.046721601486206056
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,16,32,32,4,0,0.026027199625968934
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,8,32,32,8,0,0.01833920031785965
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,4,32,32,16,0,0.0176144003868103
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,2,32,32,32,0,0.01724800020456314
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,1,32,32,64,0,0.01658080071210861
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,32,32,1,0,0.09095039963722229
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,32,32,2,0,0.057004797458648684
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,32,32,4,0,0.0347680002450943
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,32,32,8,0,0.02558560073375702
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,32,32,16,0,0.024673600494861603
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,32,32,32,0,0.024675199389457704
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,32,32,64,0,0.023555199801921844
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,64,32,64,1,0,0.14517600536346437
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,32,32,64,2,0,0.0834176003932953
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,16,32,64,4,0,0.05000159740447998
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,8,32,64,8,0,0.029793599247932435
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,4,32,64,16,0,0.02143840044736862
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,2,32,64,32,0,0.021012799441814424
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,1,32,64,64,0,0.020899200439453126
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,32,64,1,0,0.1561792016029358
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,32,64,2,0,0.0950160026550293
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,32,64,4,0,0.061919999122619626
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,32,64,8,0,0.04300000071525574
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,32,64,16,0,0.03115360140800476
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,32,64,32,0,0.030404800176620485
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,32,64,64,0,0.030675199627876282
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,64,32,128,1,0,0.29105439186096194
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,32,32,128,2,0,0.15567519664764404
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,16,32,128,4,0,0.09198399782180786
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,8,32,128,8,0,0.05857759714126587
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,4,32,128,16,0,0.039531201124191284
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,2,32,128,32,0,0.02903519868850708
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,1,32,128,64,0,0.028091201186180116
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,32,128,1,0,0.306875205039978
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,32,128,2,0,0.1718287944793701
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,32,128,4,0,0.10782239437103272
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,32,128,8,0,0.0748528003692627
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,32,128,16,0,0.05610560178756714
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,32,128,32,0,0.042559999227523806
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,32,128,64,0,0.040870401263237
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,64,32,256,1,0,0.6661200046539306
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,32,32,256,2,0,0.35586559772491455
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,16,32,256,4,0,0.19091360569000243
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,8,32,256,8,0,0.11503360271453858
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,4,32,256,16,0,0.07807679772377014
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,2,32,256,32,0,0.05856639742851257
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,1,32,256,64,0,0.044486400485038755
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,32,256,1,0,0.6886640071868897
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,32,256,2,0,0.3794944047927856
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,32,256,4,0,0.21483199596405028
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,32,256,8,0,0.13866560459136962
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,32,256,16,0,0.10151200294494629
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,32,256,32,0,0.08239359855651855
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,32,256,64,0,0.0662992000579834
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,64,32,512,1,0,1.7023536682128906
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,32,32,512,2,0,0.8960559844970704
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,16,32,512,4,0,0.4732048034667969
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,8,32,512,8,0,0.31757919788360595
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,4,32,512,16,0,0.19518719911575316
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,2,32,512,32,0,0.1330191969871521
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,1,32,512,64,0,0.10174880027770997
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,32,512,1,0,1.7502208709716798
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,32,512,2,0,0.93787841796875
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,32,512,4,0,0.5100880146026612
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,32,512,8,0,0.35525760650634763
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,32,512,16,0,0.2342047929763794
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,32,512,32,0,0.17276959419250487
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,32,512,64,0,0.14110879898071288
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,64,32,1024,1,0,5.141147232055664
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,32,32,1024,2,0,2.647649574279785
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,16,32,1024,4,0,1.3481776237487793
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,8,32,1024,8,0,0.7213871955871582
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,4,32,1024,16,0,0.46875839233398436
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,2,32,1024,32,0,0.30295519828796386
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,1,32,1024,64,0,0.21840319633483887
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,32,1024,1,0,5.13158073425293
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,32,1024,2,0,2.6476736068725586
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,32,1024,4,0,1.3923983573913574
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,32,1024,8,0,0.7880224227905274
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,32,1024,16,0,0.5426383972167969
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,32,1024,32,0,0.3775775909423828
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,32,1024,64,0,0.29384000301361085
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,64,32,1536,1,0,10.650582122802735
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,32,32,1536,2,0,5.333790588378906
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,16,32,1536,4,0,2.64148006439209
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,8,32,1536,8,0,1.4176624298095704
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,4,32,1536,16,0,0.8528719902038574
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,2,32,1536,32,0,0.5172624111175537
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,1,32,1536,64,0,0.35728158950805666
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,32,1536,1,0,10.311212921142578
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,32,1536,2,0,5.226160049438477
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,32,1536,4,0,2.657276725769043
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,32,1536,8,0,1.4792048454284668
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,32,1536,16,0,0.9456064224243164
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,32,1536,32,0,0.6269711971282959
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,32,1536,64,0,0.4662896156311035
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,64,32,2048,1,0,17.789364624023438
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,32,32,2048,2,0,8.942145538330077
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,16,32,2048,4,0,4.650364685058594
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,8,32,2048,8,0,2.521096038818359
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,4,32,2048,16,0,1.2998687744140625
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,2,32,2048,32,0,0.7974527835845947
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,1,32,2048,64,0,0.5210256099700927
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,32,2048,1,0,17.258062744140624
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,32,2048,2,0,8.681436920166016
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,32,2048,4,0,4.490576171875
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,32,2048,8,0,2.5037696838378904
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,32,2048,16,0,1.4029040336608887
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,32,2048,32,0,0.9307552337646484
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,32,2048,64,0,0.6636816024780273
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,64,64,1,1,0,0.015084800124168397
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,32,64,1,2,0,0.014083200693130493
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,16,64,1,4,0,0.013572800159454345
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,8,64,1,8,0,0.013603200018405915
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,4,64,1,16,0,0.013310399651527405
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,2,64,1,32,0,0.013593600690364837
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,1,64,1,64,0,0.013364799320697784
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,64,1,1,0,0.02003680020570755
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,64,1,2,0,0.019521600008010863
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,64,1,4,0,0.019356800615787505
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,64,1,8,0,0.019236800074577332
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,64,1,16,0,0.019398400187492372
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,64,1,32,0,0.019486400485038757
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,64,1,64,0,0.01929280012845993
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,64,64,16,1,0,0.08233439922332764
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,32,64,16,2,0,0.0480432003736496
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,16,64,16,4,0,0.02664799988269806
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,8,64,16,8,0,0.01927199959754944
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,4,64,16,16,0,0.017972800135612487
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,2,64,16,32,0,0.01741439998149872
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,1,64,16,64,0,0.017118400335311888
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,64,16,1,0,0.09231839776039123
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,64,16,2,0,0.05782880187034607
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,64,16,4,0,0.0350383996963501
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,64,16,8,0,0.02630079984664917
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,64,16,16,0,0.025257599353790284
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,64,16,32,0,0.024516800045967103
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,64,16,64,0,0.023971199989318848
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,64,64,32,1,0,0.1462432026863098
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,32,64,32,2,0,0.08453760147094727
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,16,64,32,4,0,0.051230400800704956
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,8,64,32,8,0,0.030649599432945252
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,4,64,32,16,0,0.021976000070571898
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,2,64,32,32,0,0.02131839990615845
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,1,64,32,64,0,0.020664000511169435
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,64,32,1,0,0.15592639446258544
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,64,32,2,0,0.09597600102424622
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,64,32,4,0,0.06304479837417602
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,64,32,8,0,0.04358560144901276
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,64,32,16,0,0.032051199674606325
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,64,32,32,0,0.03091199994087219
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,64,32,64,0,0.030076798796653748
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,64,64,64,1,0,0.28066880702972413
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,32,64,64,2,0,0.15385279655456544
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,16,64,64,4,0,0.09120799899101258
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,8,64,64,8,0,0.05724319815635681
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,4,64,64,16,0,0.038043200969696045
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,2,64,64,32,0,0.028240001201629637
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,1,64,64,64,0,0.027775999903678895
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,64,64,1,0,0.2894288063049316
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,64,64,2,0,0.1671471953392029
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,64,64,4,0,0.10634560585021972
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,64,64,8,0,0.07296000123023987
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,64,64,16,0,0.05502079725265503
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,64,64,32,0,0.04206880033016205
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,64,64,64,0,0.04158560037612915
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,64,64,128,1,0,0.5700767993927002
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,32,64,128,2,0,0.2981775999069214
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,16,64,128,4,0,0.1694591999053955
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,8,64,128,8,0,0.1053536057472229
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,4,64,128,16,0,0.07256960272789001
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,2,64,128,32,0,0.05461440086364746
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,1,64,128,64,0,0.04276959896087647
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,64,128,1,0,0.5924335956573487
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,64,128,2,0,0.32056479454040526
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,64,128,4,0,0.1925104022026062
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,64,128,8,0,0.12869759798049926
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,64,128,16,0,0.09581279754638672
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,64,128,32,0,0.07803199887275696
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,64,128,64,0,0.06403999924659728
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,64,64,256,1,0,1.3199168205261231
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,32,64,256,2,0,0.7014895915985108
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,16,64,256,4,0,0.3693727970123291
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,8,64,256,8,0,0.21895999908447267
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,4,64,256,16,0,0.1435215950012207
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,2,64,256,32,0,0.10576319694519043
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,1,64,256,64,0,0.0819760024547577
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,64,256,1,0,1.359121608734131
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,64,256,2,0,0.7414463996887207
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,64,256,4,0,0.408681583404541
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,64,256,8,0,0.2581536054611206
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,64,256,16,0,0.18312959671020507
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,64,256,32,0,0.14578399658203126
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,64,256,64,0,0.11931840181350709
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,64,64,512,1,0,3.4586334228515625
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,32,64,512,2,0,1.821112060546875
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,16,64,512,4,0,0.9321999549865723
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,8,64,512,8,0,0.6173664093017578
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,4,64,512,16,0,0.37183361053466796
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,2,64,512,32,0,0.24872961044311523
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,1,64,512,64,0,0.18659679889678954
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,64,512,1,0,3.4802513122558594
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,64,512,2,0,1.8613616943359375
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,64,512,4,0,1.0035008430480956
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,64,512,8,0,0.6889008045196533
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,64,512,16,0,0.44606242179870603
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,64,512,32,0,0.32380640506744385
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,64,512,64,0,0.26344799995422363
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,64,64,1024,1,0,10.571491241455078
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,32,64,1024,2,0,5.376839828491211
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,16,64,1024,4,0,2.67171688079834
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,8,64,1024,8,0,1.4447615623474122
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,4,64,1024,16,0,0.9193296432495117
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,2,64,1024,32,0,0.5756912231445312
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,1,64,1024,64,0,0.40775361061096194
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,64,1024,1,0,10.483811187744141
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,64,1024,2,0,5.323624038696289
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,64,1024,4,0,2.7719520568847655
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,64,1024,8,0,1.563225555419922
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,64,1024,16,0,1.0622063636779786
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,64,1024,32,0,0.7219791889190674
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,64,1024,64,0,0.5534783840179444
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,64,128,1,1,0,0.016575999557971954
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,32,128,1,2,0,0.015244799852371215
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,16,128,1,4,0,0.015204800665378571
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,8,128,1,8,0,0.015084800124168397
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,4,128,1,16,0,0.014903999865055084
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,2,128,1,32,0,0.014788800477981567
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,1,128,1,64,0,0.01496479958295822
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,128,1,1,0,0.02284799963235855
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,128,1,2,0,0.02143999934196472
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,128,1,4,0,0.020848000049591066
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,128,1,8,0,0.021121600270271303
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,128,1,16,0,0.021240000426769257
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,128,1,32,0,0.020729599893093108
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,128,1,64,0,0.02128159999847412
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,64,128,16,1,0,0.14858720302581788
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,32,128,16,2,0,0.08568480014801025
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,16,128,16,4,0,0.051857602596282956
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,8,128,16,8,0,0.031491199135780336
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,4,128,16,16,0,0.02316959947347641
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,2,128,16,32,0,0.02155199944972992
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,1,128,16,64,0,0.021753600239753722
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,128,16,1,0,0.15800479650497437
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,128,16,2,0,0.09723839759826661
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,128,16,4,0,0.06406720280647278
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,128,16,8,0,0.04439679980278015
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,128,16,16,0,0.033348798751831055
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,128,16,32,0,0.03132959902286529
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,128,16,64,0,0.03115200102329254
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,64,128,32,1,0,0.27771520614624023
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,32,128,32,2,0,0.15495200157165528
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,16,128,32,4,0,0.09162399768829346
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,8,128,32,8,0,0.05816320180892944
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,4,128,32,16,0,0.038833600282669065
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,2,128,32,32,0,0.029488000273704528
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,1,128,32,64,0,0.02764959931373596
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,128,32,1,0,0.2891119956970215
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,128,32,2,0,0.1677183985710144
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,128,32,4,0,0.1063647985458374
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,128,32,8,0,0.07357599735260009
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,128,32,16,0,0.0555184006690979
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,128,32,32,0,0.04331200122833252
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,128,32,64,0,0.041131201386451724
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,64,128,64,1,0,0.5347296237945557
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,32,128,64,2,0,0.2898639917373657
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,16,128,64,4,0,0.166975998878479
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,8,128,64,8,0,0.10421919822692871
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,4,128,64,16,0,0.070523202419281
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,2,128,64,32,0,0.05264319777488709
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,1,128,64,64,0,0.042628800868988036
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,128,64,1,0,0.5550320148468018
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,128,64,2,0,0.31002719402313234
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,128,64,4,0,0.18864320516586303
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,128,64,8,0,0.12671040296554564
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,128,64,16,0,0.09367520213127137
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,128,64,32,0,0.07626399993896485
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,128,64,64,0,0.06501920223236084
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,64,128,128,1,0,1.1264320373535157
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,32,128,128,2,0,0.5828383922576904
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,16,128,128,4,0,0.32527520656585696
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,8,128,128,8,0,0.19731199741363525
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,4,128,128,16,0,0.1337391972541809
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,2,128,128,32,0,0.10053119659423829
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,1,128,128,64,0,0.07700319886207581
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,128,128,1,0,1.1658032417297364
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,128,128,2,0,0.6217967987060546
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,128,128,4,0,0.36513121128082277
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,128,128,8,0,0.23749279975891113
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,128,128,16,0,0.17358880043029784
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,128,128,32,0,0.14079359769821168
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,128,128,64,0,0.11518880128860473
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,64,128,256,1,0,2.6249536514282226
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,32,128,256,2,0,1.3924431800842285
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,16,128,256,4,0,0.7246975898742676
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,8,128,256,8,0,0.42285919189453125
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,4,128,256,16,0,0.2740272045135498
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,2,128,256,32,0,0.19682400226593016
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,1,128,256,64,0,0.14947839975357055
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,128,256,1,0,2.6968751907348634
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,128,256,2,0,1.4703712463378906
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,128,256,4,0,0.7984608173370361
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,128,256,8,0,0.4988687992095947
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,128,256,16,0,0.34792320728302
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,128,256,32,0,0.27329440116882325
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,128,256,64,0,0.22520639896392822
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,64,128,512,1,0,7.002085113525391
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,32,128,512,2,0,3.5818416595458986
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,16,128,512,4,0,1.853995132446289
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,8,128,512,8,0,1.21626558303833
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,4,128,512,16,0,0.7265632152557373
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,2,128,512,32,0,0.4782144069671631
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,1,128,512,64,0,0.3538032054901123
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,128,512,1,0,6.93487548828125
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,128,512,2,0,3.7194496154785157
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,128,512,4,0,1.9871551513671875
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,128,512,8,0,1.361252784729004
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,128,512,16,0,0.8731439590454102
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,128,512,32,0,0.6204527854919434
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,128,512,64,0,0.5009535789489746
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,64,256,1,1,0,0.02611039876937866
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,32,256,1,2,0,0.02284799963235855
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,16,256,1,4,0,0.02223680019378662
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,8,256,1,8,0,0.02210240066051483
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,4,256,1,16,0,0.022228799760341644
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,2,256,1,32,0,0.0217631995677948
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,1,256,1,64,0,0.02168319970369339
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,256,1,1,0,0.032548800110816956
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,256,1,2,0,0.02868480086326599
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,256,1,4,0,0.028035199642181395
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,256,1,8,0,0.028100800514221192
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,256,1,16,0,0.02812960147857666
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,256,1,32,0,0.027908799052238465
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,256,1,64,0,0.027827200293540955
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,64,256,16,1,0,0.27548320293426515
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,32,256,16,2,0,0.15493279695510864
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,16,256,16,4,0,0.09209280014038086
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,8,256,16,8,0,0.05871679782867432
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,4,256,16,16,0,0.03878400027751923
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,2,256,16,32,0,0.03531680107116699
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,1,256,16,64,0,0.03436959981918335
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,256,16,1,0,0.29103200435638427
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,256,16,2,0,0.1686911940574646
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,256,16,4,0,0.10772479772567749
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,256,16,8,0,0.07454879879951477
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,256,16,16,0,0.05523520112037659
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,256,16,32,0,0.0491023987531662
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,256,16,64,0,0.04761439859867096
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,64,256,32,1,0,0.5358176231384277
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,32,256,32,2,0,0.2917952060699463
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,16,256,32,4,0,0.16781599521636964
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,8,256,32,8,0,0.10563199520111084
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,4,256,32,16,0,0.07196159958839417
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,2,256,32,32,0,0.052553600072860716
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,1,256,32,64,0,0.04805760085582733
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,256,32,1,0,0.5584832191467285
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,256,32,2,0,0.3109760046005249
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,256,32,4,0,0.189520001411438
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,256,32,8,0,0.12785120010375978
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,256,32,16,0,0.09495360255241395
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,256,32,32,0,0.07753760218620301
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,256,32,64,0,0.07138559818267823
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,64,256,64,1,0,1.061507225036621
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,32,256,64,2,0,0.5664591789245605
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,16,256,64,4,0,0.3189935922622681
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,8,256,64,8,0,0.19510719776153565
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,4,256,64,16,0,0.13321280479431152
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,2,256,64,32,0,0.09922879934310913
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,1,256,64,64,0,0.07958400249481201
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,256,64,1,0,1.0964799880981446
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,256,64,2,0,0.6078815937042237
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,256,64,4,0,0.35926721096038816
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,256,64,8,0,0.23659520149230956
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,256,64,16,0,0.1733407974243164
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,256,64,32,0,0.1390079975128174
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,256,64,64,0,0.11910719871520996
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,64,256,128,1,0,2.238203239440918
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,32,256,128,2,0,1.1493552207946778
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,16,256,128,4,0,0.6367487907409668
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,8,256,128,8,0,0.38236160278320314
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,4,256,128,16,0,0.2515536069869995
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,2,256,128,32,0,0.18822239637374877
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,1,256,128,64,0,0.1453279972076416
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,256,128,1,0,2.312203216552734
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,256,128,2,0,1.2228192329406737
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,256,128,4,0,0.7094128131866455
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,256,128,8,0,0.4546879768371582
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,256,128,16,0,0.3271008014678955
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,256,128,32,0,0.26495840549468996
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,256,128,64,0,0.22075040340423585
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,64,256,256,1,0,5.237873458862305
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,32,256,256,2,0,2.7744672775268553
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,16,256,256,4,0,1.4317551612854005
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,8,256,256,8,0,0.8268832206726074
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,4,256,256,16,0,0.5316832065582275
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,2,256,256,32,0,0.3771456003189087
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,1,256,256,64,0,0.2832063913345337
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,256,256,1,0,5.382798385620117
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,256,256,2,0,2.9162031173706056
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,256,256,4,0,1.5743056297302247
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,256,256,8,0,0.9767151832580566
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,256,256,16,0,0.6728047847747802
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,256,256,32,0,0.5236207962036132
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,256,256,64,0,0.43030881881713867
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,128,1,1,1,0,0.012720000743865967
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,64,1,1,2,0,0.012268800288438797
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,32,1,1,4,0,0.011713600158691407
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,16,1,1,8,0,0.011473599821329117
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,8,1,1,16,0,0.011390399932861329
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,4,1,1,32,0,0.01125440001487732
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,2,1,1,64,0,0.011268799751996994
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,1,1,1,0,0.018244799971580506
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,1,1,2,0,0.018080000579357148
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,1,1,4,0,0.017472000420093538
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,1,1,8,0,0.01717440038919449
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,1,1,16,0,0.017262400686740877
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,1,1,32,0,0.017268800735473634
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,1,1,64,0,0.017289599776268004
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,128,1,16,1,0,0.01408800035715103
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,64,1,16,2,0,0.013150399923324585
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,32,1,16,4,0,0.013040000200271606
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,16,1,16,8,0,0.012961600720882416
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,8,1,16,16,0,0.012942400574684144
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,4,1,16,32,0,0.012726399302482604
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,2,1,16,64,0,0.01226240023970604
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,1,16,1,0,0.020070399343967437
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,1,16,2,0,0.019041599333286287
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,1,16,4,0,0.018873600661754607
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,1,16,8,0,0.018806399405002595
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,1,16,16,0,0.018481600284576415
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,1,16,32,0,0.01828320026397705
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,1,16,64,0,0.017918400466442108
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,128,1,32,1,0,0.014550399780273438
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,64,1,32,2,0,0.014203199744224548
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,32,1,32,4,0,0.013915200531482697
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,16,1,32,8,0,0.01313920021057129
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,8,1,32,16,0,0.013067199289798737
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,4,1,32,32,0,0.013011200726032257
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,2,1,32,64,0,0.012862400710582733
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,1,32,1,0,0.020919999480247496
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,1,32,2,0,0.019995200634002685
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,1,32,4,0,0.019152000546455383
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,1,32,8,0,0.019020800292491914
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,1,32,16,0,0.01908479928970337
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,1,32,32,0,0.018915200233459474
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,1,32,64,0,0.01842239946126938
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,128,1,64,1,0,0.01494079977273941
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,64,1,64,2,0,0.014534400403499603
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,32,1,64,4,0,0.014286400377750396
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,16,1,64,8,0,0.013460800051689148
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,8,1,64,16,0,0.013140800595283508
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,4,1,64,32,0,0.013097600638866424
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,2,1,64,64,0,0.01311199963092804
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,1,64,1,0,0.021775999665260316
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,1,64,2,0,0.020553599298000335
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,1,64,4,0,0.020110400021076204
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,1,64,8,0,0.019038400053977965
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,1,64,16,0,0.01884640008211136
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,1,64,32,0,0.018932799994945525
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,1,64,64,0,0.01881760060787201
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,128,1,128,1,0,0.02550559937953949
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,64,1,128,2,0,0.016838400065898894
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,32,1,128,4,0,0.01611679941415787
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,16,1,128,8,0,0.01565759927034378
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,8,1,128,16,0,0.014963200688362122
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,4,1,128,32,0,0.014753599464893342
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,2,1,128,64,0,0.01470080018043518
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,1,128,1,0,0.032451200485229495
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,1,128,2,0,0.022708800435066224
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,1,128,4,0,0.022750400006771088
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,1,128,8,0,0.021855999529361726
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,1,128,16,0,0.021572799980640413
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,1,128,32,0,0.020656000077724456
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,1,128,64,0,0.020662400126457214
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,128,1,256,1,0,0.05300480127334595
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,64,1,256,2,0,0.027408000826835633
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,32,1,256,4,0,0.01972319930791855
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,16,1,256,8,0,0.01964160054922104
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,8,1,256,16,0,0.018887999653816222
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,4,1,256,32,0,0.01833920031785965
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,2,1,256,64,0,0.018217599391937254
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,1,256,1,0,0.06241599917411804
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,1,256,2,0,0.03440960049629212
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,1,256,4,0,0.026214399933815004
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,1,256,8,0,0.025724801421165466
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,1,256,16,0,0.025809600949287415
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,1,256,32,0,0.024291199445724488
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,1,256,64,0,0.0241007998585701
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,128,1,512,1,0,0.12065119743347168
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,64,1,512,2,0,0.06598560214042663
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,32,1,512,4,0,0.03438720107078552
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,16,1,512,8,0,0.02648800015449524
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,8,1,512,16,0,0.03300319910049439
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,4,1,512,32,0,0.027030399441719054
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,2,1,512,64,0,0.022089600563049316
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,1,512,1,0,0.12963199615478516
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,1,512,2,0,0.07495200037956237
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,1,512,4,0,0.04243200123310089
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,1,512,8,0,0.033190399408340454
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,1,512,16,0,0.040380799770355226
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,1,512,32,0,0.03386400043964386
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,1,512,64,0,0.02871040105819702
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,128,1,1024,1,0,0.3244240045547485
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,64,1,1024,2,0,0.17009279727935792
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,32,1,1024,4,0,0.09033920168876648
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,16,1,1024,8,0,0.04822399914264679
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,8,1,1024,16,0,0.039952000975608824
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,4,1,1024,32,0,0.04073599874973297
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,2,1,1024,64,0,0.030697599053382874
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,1,1024,1,0,0.33437600135803225
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,1,1024,2,0,0.179748797416687
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,1,1024,4,0,0.10045919418334961
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,1,1024,8,0,0.05721759796142578
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,1,1024,16,0,0.04748159945011139
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,1,1024,32,0,0.04928480088710785
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,1,1024,64,0,0.03808000087738037
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,128,1,1536,1,0,0.6214176177978515
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,64,1,1536,2,0,0.3222671985626221
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,32,1,1536,4,0,0.1660591959953308
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,16,1,1536,8,0,0.09350720047950745
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,8,1,1536,16,0,0.054499202966690065
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,4,1,1536,32,0,0.05754719972610474
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,2,1,1536,64,0,0.04068799912929535
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,1,1536,1,0,0.6350736141204834
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,1,1536,2,0,0.3306463956832886
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,1,1536,4,0,0.1772320032119751
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,1,1536,8,0,0.10424799919128418
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,1,1536,16,0,0.06302719712257385
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,1,1536,32,0,0.068094402551651
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,1,1536,64,0,0.049291199445724486
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,128,1,2048,1,0,1.0067983627319337
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,64,1,2048,2,0,0.5243775844573975
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,32,1,2048,4,0,0.26900320053100585
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,16,1,2048,8,0,0.14133119583129883
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,8,1,2048,16,0,0.07532960176467896
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,4,1,2048,32,0,0.0678991973400116
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,2,1,2048,64,0,0.05661600232124329
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,1,2048,1,0,1.0215488433837892
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,1,2048,2,0,0.5337535858154296
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,1,2048,4,0,0.2806240081787109
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,1,2048,8,0,0.1532655954360962
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,1,2048,16,0,0.08724960088729858
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,1,2048,32,0,0.07737280130386352
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,1,2048,64,0,0.0687824010848999
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,128,1,3072,1,0,2.0827327728271485
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,64,1,3072,2,0,1.097276782989502
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,32,1,3072,4,0,0.5865903854370117
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,16,1,3072,8,0,0.29896159172058107
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,8,1,3072,16,0,0.17110559940338135
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,4,1,3072,32,0,0.10367840528488159
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,2,1,3072,64,0,0.08512319922447205
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,1,3072,1,0,2.1274335861206053
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,1,3072,2,0,1.109126377105713
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,1,3072,4,0,0.5667295932769776
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,1,3072,8,0,0.29466240406036376
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,1,3072,16,0,0.17452479600906373
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,1,3072,32,0,0.10981919765472412
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,1,3072,64,0,0.09428640007972718
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,128,1,4096,1,0,3.8190513610839845
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,64,1,4096,2,0,1.9499984741210938
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,32,1,4096,4,0,1.0021504402160644
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,16,1,4096,8,0,0.49793438911437987
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,8,1,4096,16,0,0.2530607938766479
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,4,1,4096,32,0,0.13650399446487427
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,2,1,4096,64,0,0.12435359954833984
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,1,4096,1,0,3.7122352600097654
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,1,4096,2,0,1.8958415985107422
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,1,4096,4,0,0.9680831909179688
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,1,4096,8,0,0.4818143844604492
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,1,4096,16,0,0.26053600311279296
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,1,4096,32,0,0.14768799543380737
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,1,4096,64,0,0.13742560148239136
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,128,1,6144,1,0,8.581441497802734
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,64,1,6144,2,0,4.316147232055664
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,32,1,6144,4,0,2.1313264846801756
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,16,1,6144,8,0,1.0833295822143554
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,8,1,6144,16,0,0.550867223739624
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,4,1,6144,32,0,0.3034192085266113
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,2,1,6144,64,0,0.19281920194625854
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,1,6144,1,0,8.352816009521485
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,1,6144,2,0,4.12061767578125
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,1,6144,4,0,2.1086944580078124
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,1,6144,8,0,1.085756778717041
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,1,6144,16,0,0.5703775882720947
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,1,6144,32,0,0.3161967992782593
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,1,6144,64,0,0.20934720039367677
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,128,1,8192,1,0,15.126815795898438
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,64,1,8192,2,0,7.552565002441407
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,32,1,8192,4,0,3.8182655334472657
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,16,1,8192,8,0,1.716080093383789
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,8,1,8192,16,0,0.9444016456604004
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,4,1,8192,32,0,0.47485599517822263
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,2,1,8192,64,0,0.2506063938140869
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,1,8192,1,0,14.275706481933593
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,1,8192,2,0,7.188655853271484
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,1,8192,4,0,3.5640495300292967
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,1,8192,8,0,1.7879152297973633
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,1,8192,16,0,0.9647536277770996
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,1,8192,32,0,0.510532808303833
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,1,8192,64,0,0.28245279788970945
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,128,1,10240,1,0,23.56944580078125
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,64,1,10240,2,0,11.775273895263672
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,32,1,10240,4,0,6.0858001708984375
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,16,1,10240,8,0,2.978476715087891
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,8,1,10240,16,0,1.46046724319458
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,4,1,10240,32,0,0.787007999420166
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,2,1,10240,64,0,0.42148637771606445
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,1,10240,1,0,22.29900665283203
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,1,10240,2,0,11.557234954833984
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,1,10240,4,0,5.655588912963867
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,1,10240,8,0,2.7894512176513673
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,1,10240,16,0,1.378003215789795
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,1,10240,32,0,0.7444479942321778
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,1,10240,64,0,0.4257503986358643
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,128,1,12288,1,0,34.182131958007815
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,64,1,12288,2,0,16.762112426757813
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,32,1,12288,4,0,8.513632202148438
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,16,1,12288,8,0,4.245080184936524
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,8,1,12288,16,0,2.0779232025146483
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,4,1,12288,32,0,1.0735088348388673
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,2,1,12288,64,0,0.6148687839508057
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,1,12288,1,0,32.23529968261719
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,1,12288,2,0,16.13612518310547
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,1,12288,4,0,7.994945526123047
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,1,12288,8,0,4.0227008819580075
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,1,12288,16,0,2.000515174865723
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,1,12288,32,0,1.0730048179626466
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,1,12288,64,0,0.6033984184265136
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,128,1,16384,1,0,60.011444091796875
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,64,1,16384,2,0,29.933505249023437
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,32,1,16384,4,0,14.9315673828125
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,16,1,16384,8,0,7.494404602050781
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,8,1,16384,16,0,3.739344024658203
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,4,1,16384,32,0,1.8890399932861328
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,2,1,16384,64,0,0.9436335563659668
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,1,16384,1,0,57.079437255859375
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,1,16384,2,0,28.586453247070313
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,1,16384,4,0,14.274853515625
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,1,16384,8,0,7.1174674987792965
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,1,16384,16,0,3.562148666381836
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,1,16384,32,0,1.7892656326293945
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,1,16384,64,0,0.9574048042297363
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,128,1,32768,1,0,238.0894287109375
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,64,1,32768,2,0,118.25623779296875
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,32,1,32768,4,0,59.263067626953124
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,16,1,32768,8,0,29.7375
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,8,1,32768,16,0,14.970022583007813
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,4,1,32768,32,0,7.4851936340332035
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,2,1,32768,64,0,3.7074432373046875
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,1,32768,1,0,226.2593505859375
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,1,32768,2,0,112.4422607421875
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,1,32768,4,0,56.427850341796876
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,1,32768,8,0,28.43629150390625
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,1,32768,16,0,14.306747436523438
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,1,32768,32,0,7.1916755676269535
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,1,32768,64,0,3.6377422332763674
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,128,2,1,1,0,0.013911999762058258
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,64,2,1,2,0,0.013596799969673157
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,32,2,1,4,0,0.013038399815559387
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,16,2,1,8,0,0.012652799487113953
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,8,2,1,16,0,0.01250240057706833
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,4,2,1,32,0,0.012439999729394913
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,2,2,1,64,0,0.012507200241088867
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,2,1,1,0,0.019801600277423857
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,2,1,2,0,0.01786080002784729
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,2,1,4,0,0.017321600019931792
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,2,1,8,0,0.017316800355911256
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,2,1,16,0,0.01727519929409027
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,2,1,32,0,0.017033599317073822
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,2,1,64,0,0.016995200514793397
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,128,2,16,1,0,0.014120000600814819
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,64,2,16,2,0,0.014496000111103058
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,32,2,16,4,0,0.01398719996213913
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,16,2,16,8,0,0.013307200372219085
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,8,2,16,16,0,0.013184000551700593
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,4,2,16,32,0,0.012990400195121765
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,2,2,16,64,0,0.012331199645996094
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,2,16,1,0,0.02048799991607666
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,2,16,2,0,0.02040479928255081
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,2,16,4,0,0.01907840073108673
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,2,16,8,0,0.018943999707698823
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,2,16,16,0,0.01907680034637451
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,2,16,32,0,0.018783999979496
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,2,16,64,0,0.01828960031270981
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,128,2,32,1,0,0.015087999403476715
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,64,2,32,2,0,0.01462559998035431
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,32,2,32,4,0,0.014094400405883788
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,16,2,32,8,0,0.013504000008106231
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,8,2,32,16,0,0.013350400328636169
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,4,2,32,32,0,0.013307200372219085
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,2,2,32,64,0,0.013089600205421447
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,2,32,1,0,0.02168159931898117
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,2,32,2,0,0.021241599321365358
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,2,32,4,0,0.02040639966726303
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,2,32,8,0,0.01934719979763031
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,2,32,16,0,0.019041599333286287
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,2,32,32,0,0.01913280040025711
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,2,32,64,0,0.018883199989795686
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,128,2,64,1,0,0.02276639938354492
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,64,2,64,2,0,0.01525920033454895
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,32,2,64,4,0,0.014431999623775482
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,16,2,64,8,0,0.013998399674892425
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,8,2,64,16,0,0.013779200613498688
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,4,2,64,32,0,0.013406400382518769
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,2,2,64,64,0,0.01324319988489151
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,2,64,1,0,0.02925119996070862
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,2,64,2,0,0.021476800739765167
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,2,64,4,0,0.021073600649833678
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,2,64,8,0,0.020390400290489198
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,2,64,16,0,0.020329600572586058
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,2,64,32,0,0.019040000438690186
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,2,64,64,0,0.018887999653816222
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,128,2,128,1,0,0.04695520102977753
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,64,2,128,2,0,0.024439999461174013
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,32,2,128,4,0,0.01677920073270798
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,16,2,128,8,0,0.016494399309158324
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,8,2,128,16,0,0.016076800227165223
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,4,2,128,32,0,0.015254400670528412
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,2,2,128,64,0,0.015003199875354766
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,2,128,1,0,0.05573599934577942
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,2,128,2,0,0.030990400910377504
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,2,128,4,0,0.02348800003528595
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,2,128,8,0,0.022862400114536285
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,2,128,16,0,0.02213599979877472
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,2,128,32,0,0.021401600539684297
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,2,128,64,0,0.02112479954957962
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,128,2,256,1,0,0.09521440267562867
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,64,2,256,2,0,0.053179198503494264
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,32,2,256,4,0,0.028326401114463808
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,16,2,256,8,0,0.020582400262355804
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,8,2,256,16,0,0.020052799582481386
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,4,2,256,32,0,0.02003840059041977
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,2,2,256,64,0,0.019177600741386414
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,2,256,1,0,0.10466079711914063
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,2,256,2,0,0.06305919885635376
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,2,256,4,0,0.03548479974269867
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,2,256,8,0,0.02726239860057831
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,2,256,16,0,0.026851201057434083
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,2,256,32,0,0.026339200139045716
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,2,256,64,0,0.025519999861717223
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,128,2,512,1,0,0.22370560169219972
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,64,2,512,2,0,0.12000319957733155
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,32,2,512,4,0,0.06583679914474487
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,16,2,512,8,0,0.035902398824691775
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,8,2,512,16,0,0.030582401156425475
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,4,2,512,32,0,0.034673601388931274
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,2,2,512,64,0,0.02858720123767853
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,2,512,1,0,0.2342992067337036
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,2,512,2,0,0.13022559881210327
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,2,512,4,0,0.07537599802017211
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,2,512,8,0,0.044424000382423404
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,2,512,16,0,0.038192000985145566
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,2,512,32,0,0.043479999899864195
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,2,512,64,0,0.035867199301719666
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,128,2,1024,1,0,0.6302688121795654
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,64,2,1024,2,0,0.32508800029754636
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,32,2,1024,4,0,0.17023520469665526
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,16,2,1024,8,0,0.09180319905281067
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,8,2,1024,16,0,0.05103039741516113
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,4,2,1024,32,0,0.04582720100879669
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,2,2,1024,64,0,0.043726399540901184
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,2,1024,1,0,0.640664005279541
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,2,1024,2,0,0.3366384029388428
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,2,1024,4,0,0.18239359855651854
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,2,1024,8,0,0.10389120578765869
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,2,1024,16,0,0.06422240138053895
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,2,1024,32,0,0.05596640110015869
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,2,1024,64,0,0.05573599934577942
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,128,2,1536,1,0,1.2249919891357421
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,64,2,1536,2,0,0.6426688194274902
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,32,2,1536,4,0,0.33511838912963865
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,16,2,1536,8,0,0.1763551950454712
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,8,2,1536,16,0,0.1006432056427002
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,4,2,1536,32,0,0.06648480296134948
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,2,2,1536,64,0,0.0728879988193512
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,2,1536,1,0,1.2435615539550782
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,2,1536,2,0,0.6499680042266845
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,2,1536,4,0,0.3386575937271118
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,2,1536,8,0,0.18483519554138184
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,2,1536,16,0,0.11108959913253784
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,2,1536,32,0,0.07719839811325073
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,2,1536,64,0,0.08441600203514099
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,128,2,2048,1,0,2.0321184158325196
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,64,2,2048,2,0,1.1251423835754395
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,32,2,2048,4,0,0.5638927936553955
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,16,2,2048,8,0,0.27821600437164307
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,8,2,2048,16,0,0.15027999877929688
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,4,2,2048,32,0,0.08403840065002441
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,2,2,2048,64,0,0.0791104018688202
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,2,2048,1,0,2.0694511413574217
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,2,2048,2,0,1.0870176315307618
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,2,2048,4,0,0.5609776020050049
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,2,2048,8,0,0.29401121139526365
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,2,2048,16,0,0.1632848024368286
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,2,2048,32,0,0.09863359928131103
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,2,2048,64,0,0.09094719886779785
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,128,2,3072,1,0,4.4732929229736325
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,64,2,3072,2,0,2.2737327575683595
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,32,2,3072,4,0,1.16833438873291
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,16,2,3072,8,0,0.5770880222320557
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,8,2,3072,16,0,0.3034303903579712
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,4,2,3072,32,0,0.17386239767074585
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,2,2,3072,64,0,0.11565920114517211
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,2,3072,1,0,4.372552108764649
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,2,3072,2,0,2.229243278503418
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,2,3072,4,0,1.128331184387207
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,2,3072,8,0,0.586352014541626
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,2,3072,16,0,0.31415040493011476
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,2,3072,32,0,0.1919535994529724
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,2,3072,64,0,0.13331680297851561
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,128,2,4096,1,0,7.9408210754394535
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,64,2,4096,2,0,4.007417678833008
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,32,2,4096,4,0,2.0137327194213865
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,16,2,4096,8,0,1.0124896049499512
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,8,2,4096,16,0,0.5241712093353271
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,4,2,4096,32,0,0.2718384027481079
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,2,2,4096,64,0,0.1538864016532898
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,2,4096,1,0,7.621934509277343
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,2,4096,2,0,3.8159950256347654
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,2,4096,4,0,1.920542335510254
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,2,4096,8,0,0.9816767692565918
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,2,4096,16,0,0.5141823768615723
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,2,4096,32,0,0.2871232032775879
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,2,4096,64,0,0.17361439466476442
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,128,2,6144,1,0,17.640219116210936
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,64,2,6144,2,0,8.632275390625
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,32,2,6144,4,0,4.364176177978516
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,16,2,6144,8,0,2.2172672271728517
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,8,2,6144,16,0,1.1811535835266114
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,4,2,6144,32,0,0.5825503826141357
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,2,2,6144,64,0,0.3277584075927734
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,2,6144,1,0,16.657858276367186
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,2,6144,2,0,8.312143707275391
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,2,6144,4,0,4.19072151184082
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,2,6144,8,0,2.136302375793457
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,2,6144,16,0,1.1084768295288085
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,2,6144,32,0,0.5939551830291748
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,2,6144,64,0,0.35629920959472655
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,128,2,8192,1,0,30.671786499023437
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,64,2,8192,2,0,15.326541137695312
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,32,2,8192,4,0,7.714727783203125
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,16,2,8192,8,0,3.787260818481445
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,8,2,8192,16,0,1.8792335510253906
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,4,2,8192,32,0,0.9548815727233887
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,2,2,8192,64,0,0.5181583881378173
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,2,8192,1,0,29.3240966796875
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,2,8192,2,0,14.783522033691407
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,2,8192,4,0,7.310201263427734
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,2,8192,8,0,3.638859176635742
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,2,8192,16,0,1.8566144943237304
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,2,8192,32,0,0.9576944351196289
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,2,8192,64,0,0.5307775974273682
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,128,2,10240,1,0,47.61624755859375
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,64,2,10240,2,0,23.91008758544922
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,32,2,10240,4,0,11.903157043457032
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,16,2,10240,8,0,5.933190536499024
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,8,2,10240,16,0,2.9695856094360353
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,4,2,10240,32,0,1.488167953491211
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,2,2,10240,64,0,0.8198160171508789
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,2,10240,1,0,45.428594970703124
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,2,10240,2,0,22.688758850097656
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,2,10240,4,0,11.490516662597656
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,2,10240,8,0,5.759239959716797
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,2,10240,16,0,2.8445407867431642
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,2,10240,32,0,1.3788911819458007
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,2,10240,64,0,0.7903200149536133
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,128,2,12288,1,0,68.2189208984375
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,64,2,12288,2,0,33.645285034179686
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,32,2,12288,4,0,17.313023376464844
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,16,2,12288,8,0,8.414545440673828
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,8,2,12288,16,0,4.160052871704101
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,4,2,12288,32,0,2.1018848419189453
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,2,2,12288,64,0,1.1067744255065919
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,2,12288,1,0,64.90791015625
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,2,12288,2,0,32.54936828613281
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,2,12288,4,0,16.199517822265626
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,2,12288,8,0,8.21024169921875
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,2,12288,16,0,4.134804916381836
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,2,12288,32,0,2.0968095779418947
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,2,12288,64,0,1.1213295936584473
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,128,2,16384,1,0,120.1866455078125
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,64,2,16384,2,0,59.832916259765625
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,32,2,16384,4,0,30.1453857421875
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,16,2,16384,8,0,15.089295959472656
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,8,2,16384,16,0,7.622609710693359
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,4,2,16384,32,0,3.802620697021484
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,2,2,16384,64,0,1.8990543365478516
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,2,16384,1,0,114.31651611328125
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,2,16384,2,0,57.07125244140625
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,2,16384,4,0,28.673568725585938
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,2,16384,8,0,14.467678833007813
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,2,16384,16,0,7.21346435546875
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,2,16384,32,0,3.610696029663086
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,2,16384,64,0,1.866119956970215
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,128,2,32768,1,0,475.956689453125
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,64,2,32768,2,0,236.4455322265625
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,32,2,32768,4,0,118.54783935546875
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,16,2,32768,8,0,59.633349609375
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,8,2,32768,16,0,29.8670166015625
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,4,2,32768,32,0,15.08118133544922
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,2,2,32768,64,0,7.511054229736328
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,2,32768,1,0,452.440771484375
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,2,32768,2,0,225.1943115234375
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,2,32768,4,0,113.01837158203125
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,2,32768,8,0,56.81878662109375
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,2,32768,16,0,28.6293212890625
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,2,32768,32,0,14.424977111816407
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,2,32768,64,0,7.347801971435547
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,128,4,1,1,0,0.014207999408245086
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,64,4,1,2,0,0.013803200423717498
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,32,4,1,4,0,0.013262400031089782
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,16,4,1,8,0,0.013148799538612366
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,8,4,1,16,0,0.01302880048751831
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,4,4,1,32,0,0.012950399518013
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,2,4,1,64,0,0.01313440054655075
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,4,1,1,0,0.02037599980831146
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,4,1,2,0,0.01992480009794235
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,4,1,4,0,0.019382399320602418
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,4,1,8,0,0.019254399836063384
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,4,1,16,0,0.0192208006978035
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,4,1,32,0,0.019171200692653656
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,4,1,64,0,0.019020800292491914
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,128,4,16,1,0,0.015220800042152404
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,64,4,16,2,0,0.014575999975204468
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,32,4,16,4,0,0.01449279934167862
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,16,4,16,8,0,0.014256000518798828
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,8,4,16,16,0,0.013598400354385375
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,4,4,16,32,0,0.01326719969511032
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,2,4,16,64,0,0.012720000743865967
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,4,16,1,0,0.021755200624465943
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,4,16,2,0,0.021422399580478667
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,4,16,4,0,0.020403200387954713
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,4,16,8,0,0.019491200149059296
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,4,16,16,0,0.019313600659370423
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,4,16,32,0,0.019121600687503813
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,4,16,64,0,0.01860959976911545
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,128,4,32,1,0,0.02444320023059845
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,64,4,32,2,0,0.015388800203800202
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,32,4,32,4,0,0.01467359960079193
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,16,4,32,8,0,0.014739200472831726
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,8,4,32,16,0,0.014060799777507783
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,4,4,32,32,0,0.01356160044670105
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,2,4,32,64,0,0.013166399300098419
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,4,32,1,0,0.03174560070037842
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,4,32,2,0,0.02175839990377426
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,4,32,4,0,0.020985600352287293
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,4,32,8,0,0.02046239972114563
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,4,32,16,0,0.019492800533771514
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,4,32,32,0,0.0190528005361557
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,4,32,64,0,0.018961599469184874
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,128,4,64,1,0,0.04423680007457733
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,64,4,64,2,0,0.02306720018386841
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,32,4,64,4,0,0.015651200711727143
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,16,4,64,8,0,0.015190400183200836
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,8,4,64,16,0,0.014552000164985656
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,4,4,64,32,0,0.014604799449443817
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,2,4,64,64,0,0.013753600418567657
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,4,64,1,0,0.05337920188903809
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,4,64,2,0,0.030054399371147157
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,4,64,4,0,0.022166399657726286
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,4,64,8,0,0.021873599290847777
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,4,64,16,0,0.021108800172805788
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,4,64,32,0,0.020529599487781526
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,4,64,64,0,0.0195360004901886
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,128,4,128,1,0,0.083651202917099
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,64,4,128,2,0,0.04743039906024933
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,32,4,128,4,0,0.02667680084705353
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,16,4,128,8,0,0.017800000309944154
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,8,4,128,16,0,0.01725279986858368
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,4,4,128,32,0,0.017076799273490907
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,2,4,128,64,0,0.0162992000579834
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,4,128,1,0,0.09299359917640686
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,4,128,2,0,0.0574783980846405
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,4,128,4,0,0.03392159938812256
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,4,128,8,0,0.024323199689388276
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,4,128,16,0,0.023689599335193635
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,4,128,32,0,0.023265600204467773
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,4,128,64,0,0.022465600073337554
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,128,4,256,1,0,0.17532479763031006
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,64,4,256,2,0,0.09534720182418824
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,32,4,256,4,0,0.05463520288467407
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,16,4,256,8,0,0.03228000104427338
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,8,4,256,16,0,0.02202560007572174
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,4,4,256,32,0,0.02165440022945404
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,2,4,256,64,0,0.021704000234603883
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,4,256,1,0,0.18462079763412476
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,4,256,2,0,0.10468159914016724
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,4,256,4,0,0.06405760049819946
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,4,256,8,0,0.0404911994934082
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,4,256,16,0,0.029446399211883544
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,4,256,32,0,0.028784000873565675
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,4,256,64,0,0.02863680124282837
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,128,4,512,1,0,0.4273856163024902
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,64,4,512,2,0,0.22574241161346437
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,32,4,512,4,0,0.12100640535354615
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,16,4,512,8,0,0.06860960125923157
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,8,4,512,16,0,0.05082079768180847
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,4,4,512,32,0,0.03386400043964386
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,2,4,512,64,0,0.03763040006160736
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,4,512,1,0,0.43941922187805177
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,4,512,2,0,0.23729119300842286
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,4,512,4,0,0.13362079858779907
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,4,512,8,0,0.08064640164375306
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,4,512,16,0,0.06391199827194213
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,4,512,32,0,0.04408000111579895
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,4,512,64,0,0.04961119890213013
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,128,4,1024,1,0,1.2378543853759765
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,64,4,1024,2,0,0.6346144199371337
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,32,4,1024,4,0,0.3334111928939819
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,16,4,1024,8,0,0.1773408055305481
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,8,4,1024,16,0,0.09943360090255737
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,4,4,1024,32,0,0.07597919702529907
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,2,4,1024,64,0,0.052963197231292725
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,4,1024,1,0,1.2561840057373046
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,4,1024,2,0,0.6574543952941895
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,4,1024,4,0,0.3480799913406372
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,4,1024,8,0,0.19164320230484008
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,4,1024,16,0,0.11426080465316772
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,4,1024,32,0,0.0925104022026062
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,4,1024,64,0,0.066975998878479
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,128,4,1536,1,0,2.523151969909668
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,64,4,1536,2,0,1.3257439613342286
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,32,4,1536,4,0,0.6647264003753662
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,16,4,1536,8,0,0.3386624097824097
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,8,4,1536,16,0,0.18208800554275512
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,4,4,1536,32,0,0.11967680454254151
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,2,4,1536,64,0,0.08711040019989014
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,4,1536,1,0,2.4779935836791993
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,4,1536,2,0,1.2875568389892578
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,4,1536,4,0,0.6585552215576171
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,4,1536,8,0,0.350379204750061
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,4,1536,16,0,0.1998960018157959
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,4,1536,32,0,0.13846559524536134
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,4,1536,64,0,0.1072543978691101
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,128,4,2048,1,0,4.3004302978515625
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,64,4,2048,2,0,2.1443695068359374
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,32,4,2048,4,0,1.0858991622924805
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,16,4,2048,8,0,0.5563231945037842
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,8,4,2048,16,0,0.29384961128234866
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,4,4,2048,32,0,0.16273119449615478
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,2,4,2048,64,0,0.12920479774475097
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,4,2048,1,0,4.135184097290039
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,4,2048,2,0,2.1121919631958006
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,4,2048,4,0,1.0714303970336914
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,4,2048,8,0,0.5603936195373536
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,4,2048,16,0,0.3108880043029785
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,4,2048,32,0,0.18381279706954956
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,4,2048,64,0,0.15027519464492797
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,128,4,3072,1,0,9.221521759033203
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,64,4,3072,2,0,4.778220748901367
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,32,4,3072,4,0,2.3441871643066405
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,16,4,3072,8,0,1.1715519905090332
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,8,4,3072,16,0,0.6231535911560059
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,4,4,3072,32,0,0.33128321170806885
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,2,4,3072,64,0,0.21743519306182862
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,4,3072,1,0,9.057711791992187
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,4,3072,2,0,4.532696151733399
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,4,3072,4,0,2.2260000228881838
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,4,3072,8,0,1.1528287887573243
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,4,3072,16,0,0.5992623805999756
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,4,3072,32,0,0.34389920234680177
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,4,3072,64,0,0.23891360759735109
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,128,4,4096,1,0,16.384901428222655
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,64,4,4096,2,0,8.144203186035156
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,32,4,4096,4,0,3.836671829223633
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,16,4,4096,8,0,2.001896095275879
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,8,4,4096,16,0,1.0248096466064454
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,4,4,4096,32,0,0.5449151992797852
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,2,4,4096,64,0,0.29794559478759763
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,4,4096,1,0,15.208465576171875
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,4,4096,2,0,7.603860473632812
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,4,4096,4,0,4.09484634399414
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,4,4096,8,0,1.9567455291748046
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,4,4096,16,0,1.027616024017334
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,4,4096,32,0,0.5500576019287109
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,4,4096,64,0,0.32683520317077636
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,128,4,6144,1,0,35.31105346679688
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,64,4,6144,2,0,17.53677978515625
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,32,4,6144,4,0,8.761070251464844
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,16,4,6144,8,0,4.377926254272461
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,8,4,6144,16,0,2.23604793548584
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,4,4,6144,32,0,1.175331211090088
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,2,4,6144,64,0,0.6336463928222656
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,4,6144,1,0,33.614315795898435
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,4,6144,2,0,16.815089416503906
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,4,6144,4,0,8.36028823852539
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,4,6144,8,0,4.208160018920898
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,4,6144,16,0,2.1377695083618162
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,4,6144,32,0,1.1442432403564453
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,4,6144,64,0,0.6539999961853027
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,128,4,8192,1,0,61.560028076171875
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,64,4,8192,2,0,30.94808349609375
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,32,4,8192,4,0,15.511398315429688
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,16,4,8192,8,0,7.734811401367187
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,8,4,8192,16,0,3.804751968383789
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,4,4,8192,32,0,1.9646783828735352
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,2,4,8192,64,0,1.0464559555053712
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,4,8192,1,0,58.809442138671876
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,4,8192,2,0,29.450253295898438
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,4,8192,4,0,14.914956665039062
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,4,8192,8,0,7.501152038574219
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,4,8192,16,0,3.7067886352539063
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,4,8192,32,0,1.917188835144043
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,4,8192,64,0,1.044092845916748
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,128,4,10240,1,0,95.16394653320313
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,64,4,10240,2,0,47.7043212890625
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,32,4,10240,4,0,23.93800506591797
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,16,4,10240,8,0,12.099030303955079
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,8,4,10240,16,0,5.924142456054687
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,4,4,10240,32,0,2.959492874145508
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,2,4,10240,64,0,1.5461024284362792
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,4,10240,1,0,90.7117431640625
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,4,10240,2,0,45.42985229492187
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,4,10240,4,0,22.852969360351562
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,4,10240,8,0,11.472679901123048
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,4,10240,16,0,5.829641723632813
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,4,10240,32,0,2.9063711166381836
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,4,10240,64,0,1.5812368392944336
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,128,4,12288,1,0,136.2811279296875
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,64,4,12288,2,0,68.18741455078126
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,32,4,12288,4,0,34.16879577636719
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,16,4,12288,8,0,17.18236083984375
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,8,4,12288,16,0,8.537928009033203
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,4,4,12288,32,0,4.329033660888672
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,2,4,12288,64,0,2.1960208892822264
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,4,12288,1,0,129.84974365234376
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,4,12288,2,0,64.92481689453125
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,4,12288,4,0,32.585873413085935
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,4,12288,8,0,16.559925842285157
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,4,12288,16,0,8.179752349853516
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,4,12288,32,0,4.209854507446289
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,4,12288,64,0,2.218876838684082
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,128,4,16384,1,0,240.4458251953125
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,64,4,16384,2,0,119.8558349609375
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,32,4,16384,4,0,60.227239990234374
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,16,4,16384,8,0,30.28545227050781
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,8,4,16384,16,0,15.20325164794922
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,4,4,16384,32,0,7.621878051757813
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,2,4,16384,64,0,3.8134334564208983
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,4,16384,1,0,229.0098388671875
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,4,16384,2,0,114.24530029296875
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,4,16384,4,0,57.48598022460938
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,4,16384,8,0,28.830810546875
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,4,16384,16,0,14.751016235351562
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,4,16384,32,0,7.337911987304688
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,4,16384,64,0,3.7812862396240234
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,128,8,1,1,0,0.014295999705791474
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,64,8,1,2,0,0.013915200531482697
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,32,8,1,4,0,0.013195200264453888
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,16,8,1,8,0,0.012600000202655792
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,8,8,1,16,0,0.012227199971675873
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,4,8,1,32,0,0.01239520013332367
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,2,8,1,64,0,0.012094400078058242
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,8,1,1,0,0.01931840032339096
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,8,1,2,0,0.018878400325775146
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,8,1,4,0,0.018412800133228303
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,8,1,8,0,0.01791999936103821
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,8,1,16,0,0.017900800704956053
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,8,1,32,0,0.01799200028181076
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,8,1,64,0,0.017895999550819396
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,128,8,16,1,0,0.024486400187015533
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,64,8,16,2,0,0.015435199439525604
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,32,8,16,4,0,0.014414399862289429
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,16,8,16,8,0,0.014550399780273438
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,8,8,16,16,0,0.013734400272369385
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,4,8,16,32,0,0.013385599851608277
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,2,8,16,64,0,0.012771199643611907
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,8,16,1,0,0.0321040004491806
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,8,16,2,0,0.021700799465179443
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,8,16,4,0,0.020894399285316466
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,8,16,8,0,0.020494399964809416
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,8,16,16,0,0.020627200603485107
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,8,16,32,0,0.019223999977111817
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,8,16,64,0,0.018457600474357606
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,128,8,32,1,0,0.044391998648643495
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,64,8,32,2,0,0.02327679991722107
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,32,8,32,4,0,0.01576640009880066
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,16,8,32,8,0,0.014828799664974213
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,8,8,32,16,0,0.01480800062417984
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,4,8,32,32,0,0.014233599603176116
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,2,8,32,64,0,0.013920000195503235
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,8,32,1,0,0.05365440249443054
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,8,32,2,0,0.029790401458740234
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,8,32,4,0,0.022243200242519377
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,8,32,8,0,0.02091040015220642
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,8,32,16,0,0.020824000239372253
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,8,32,32,0,0.020737600326538087
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,8,32,64,0,0.019662399590015412
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,128,8,64,1,0,0.07812319993972779
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,64,8,64,2,0,0.044863998889923096
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,32,8,64,4,0,0.024009600281715393
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,16,8,64,8,0,0.016335999965667723
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,8,8,64,16,0,0.015467199683189391
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,4,8,64,32,0,0.015588800609111785
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,2,8,64,64,0,0.014876799285411834
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,8,64,1,0,0.0883679986000061
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,8,64,2,0,0.054580801725387575
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,8,64,4,0,0.0316783994436264
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,8,64,8,0,0.023233599960803986
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,8,64,16,0,0.022193600237369538
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,8,64,32,0,0.02178879976272583
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,8,64,64,0,0.021031999588012697
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,128,8,128,1,0,0.15116000175476074
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,64,8,128,2,0,0.08351359963417053
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,32,8,128,4,0,0.0488783985376358
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,16,8,128,8,0,0.028152000904083253
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,8,8,128,16,0,0.018940800428390504
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,4,8,128,32,0,0.0185248002409935
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,2,8,128,64,0,0.01884640008211136
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,8,128,1,0,0.16076159477233887
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,8,128,2,0,0.09314240217208862
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,8,128,4,0,0.05883520245552063
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,8,128,8,0,0.03641439974308014
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,8,128,16,0,0.026313599944114686
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,8,128,32,0,0.025547200441360475
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,8,128,64,0,0.02568959891796112
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,128,8,256,1,0,0.3297375917434692
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,64,8,256,2,0,0.1764464020729065
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,32,8,256,4,0,0.0976639986038208
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,16,8,256,8,0,0.05748159885406494
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,8,8,256,16,0,0.03683519959449768
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,4,8,256,32,0,0.025255998969078063
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,2,8,256,64,0,0.025054401159286498
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,8,256,1,0,0.34184000492095945
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,8,256,2,0,0.18792799711227418
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,8,256,4,0,0.10937919616699218
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,8,256,8,0,0.06982719898223877
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,8,256,16,0,0.04985280036926269
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,8,256,32,0,0.034939199686050415
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,8,256,64,0,0.0344976007938385
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,128,8,512,1,0,0.8333071708679199
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,64,8,512,2,0,0.4362512111663818
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,32,8,512,4,0,0.2311631917953491
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,16,8,512,8,0,0.1260815978050232
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,8,8,512,16,0,0.08934400081634522
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,4,8,512,32,0,0.06043999791145325
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,2,8,512,64,0,0.040468800067901614
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,8,512,1,0,0.8483119964599609
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,8,512,2,0,0.4521471977233887
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,8,512,4,0,0.24676640033721925
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,8,512,8,0,0.14283839464187623
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,8,512,16,0,0.10587999820709229
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,8,512,32,0,0.07718080282211304
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,8,512,64,0,0.05455039739608765
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,128,8,1024,1,0,2.511916732788086
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,64,8,1024,2,0,1.308779239654541
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,32,8,1024,4,0,0.6692304134368896
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,16,8,1024,8,0,0.3453887939453125
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,8,8,1024,16,0,0.18874560594558715
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,4,8,1024,32,0,0.13351360559463502
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,2,8,1024,64,0,0.09455040097236633
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,8,1024,1,0,2.5026271820068358
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,8,1024,2,0,1.3138992309570312
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,8,1024,4,0,0.6745215892791748
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,8,1024,8,0,0.36262400150299073
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,8,1024,16,0,0.21216158866882323
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,8,1024,32,0,0.15683200359344482
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,8,1024,64,0,0.11801600456237793
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,128,8,1536,1,0,5.198137664794922
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,64,8,1536,2,0,2.5980432510375975
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,32,8,1536,4,0,1.3171263694763184
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,16,8,1536,8,0,0.6634704113006592
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,8,8,1536,16,0,0.3499631881713867
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,4,8,1536,32,0,0.22443680763244628
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,2,8,1536,64,0,0.1516975998878479
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,8,1536,1,0,4.924142456054687
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,8,1536,2,0,2.562238311767578
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,8,1536,4,0,1.3225199699401855
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,8,1536,8,0,0.6896607875823975
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,8,1536,16,0,0.3803600072860718
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,8,1536,32,0,0.2546191930770874
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,8,1536,64,0,0.18238719701766967
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,128,8,2048,1,0,8.772134399414062
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,64,8,2048,2,0,4.396057510375977
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,32,8,2048,4,0,2.1681808471679687
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,16,8,2048,8,0,1.1179200172424317
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,8,8,2048,16,0,0.594652795791626
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,4,8,2048,32,0,0.32213759422302246
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,2,8,2048,64,0,0.22359840869903563
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,8,2048,1,0,8.646940612792969
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,8,2048,2,0,4.3042144775390625
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,8,2048,4,0,2.223031997680664
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,8,2048,8,0,1.1412624359130858
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,8,2048,16,0,0.6086927890777588
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,8,2048,32,0,0.3535248041152954
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,8,2048,64,0,0.26093919277191163
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,128,8,3072,1,0,18.677249145507812
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,64,8,3072,2,0,9.456046295166015
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,32,8,3072,4,0,4.860647964477539
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,16,8,3072,8,0,2.461235237121582
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,8,8,3072,16,0,1.2005279541015625
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,4,8,3072,32,0,0.6360735893249512
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,2,8,3072,64,0,0.39855520725250243
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,8,3072,1,0,17.729873657226562
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,8,3072,2,0,8.98834228515625
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,8,3072,4,0,4.6565296173095705
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,8,3072,8,0,2.3411792755126952
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,8,3072,16,0,1.2268431663513184
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,8,3072,32,0,0.6896096229553222
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,8,3072,64,0,0.4535520076751709
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,128,8,4096,1,0,32.225335693359376
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,64,8,4096,2,0,16.265936279296874
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,32,8,4096,4,0,8.050055694580077
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,16,8,4096,8,0,4.015801620483399
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,8,8,4096,16,0,2.0353328704833986
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,4,8,4096,32,0,1.1262816429138183
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,2,8,4096,64,0,0.5768352031707764
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,8,4096,1,0,30.892578125
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,8,4096,2,0,15.720777893066407
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,8,4096,4,0,7.825796508789063
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,8,4096,8,0,3.9345550537109375
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,8,4096,16,0,2.0306400299072265
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,8,4096,32,0,1.0867520332336427
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,8,4096,64,0,0.6355504035949707
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,128,8,6144,1,0,70.48119506835937
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,64,8,6144,2,0,35.51042785644531
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,32,8,6144,4,0,17.664517211914063
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,16,8,6144,8,0,8.9216796875
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,8,8,6144,16,0,4.484588623046875
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,4,8,6144,32,0,2.2873615264892577
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,2,8,6144,64,0,1.2446800231933595
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,8,6144,1,0,67.49395751953125
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,8,6144,2,0,33.71318054199219
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,8,6144,4,0,16.9691650390625
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,8,6144,8,0,8.527838134765625
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,8,6144,16,0,4.342697525024414
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,8,6144,32,0,2.2831663131713866
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,8,6144,64,0,1.2820079803466797
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,128,8,8192,1,0,123.32266845703126
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,64,8,8192,2,0,61.66934814453125
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,32,8,8192,4,0,31.052047729492188
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,16,8,8192,8,0,15.670919799804688
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,8,8,8192,16,0,7.850323486328125
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,4,8,8192,32,0,3.9172863006591796
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,2,8,8192,64,0,2.040943908691406
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,8,8192,1,0,117.62801513671874
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,8,8192,2,0,59.00902099609375
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,8,8192,4,0,29.60881652832031
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,8,8192,8,0,15.073471069335938
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,8,8192,16,0,7.5563201904296875
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,8,8192,32,0,3.8838527679443358
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,8,8192,64,0,2.09451847076416
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,128,16,1,1,0,0.014310400187969207
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,64,16,1,2,0,0.013105599582195282
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,32,16,1,4,0,0.012707200646400452
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,16,16,1,8,0,0.012409599870443344
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,8,16,1,16,0,0.012257599830627441
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,4,16,1,32,0,0.012057600170373916
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,2,16,1,64,0,0.012270399928092956
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,16,1,1,0,0.01966080069541931
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,16,1,2,0,0.018449600040912627
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,16,1,4,0,0.01802079975605011
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,16,1,8,0,0.01791200041770935
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,16,1,16,0,0.017923200130462648
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,16,1,32,0,0.017811200022697447
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,16,1,64,0,0.017828799784183502
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,128,16,16,1,0,0.04483039975166321
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,64,16,16,2,0,0.023240000009536743
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,32,16,16,4,0,0.015905599296092986
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,16,16,16,8,0,0.014907200634479523
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,8,16,16,16,0,0.015184000134468079
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,4,16,16,32,0,0.01406240016222
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,2,16,16,64,0,0.013199999928474426
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,16,16,1,0,0.054179197549819945
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,16,16,2,0,0.02993600070476532
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,16,16,4,0,0.02221439927816391
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,16,16,8,0,0.02118239998817444
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,16,16,16,0,0.021376000344753267
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,16,16,32,0,0.020175999402999877
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,16,16,64,0,0.019739200174808503
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,128,16,32,1,0,0.07796639800071717
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,64,16,32,2,0,0.04488480091094971
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,32,16,32,4,0,0.023923200368881226
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,16,16,32,8,0,0.016318400204181672
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,8,16,32,16,0,0.015507200360298156
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,4,16,32,32,0,0.01562879979610443
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,2,16,32,64,0,0.01465120017528534
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,16,32,1,0,0.08756960034370423
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,16,32,2,0,0.05460159778594971
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,16,32,4,0,0.030979201197624207
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,16,32,8,0,0.023134399950504304
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,16,32,16,0,0.022550399601459502
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,16,32,32,0,0.02181600034236908
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,16,32,64,0,0.020790399610996248
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,128,16,64,1,0,0.14112800359725952
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,64,16,64,2,0,0.07968639731407165
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,32,16,64,4,0,0.04635519981384277
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,16,16,64,8,0,0.025603199005126955
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,8,16,64,16,0,0.017697599530220032
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,4,16,64,32,0,0.017078399658203125
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,2,16,64,64,0,0.01720159947872162
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,16,64,1,0,0.15115360021591187
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,16,64,2,0,0.0902351975440979
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,16,64,4,0,0.056643199920654294
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,16,64,8,0,0.03372159898281098
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,16,64,16,0,0.025284799933433532
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,16,64,32,0,0.024422399699687958
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,16,64,64,0,0.024172799289226533
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,128,16,128,1,0,0.28401119709014894
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,64,16,128,2,0,0.15230400562286378
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,32,16,128,4,0,0.0852735996246338
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,16,16,128,8,0,0.051678401231765744
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,8,16,128,16,0,0.031201601028442383
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,4,16,128,32,0,0.022283199429512023
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,2,16,128,64,0,0.02205760031938553
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,16,128,1,0,0.2962127923965454
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,16,128,2,0,0.164409601688385
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,16,128,4,0,0.09757760167121887
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,16,128,8,0,0.06423360109329224
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,16,128,16,0,0.04458079934120178
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,16,128,32,0,0.03207840025424957
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,16,128,64,0,0.031414398550987245
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,128,16,256,1,0,0.639412784576416
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,64,16,256,2,0,0.3386143922805786
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,32,16,256,4,0,0.18279680013656616
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,16,16,256,8,0,0.10130239725112915
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,8,16,256,16,0,0.06468639969825744
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,4,16,256,32,0,0.044182398915290834
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,2,16,256,64,0,0.031697601079940796
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,16,256,1,0,0.6543615818023681
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,16,256,2,0,0.35438880920410154
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,16,256,4,0,0.19883999824523926
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,16,256,8,0,0.11755679845809937
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,16,256,16,0,0.0810800015926361
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,16,256,32,0,0.06079519987106323
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,16,256,64,0,0.04529440104961395
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,128,16,512,1,0,1.6417999267578125
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,64,16,512,2,0,0.8587056159973144
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,32,16,512,4,0,0.459657621383667
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,16,16,512,8,0,0.24335200786590577
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,8,16,512,16,0,0.16753120422363282
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,4,16,512,32,0,0.10492960214614869
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,2,16,512,64,0,0.07492799758911133
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,16,512,1,0,1.6730192184448243
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,16,512,2,0,0.8865936279296875
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,16,512,4,0,0.47669281959533694
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,16,512,8,0,0.2646656036376953
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,16,512,16,0,0.18868639469146728
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,16,512,32,0,0.12908480167388917
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,16,512,64,0,0.09881759881973266
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,128,16,1024,1,0,5.217752075195312
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,64,16,1024,2,0,2.602275276184082
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,32,16,1024,4,0,1.339254379272461
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,16,16,1024,8,0,0.6943007946014405
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,8,16,1024,16,0,0.3722640037536621
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,4,16,1024,32,0,0.24808320999145508
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,2,16,1024,64,0,0.1638319969177246
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,16,1024,1,0,4.989051055908203
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,16,1024,2,0,2.5555871963500976
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,16,1024,4,0,1.3486783981323243
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,16,1024,8,0,0.7069568157196044
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,16,1024,16,0,0.40430560111999514
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,16,1024,32,0,0.28664000034332277
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,16,1024,64,0,0.20334880352020263
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,128,16,1536,1,0,10.510982513427734
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,64,16,1536,2,0,5.259400177001953
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,32,16,1536,4,0,2.6362207412719725
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,16,16,1536,8,0,1.3677663803100586
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,8,16,1536,16,0,0.7209519863128662
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,4,16,1536,32,0,0.43671841621398927
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,2,16,1536,64,0,0.27738080024719236
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,16,1536,1,0,10.270238494873047
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,16,1536,2,0,5.152588653564453
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,16,1536,4,0,2.5873247146606446
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,16,1536,8,0,1.3314144134521484
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,16,1536,16,0,0.7389520168304443
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,16,1536,32,0,0.4819471836090088
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,16,1536,64,0,0.32969601154327394
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,128,16,2048,1,0,17.601113891601564
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,64,16,2048,2,0,8.880818939208984
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,32,16,2048,4,0,4.741985702514649
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,16,16,2048,8,0,2.3528720855712892
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,8,16,2048,16,0,1.2105376243591308
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,4,16,2048,32,0,0.6324639797210694
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,2,16,2048,64,0,0.41363039016723635
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,16,2048,1,0,16.872843933105468
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,16,2048,2,0,8.549673461914063
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,16,2048,4,0,4.383947372436523
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,16,2048,8,0,2.320096015930176
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,16,2048,16,0,1.2452799797058105
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,16,2048,32,0,0.7020544052124024
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,16,2048,64,0,0.4850912094116211
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,128,16,3072,1,0,37.921295166015625
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,64,16,3072,2,0,18.869952392578124
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,32,16,3072,4,0,9.476367950439453
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,16,16,3072,8,0,4.853414535522461
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,8,16,3072,16,0,2.4359792709350585
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,4,16,3072,32,0,1.2891072273254394
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,2,16,3072,64,0,0.7770319938659668
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,16,3072,1,0,36.30873107910156
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,16,3072,2,0,18.022621154785156
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,16,3072,4,0,9.10201416015625
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,16,3072,8,0,4.773793411254883
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,16,3072,16,0,2.469822311401367
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,16,3072,32,0,1.3715999603271485
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,16,3072,64,0,0.8510944366455078
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,128,16,4096,1,0,64.74075927734376
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,64,16,4096,2,0,32.50130004882813
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,32,16,4096,4,0,16.51643524169922
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,16,16,4096,8,0,8.247294616699218
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,8,16,4096,16,0,4.140596771240235
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,4,16,4096,32,0,2.1544368743896483
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,2,16,4096,64,0,1.1615535736083984
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,16,4096,1,0,62.18814697265625
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,16,4096,2,0,31.257135009765626
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,16,4096,4,0,15.813827514648438
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,16,4096,8,0,7.942348480224609
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,16,4096,16,0,4.149051284790039
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,16,4096,32,0,2.1922256469726564
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,16,4096,64,0,1.2714896202087402
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,128,32,1,1,0,0.014531199634075165
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,64,32,1,2,0,0.014017599821090698
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,32,32,1,4,0,0.013240000605583191
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,16,32,1,8,0,0.012907199561595917
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,8,32,1,16,0,0.01273760050535202
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,4,32,1,32,0,0.012796799838542938
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,2,32,1,64,0,0.012558400630950928
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,32,1,1,0,0.01995840072631836
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,32,1,2,0,0.01931679993867874
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,32,1,4,0,0.01920959949493408
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,32,1,8,0,0.018500800430774688
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,32,1,16,0,0.018160000443458557
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,32,1,32,0,0.018036800622940063
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,32,1,64,0,0.01822720021009445
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,128,32,16,1,0,0.07915520071983337
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,64,32,16,2,0,0.04559360146522522
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,32,32,16,4,0,0.02462079972028732
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,16,32,16,8,0,0.01703200042247772
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,8,32,16,16,0,0.016051200032234193
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,4,32,16,32,0,0.01560479998588562
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,2,32,16,64,0,0.014822399616241455
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,32,16,1,0,0.08878080248832702
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,32,16,2,0,0.05533120036125183
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,32,16,4,0,0.032148799300193785
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,32,16,8,0,0.02344000041484833
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,32,16,16,0,0.023174400627613067
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,32,16,32,0,0.022302399575710296
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,32,16,64,0,0.021217599511146545
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,128,32,32,1,0,0.1415984034538269
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,64,32,32,2,0,0.07995520234107971
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,32,32,32,4,0,0.046798399090766905
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,16,32,32,8,0,0.02612000107765198
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,8,32,32,16,0,0.018464000523090364
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,4,32,32,32,0,0.0177839994430542
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,2,32,32,64,0,0.017470400035381316
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,32,32,1,0,0.15102880001068114
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,32,32,2,0,0.09027680158615112
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,32,32,4,0,0.056740802526473996
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,32,32,8,0,0.034948799014091494
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,32,32,16,0,0.025696000456809996
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,32,32,32,0,0.024855999648571013
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,32,32,64,0,0.024742400646209715
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,128,32,64,1,0,0.269321608543396
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,64,32,64,2,0,0.1449455976486206
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,32,32,64,4,0,0.08313599824905396
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,16,32,64,8,0,0.04975839853286743
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,8,32,64,16,0,0.029491201043128967
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,4,32,64,32,0,0.02144159972667694
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,2,32,64,64,0,0.02099840044975281
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,32,64,1,0,0.27797598838806153
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,32,64,2,0,0.1566848039627075
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,32,64,4,0,0.09562079906463623
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,32,64,8,0,0.062459200620651245
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,32,64,16,0,0.042345601320266726
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,32,64,32,0,0.03116160035133362
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,32,64,64,0,0.030478399991989136
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,128,32,128,1,0,0.5535488128662109
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,64,32,128,2,0,0.2915152072906494
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,32,32,128,4,0,0.15632319450378418
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,16,32,128,8,0,0.09208160042762756
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,8,32,128,16,0,0.058462399244308474
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,4,32,128,32,0,0.03924799859523773
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,2,32,128,64,0,0.029043200612068176
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,32,128,1,0,0.5679647922515869
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,32,128,2,0,0.30702240467071534
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,32,128,4,0,0.1711151957511902
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,32,128,8,0,0.1079632043838501
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,32,128,16,0,0.07448959946632386
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,32,128,32,0,0.05598239898681641
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,32,128,64,0,0.04287999868392944
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,128,32,256,1,0,1.2595888137817384
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,64,32,256,2,0,0.6665728092193604
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,32,32,256,4,0,0.3556960105895996
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,16,32,256,8,0,0.19088000059127808
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,8,32,256,16,0,0.11577279567718506
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,4,32,256,32,0,0.07866719961166382
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,2,32,256,64,0,0.0586031973361969
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,32,256,1,0,1.279806423187256
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,32,256,2,0,0.6873680114746094
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,32,256,4,0,0.37835040092468264
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,32,256,8,0,0.2139967918395996
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,32,256,16,0,0.13876160383224487
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,32,256,32,0,0.10171840190887452
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,32,256,64,0,0.08288000226020813
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,128,32,512,1,0,3.3505279541015627
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,64,32,512,2,0,1.73560791015625
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,32,32,512,4,0,0.907688045501709
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,16,32,512,8,0,0.47359838485717776
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,8,32,512,16,0,0.31813759803771974
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,4,32,512,32,0,0.19613759517669677
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,2,32,512,64,0,0.1329632043838501
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,32,512,1,0,3.321451187133789
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,32,512,2,0,1.7565216064453124
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,32,512,4,0,0.9424544334411621
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,32,512,8,0,0.5138288021087647
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,32,512,16,0,0.3586719989776611
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,32,512,32,0,0.2364896059036255
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,32,512,64,0,0.17395360469818116
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,128,32,1024,1,0,10.408814239501954
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,64,32,1024,2,0,5.2419391632080075
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,32,32,1024,4,0,2.646668815612793
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,16,32,1024,8,0,1.3484864234924316
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,8,32,1024,16,0,0.7256832122802734
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,4,32,1024,32,0,0.4701551914215088
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,2,32,1024,64,0,0.3028287887573242
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,32,1024,1,0,10.343160247802734
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,32,1024,2,0,5.210575866699219
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,32,1024,4,0,2.661204719543457
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,32,1024,8,0,1.3990639686584472
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,32,1024,16,0,0.7863664150238037
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,32,1024,32,0,0.5430736064910888
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,32,1024,64,0,0.37771039009094237
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,128,32,1536,1,0,21.16033477783203
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,64,32,1536,2,0,10.886911773681641
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,32,32,1536,4,0,5.541479873657226
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,16,32,1536,8,0,2.6973039627075197
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,8,32,1536,16,0,1.4459936141967773
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,4,32,1536,32,0,0.8512895584106446
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,2,32,1536,64,0,0.5227503776550293
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,32,1536,1,0,20.487210083007813
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,32,1536,2,0,10.527210998535157
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,32,1536,4,0,5.4267230987548825
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,32,1536,8,0,2.7003360748291017
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,32,1536,16,0,1.4736543655395509
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,32,1536,32,0,0.9418288230895996
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,32,1536,64,0,0.62467360496521
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,128,32,2048,1,0,35.989801025390626
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,64,32,2048,2,0,17.808140563964844
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,32,32,2048,4,0,9.0334716796875
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,16,32,2048,8,0,4.644321441650391
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,8,32,2048,16,0,2.3636192321777343
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,4,32,2048,32,0,1.2551199913024902
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,2,32,2048,64,0,0.7972400188446045
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,32,2048,1,0,34.39823913574219
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,32,2048,2,0,17.26187744140625
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,32,2048,4,0,8.707532501220703
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,32,2048,8,0,4.432430267333984
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,32,2048,16,0,2.380958366394043
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,32,2048,32,0,1.348910427093506
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,32,2048,64,0,0.9174063682556153
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,128,64,1,1,0,0.015967999398708344
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,64,64,1,2,0,0.014715200662612915
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,32,64,1,4,0,0.013648000359535218
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,16,64,1,8,0,0.01332319974899292
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,8,64,1,16,0,0.013468800485134125
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,4,64,1,32,0,0.01318880021572113
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,2,64,1,64,0,0.013462400436401368
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,64,1,1,0,0.02197439968585968
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,64,1,2,0,0.020734399557113647
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,64,1,4,0,0.019364799559116363
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,64,1,8,0,0.019377599656581878
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,64,1,16,0,0.01916320025920868
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,64,1,32,0,0.019148799777030944
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,64,1,64,0,0.019270400702953338
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,128,64,16,1,0,0.1431040048599243
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,64,64,16,2,0,0.08217920064926147
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,32,64,16,4,0,0.04818080067634582
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,16,64,16,8,0,0.026704001426696777
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,8,64,16,16,0,0.01897760033607483
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,4,64,16,32,0,0.017827199399471284
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,2,64,16,64,0,0.01714400053024292
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,64,16,1,0,0.15199999809265136
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,64,16,2,0,0.09131519794464112
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,64,16,4,0,0.057652801275253296
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,64,16,8,0,0.03489440083503723
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,64,16,16,0,0.026412799954414368
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,64,16,32,0,0.02539680004119873
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,64,16,64,0,0.024139200150966645
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,128,64,32,1,0,0.2670144081115723
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,64,64,32,2,0,0.14612640142440797
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,32,64,32,4,0,0.08434399962425232
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,16,64,32,8,0,0.051211202144622804
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,8,64,32,16,0,0.03051519989967346
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,4,64,32,32,0,0.022342400252819063
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,2,64,32,64,0,0.021113599836826324
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,64,32,1,0,0.2776416063308716
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,64,32,2,0,0.15770560503005981
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,64,32,4,0,0.0964464008808136
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,64,32,8,0,0.0631168007850647
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,64,32,16,0,0.04331679940223694
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,64,32,32,0,0.032020801305770875
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,64,32,64,0,0.03094080090522766
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,128,64,64,1,0,0.5240543842315674
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,64,64,64,2,0,0.27625598907470705
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,32,64,64,4,0,0.15264639854431153
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,16,64,64,8,0,0.09024320244789123
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,8,64,64,16,0,0.05696799755096436
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,4,64,64,32,0,0.03800320029258728
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,2,64,64,64,0,0.02844800055027008
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,64,64,1,0,0.5374383926391602
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,64,64,2,0,0.2883104085922241
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,64,64,4,0,0.16664479970932006
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,64,64,8,0,0.10609279870986939
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,64,64,16,0,0.07303360104560852
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,64,64,32,0,0.05418879985809326
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,64,64,64,0,0.042100799083709714
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,128,64,128,1,0,1.0834959983825683
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,64,64,128,2,0,0.5693759918212891
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,32,64,128,4,0,0.29781599044799806
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,16,64,128,8,0,0.16957600116729737
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,8,64,128,16,0,0.10575200319290161
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,4,64,128,32,0,0.07256640195846557
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,2,64,128,64,0,0.05462719798088074
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,64,128,1,0,1.1102239608764648
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,64,128,2,0,0.5924880027770996
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,64,128,4,0,0.32079999446868895
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,64,128,8,0,0.19243680238723754
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,64,128,16,0,0.12880959510803222
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,64,128,32,0,0.09589440226554871
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,64,128,64,0,0.07805439829826355
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,128,64,256,1,0,2.498912048339844
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,64,64,256,2,0,1.3232144355773925
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,32,64,256,4,0,0.7014095783233643
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,16,64,256,8,0,0.36957440376281736
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,8,64,256,16,0,0.21945600509643554
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,4,64,256,32,0,0.1444991946220398
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,2,64,256,64,0,0.10596959590911866
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,64,256,1,0,2.533497619628906
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,64,256,2,0,1.3599920272827148
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,64,256,4,0,0.7415584087371826
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,64,256,8,0,0.40944638252258303
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,64,256,16,0,0.2591295957565308
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,64,256,32,0,0.18407200574874877
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,64,256,64,0,0.14595199823379518
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,128,64,512,1,0,6.772395324707031
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,64,64,512,2,0,3.4761600494384766
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,32,64,512,4,0,1.7952720642089843
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,16,64,512,8,0,0.9296352386474609
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,8,64,512,16,0,0.6185440063476563
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,4,64,512,32,0,0.3719280004501343
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,2,64,512,64,0,0.2492095947265625
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,64,512,1,0,6.701121520996094
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,64,512,2,0,3.4949409484863283
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,64,512,4,0,1.8664079666137696
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,64,512,8,0,1.0091792106628419
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,64,512,16,0,0.6939551830291748
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,64,512,32,0,0.44849438667297364
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,64,512,64,0,0.3252671957015991
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,128,64,1024,1,0,21.015113830566406
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,64,64,1024,2,0,10.799246215820313
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,32,64,1024,4,0,5.564444732666016
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,16,64,1024,8,0,2.703638458251953
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,8,64,1024,16,0,1.432033634185791
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,4,64,1024,32,0,0.9190735816955566
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,2,64,1024,64,0,0.579366397857666
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,64,1024,1,0,20.326924133300782
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,64,1024,2,0,10.498750305175781
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,64,1024,4,0,5.518979263305664
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,64,1024,8,0,2.7748767852783205
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,64,1024,16,0,1.5553471565246582
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,64,1024,32,0,1.0552176475524901
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,64,1024,64,0,0.7199888229370117
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,128,128,1,1,0,0.026531198620796205
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,64,128,1,2,0,0.016519999504089354
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,32,128,1,4,0,0.015118399262428283
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,16,128,1,8,0,0.015272000432014465
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,8,128,1,16,0,0.01505119949579239
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,4,128,1,32,0,0.015140800178050995
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,2,128,1,64,0,0.01462559998035431
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,128,1,1,0,0.03213759958744049
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,128,1,2,0,0.022761599719524385
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,128,1,4,0,0.021377600729465485
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,128,1,8,0,0.02062239944934845
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,128,1,16,0,0.02091040015220642
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,128,1,32,0,0.020633600652217865
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,128,1,64,0,0.02065120041370392
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,128,128,16,1,0,0.26758561134338377
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,64,128,16,2,0,0.14652639627456665
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,32,128,16,4,0,0.08481600284576415
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,16,128,16,8,0,0.05140799880027771
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,8,128,16,16,0,0.031171199679374696
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,4,128,16,32,0,0.022918400168418885
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,2,128,16,64,0,0.021400000154972076
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,128,16,1,0,0.2800271987915039
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,128,16,2,0,0.15862239599227906
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,128,16,4,0,0.0968671977519989
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,128,16,8,0,0.06400480270385742
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,128,16,16,0,0.04433920085430145
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,128,16,32,0,0.03319360017776489
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,128,16,64,0,0.031108799576759338
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,128,128,32,1,0,0.5220719814300537
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,64,128,32,2,0,0.2740272045135498
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,32,128,32,4,0,0.15488799810409545
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,16,128,32,8,0,0.09126560091972351
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,8,128,32,16,0,0.05768160223960876
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,4,128,32,32,0,0.038791999220848083
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,2,128,32,64,0,0.02945759892463684
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,128,32,1,0,0.5384384155273437
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,128,32,2,0,0.29066879749298097
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,128,32,4,0,0.16796959638595582
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,128,32,8,0,0.10687839984893799
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,128,32,16,0,0.0738048017024994
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,128,32,32,0,0.05543199777603149
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,128,32,64,0,0.043188801407814024
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,128,128,64,1,0,1.03603515625
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,64,128,64,2,0,0.534660816192627
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,32,128,64,4,0,0.28678719997406005
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,16,128,64,8,0,0.1657088041305542
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,8,128,64,16,0,0.10478399991989136
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,4,128,64,32,0,0.07086079716682434
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,2,128,64,64,0,0.05277280211448669
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,128,64,1,0,1.0610896110534669
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,128,64,2,0,0.5596496105194092
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,128,64,4,0,0.3113600015640259
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,128,64,8,0,0.18936320543289184
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,128,64,16,0,0.12748320102691652
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,128,64,32,0,0.09424800276756287
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,128,64,64,0,0.07647680044174195
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,128,128,128,1,0,2.1534080505371094
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,64,128,128,2,0,1.127387237548828
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,32,128,128,4,0,0.5835087776184082
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,16,128,128,8,0,0.32684481143951416
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,8,128,128,16,0,0.19795520305633546
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,4,128,128,32,0,0.13566720485687256
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,2,128,128,64,0,0.10067199468612671
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,128,128,1,0,2.1928783416748048
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,128,128,2,0,1.1666080474853515
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,128,128,4,0,0.6220335960388184
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,128,128,8,0,0.3660320043563843
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,128,128,16,0,0.2380143880844116
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,128,128,32,0,0.17543519735336305
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,128,128,64,0,0.14128799438476564
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,128,128,256,1,0,4.981828689575195
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,64,128,256,2,0,2.641921615600586
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,32,128,256,4,0,1.398323154449463
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,16,128,256,8,0,0.7289631843566895
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,8,128,256,16,0,0.43099040985107423
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,4,128,256,32,0,0.27425599098205566
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,2,128,256,64,0,0.19814399480819703
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,128,256,1,0,5.053073501586914
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,128,256,2,0,2.698819160461426
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,128,256,4,0,1.4662063598632813
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,128,256,8,0,0.8026047706604004
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,128,256,16,0,0.49880638122558596
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,128,256,32,0,0.3490544080734253
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,128,256,64,0,0.27397119998931885
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,128,128,512,1,0,13.708830261230469
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,64,128,512,2,0,7.021616363525391
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,32,128,512,4,0,3.608612823486328
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,16,128,512,8,0,1.8515199661254882
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,8,128,512,16,0,1.2196847915649414
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,4,128,512,32,0,0.7226624011993408
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,2,128,512,64,0,0.47739200592041015
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,128,512,1,0,13.489599609375
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,128,512,2,0,6.963545227050782
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,128,512,4,0,3.713584136962891
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,128,512,8,0,1.988145637512207
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,128,512,16,0,1.3652864456176759
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,128,512,32,0,0.8688608169555664
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,128,512,64,0,0.6232736110687256
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,128,256,1,1,0,0.048316800594329835
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,64,256,1,2,0,0.024583999812602998
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,32,256,1,4,0,0.023294399678707122
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,16,256,1,8,0,0.022099199891090392
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,8,256,1,16,0,0.0221328005194664
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,4,256,1,32,0,0.02221920043230057
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,2,256,1,64,0,0.021913599967956544
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,256,1,1,0,0.05477920174598694
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,256,1,2,0,0.030955201387405394
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,256,1,4,0,0.028567999601364136
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,256,1,8,0,0.028683200478553772
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,256,1,16,0,0.02803199887275696
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,256,1,32,0,0.027988800406455995
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,256,1,64,0,0.028046399354934692
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,128,256,16,1,0,0.5219920158386231
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,64,256,16,2,0,0.27543840408325193
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,32,256,16,4,0,0.15468959808349608
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,16,256,16,8,0,0.09207680225372314
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,8,256,16,16,0,0.058595198392868045
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,4,256,16,32,0,0.038780799508094786
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,2,256,16,64,0,0.035787200927734374
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,256,16,1,0,0.5349199771881104
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,256,16,2,0,0.2889872074127197
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,256,16,4,0,0.16898880004882813
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,256,16,8,0,0.10765600204467773
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,256,16,16,0,0.07465440034866333
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,256,16,32,0,0.05500800013542175
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,256,16,64,0,0.048825600743293764
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,128,256,32,1,0,1.0296496391296386
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,64,256,32,2,0,0.5339216232299805
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,32,256,32,4,0,0.2904576063156128
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,16,256,32,8,0,0.16772960424423217
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,8,256,32,16,0,0.10603679418563842
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,4,256,32,32,0,0.07173280119895935
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,2,256,32,64,0,0.0526639997959137
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,256,32,1,0,1.0550000190734863
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,256,32,2,0,0.5556464195251465
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,256,32,4,0,0.31033279895782473
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,256,32,8,0,0.19038879871368408
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,256,32,16,0,0.12787519693374633
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,256,32,32,0,0.0948527991771698
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,256,32,64,0,0.07715839743614197
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,128,256,64,1,0,2.0459999084472655
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,64,256,64,2,0,1.061638355255127
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,32,256,64,4,0,0.5668479919433593
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,16,256,64,8,0,0.32013440132141113
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,8,256,64,16,0,0.19643039703369142
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,4,256,64,32,0,0.1347584009170532
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,2,256,64,64,0,0.09952960014343262
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,256,64,1,0,2.0986223220825195
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,256,64,2,0,1.104478359222412
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,256,64,4,0,0.6131199836730957
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,256,64,8,0,0.35904479026794434
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,256,64,16,0,0.23523519039154053
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,256,64,32,0,0.17386560440063475
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,256,64,64,0,0.1394703984260559
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,128,256,128,1,0,4.299556732177734
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,64,256,128,2,0,2.237851142883301
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,32,256,128,4,0,1.1516160011291503
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,16,256,128,8,0,0.6372064113616943
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,8,256,128,16,0,0.38817439079284666
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,4,256,128,32,0,0.251910400390625
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,2,256,128,64,0,0.18820159435272216
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,256,128,1,0,4.345756912231446
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,256,128,2,0,2.3159296035766603
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,256,128,4,0,1.2240351676940917
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,256,128,8,0,0.7159071922302246
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,256,128,16,0,0.4552944183349609
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,256,128,32,0,0.3276576042175293
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,256,128,64,0,0.26544320583343506
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,128,256,256,1,0,10.046027374267577
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,64,256,256,2,0,5.264225769042969
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,32,256,256,4,0,2.780564880371094
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,16,256,256,8,0,1.439236831665039
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,8,256,256,16,0,0.8412768363952636
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,4,256,256,32,0,0.5263679981231689
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,float16,2,256,256,64,0,0.37702240943908694
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,256,256,1,0,10.066697692871093
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,256,256,2,0,5.377734375
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,256,256,4,0,2.9143232345581054
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,256,256,8,0,1.5758864402770996
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,256,256,16,0,0.9865167617797852
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,256,256,32,0,0.6726624011993408
SGLang,0.5.10,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,256,256,64,0,0.5228799819946289
