framework,version,device,op_name,kernel_source,mla_dtype,kv_cache_dtype,num_heads,batch_size,isl,tp_size,step,latency
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,64,1,1,1,0,0.012772800028324127
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,32,1,1,2,0,0.012190400063991547
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,16,1,1,4,0,0.012166400253772736
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,8,1,1,8,0,0.012147200107574464
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,4,1,1,16,0,0.012121599912643433
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,2,1,1,32,0,0.012142399698495865
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,1,1,1,64,0,0.012095999717712403
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,1,1,1,0,0.018353599309921264
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,1,1,4,0,0.017664000391960144
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,1,1,8,0,0.017526400089263917
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,1,1,16,0,0.01775359958410263
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,1,1,32,0,0.017815999686717987
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,1,1,64,0,0.017543999850749968
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,64,1,16,1,0,0.014731200039386749
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,32,1,16,2,0,0.013766400516033173
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,4,1,16,16,0,0.013412800431251527
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,1,16,1,0,0.019628800451755524
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,1,16,2,0,0.019363200664520262
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,1,16,4,0,0.019441600143909454
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,1,16,16,0,0.019124799966812135
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,1,16,64,0,0.018479999899864197
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,64,1,32,1,0,0.014779199659824372
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,32,1,32,2,0,0.013764800131320953
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,16,1,32,4,0,0.013630400598049163
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,8,1,32,8,0,0.013692800700664521
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,4,1,32,16,0,0.013689599931240082
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,2,1,32,32,0,0.013383999466896057
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,1,1,32,64,0,0.013076800107955932
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,1,32,1,0,0.020916800200939178
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,1,32,2,0,0.019969600439071655
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,1,32,4,0,0.01969120055437088
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,1,32,8,0,0.019543999433517457
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,1,32,16,0,0.019340799748897554
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,1,32,32,0,0.019108800590038298
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,1,32,64,0,0.01868640035390854
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,64,1,64,1,0,0.015534399449825287
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,32,1,64,2,0,0.014996799826622009
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,16,1,64,4,0,0.014731200039386749
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,8,1,64,8,0,0.013689599931240082
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,4,1,64,16,0,0.01377120018005371
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,2,1,64,32,0,0.013806399703025819
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,1,1,64,64,0,0.013542400300502777
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,1,64,1,0,0.021452799439430237
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,1,64,2,0,0.021059200167655945
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,1,64,4,0,0.019993600249290467
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,1,64,8,0,0.019755199551582336
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,1,64,16,0,0.019633600115776063
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,1,64,32,0,0.019409599900245666
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,1,64,64,0,0.019148799777030944
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,64,1,128,1,0,0.01722240000963211
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,32,1,128,2,0,0.01703200042247772
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,16,1,128,4,0,0.016606399416923524
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,8,1,128,8,0,0.01568160057067871
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,4,1,128,16,0,0.015619200468063355
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,2,1,128,32,0,0.01544319987297058
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,1,1,128,64,0,0.01446560025215149
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,1,128,1,0,0.02388159930706024
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,1,128,2,0,0.022966399788856506
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,1,128,4,0,0.022838400304317476
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,1,128,8,0,0.022308799624443054
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,1,128,16,0,0.02131199985742569
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,1,128,32,0,0.021091200411319733
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,1,128,64,0,0.020294399559497835
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,64,1,256,1,0,0.028505599498748778
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,32,1,256,2,0,0.020472000539302825
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,16,1,256,4,0,0.02035519927740097
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,8,1,256,8,0,0.019064000248908995
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,4,1,256,16,0,0.0191103994846344
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,2,1,256,32,0,0.018993599712848662
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,1,1,256,64,0,0.01796800047159195
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,1,256,1,0,0.035953599214553836
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,1,256,2,0,0.027275198698043825
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,1,256,4,0,0.026396799087524413
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,1,256,8,0,0.025353598594665527
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,1,256,16,0,0.025964799523353576
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,1,256,32,0,0.024889600276947022
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,1,256,64,0,0.02367040067911148
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,64,1,512,1,0,0.0671072006225586
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,32,1,512,2,0,0.03569760024547577
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,16,1,512,4,0,0.027046400308609008
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,8,1,512,8,0,0.035175999999046324
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,4,1,512,16,0,0.028086400032043456
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,2,1,512,32,0,0.023792000114917757
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,1,1,512,64,0,0.022500799596309663
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,1,512,1,0,0.07765439748764039
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,1,512,2,0,0.04493600130081177
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,1,512,4,0,0.033897599577903746
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,1,512,8,0,0.04272480010986328
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,1,512,16,0,0.03519200086593628
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,1,512,32,0,0.03080959916114807
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,1,512,64,0,0.02940959930419922
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,64,1,1024,1,0,0.17282880544662477
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,32,1,1024,2,0,0.09180160164833069
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,16,1,1024,4,0,0.04892640113830567
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,8,1,1024,8,0,0.04075360000133514
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,4,1,1024,16,0,0.04286240041255951
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,2,1,1024,32,0,0.031887999176979064
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,1,1,1024,64,0,0.02735520005226135
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,1,1024,1,0,0.1822559952735901
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,1,1024,2,0,0.10175199508666992
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,1,16,8,0,0.019414399564266206
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,16,1,16,4,0,0.01379680037498474
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,2,1,16,32,0,0.0127920001745224
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,1,1024,4,0,0.057924801111221315
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,1,1024,8,0,0.048135998845100406
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,1,1024,16,0,0.051072001457214355
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,1,1024,32,0,0.04078400135040283
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,1,1,2,0,0.017822399735450745
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,64,1,1536,1,0,0.3235840082168579
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,16,1,1536,4,0,0.09482399821281433
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,8,1,1536,8,0,0.055129599571228025
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,2,1,1536,32,0,0.042499199509620667
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,1,1,1536,64,0,0.03407999873161316
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,4,1,1536,16,0,0.05977439880371094
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,1,1536,2,0,0.1785040020942688
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,1,1536,1,0,0.33423519134521484
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,1,1536,4,0,0.10569119453430176
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,1,1536,8,0,0.06419839859008789
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,1,1536,32,0,0.052660799026489256
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,1,1536,64,0,0.04344159960746765
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,64,1,2048,1,0,0.5257760047912597
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,8,1,16,8,0,0.013659200072288514
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,1,16,32,0,0.01836639940738678
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,16,1,2048,4,0,0.1417904019355774
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,8,1,2048,8,0,0.07603999972343445
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,1,1024,64,0,0.03480480015277863
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,1,1,2048,64,0,0.04162079989910126
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,32,1,1536,2,0,0.1681712031364441
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,1,2048,2,0,0.2815855979919434
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,1,2048,1,0,0.5356783866882324
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,1,2048,4,0,0.15421119928359986
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,1,2048,8,0,0.08850240111351013
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,1,2048,16,0,0.07875199913978577
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,1,2048,32,0,0.0706496000289917
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,1,2048,64,0,0.053504002094268796
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,1,1536,16,0,0.07111999988555909
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,16,1,3072,4,0,0.2816463947296143
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,8,1,3072,8,0,0.16140320301055908
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,32,1,3072,2,0,0.5508048057556152
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,2,1,3072,32,0,0.08224959969520569
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,1,1,3072,64,0,0.060868799686431885
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,64,1,3072,1,0,1.0723135948181153
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,1,3072,4,0,0.2937808036804199
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,32,1,2048,2,0,0.26936960220336914
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,1,3072,2,0,0.5539055824279785
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,1,3072,16,0,0.10895999670028686
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,1,3072,32,0,0.09607999920845031
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,1,3072,64,0,0.07436479926109314
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,2,1,2048,32,0,0.0580560028553009
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,32,1,4096,2,0,0.9176719665527344
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,8,1,4096,8,0,0.24527359008789062
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,4,1,4096,16,0,0.13260799646377563
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,64,1,4096,1,0,1.8695503234863282
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,2,1,4096,32,0,0.12440320253372192
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,1,1,4096,64,0,0.0990064024925232
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,4,1,3072,16,0,0.09910240173339843
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,1,4096,4,0,0.4752848148345947
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,1,4096,2,0,0.9776432037353515
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,1,1,16,64,0,0.01242239996790886
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,1,4096,8,0,0.25800158977508547
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,1,4096,16,0,0.15381759405136108
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,1,4096,64,0,0.10570240020751953
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,4,1,2048,16,0,0.06845440268516541
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,1,4096,1,0,1.8002016067504882
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,8,1,6144,8,0,0.507097578048706
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,16,1,4096,4,0,0.46793279647827146
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,16,1,6144,4,0,0.9955280303955079
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,4,1,6144,16,0,0.2968240022659302
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,2,1,6144,32,0,0.185808002948761
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,1,1,6144,64,0,0.13009920120239257
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,32,1,6144,2,0,1.9678943634033204
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,1,6144,8,0,0.5857503890991211
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,1,6144,4,0,0.9859423637390137
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,1,6144,32,0,0.20334079265594482
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,1,6144,16,0,0.3297807931900024
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,1,6144,2,0,1.9523536682128906
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,1,6144,64,0,0.14900319576263427
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,1,3072,8,0,0.17428640127182007
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,1,3072,1,0,1.078164768218994
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,64,1,6144,1,0,4.231391906738281
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,4,1,8192,16,0,0.44954719543457033
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,2,1,8192,32,0,0.27343358993530276
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,1,1,8192,64,0,0.23782720565795898
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,16,1,8192,4,0,1.7692527770996094
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,32,1,8192,2,0,3.7298305511474608
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,1,8192,4,0,1.8445968627929688
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,1,4096,32,0,0.13623839616775513
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,1,8192,32,0,0.26570720672607423
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,8,1,8192,8,0,0.870366382598877
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,1,8192,8,0,0.9848624229431152
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,1,8192,64,0,0.2551919937133789
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,1,6144,1,0,4.133475112915039
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,8,1,10240,8,0,1.3304112434387207
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,64,1,8192,1,0,7.778257751464844
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,16,1,10240,4,0,2.884943962097168
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,4,1,10240,16,0,0.7046735763549805
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,2,1,10240,32,0,0.459116792678833
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,1,8192,16,0,0.49986557960510253
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,1,8192,1,0,7.278665924072266
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,1,1,10240,64,0,0.343502402305603
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,1,8192,2,0,3.4919055938720702
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,1,10240,16,0,0.7234992027282715
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,1,10240,8,0,1.4190352439880372
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,1,10240,32,0,0.4225471973419189
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,1,10240,64,0,0.3352895975112915
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,32,1,10240,2,0,5.972646331787109
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,1,10240,4,0,2.635646438598633
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,8,1,12288,8,0,2.0434255599975586
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,1,10240,2,0,5.652355194091797
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,4,1,12288,16,0,1.0970175743103028
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,2,1,12288,32,0,0.5853983879089355
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,1,1,12288,64,0,0.38425920009613035
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,16,1,12288,4,0,4.198587036132812
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,64,1,10240,1,0,11.686879730224609
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,1,12288,8,0,1.8954032897949218
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,1,12288,16,0,1.0828224182128907
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,1,12288,32,0,0.6224864006042481
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,1,12288,64,0,0.40710082054138186
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,32,1,12288,2,0,8.541407775878906
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,1,10240,1,0,11.221435546875
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,1,12288,4,0,4.091990280151367
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,4,1,16384,16,0,1.852012825012207
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,2,1,16384,32,0,0.9459888458251953
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,8,1,16384,8,0,3.6119873046875
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,1,12288,2,0,8.018344116210937
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,1,1,16384,64,0,0.5103903770446777
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,64,1,12288,1,0,16.71832275390625
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,16,1,16384,4,0,7.9704429626464846
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,1,16384,16,0,1.969491195678711
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,1,16384,32,0,0.9521936416625977
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,1,16384,4,0,7.071430206298828
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,1,16384,64,0,0.5303487777709961
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,1,12288,1,0,16.6279296875
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,32,1,16384,2,0,14.78572998046875
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,1,16384,8,0,3.5520320892333985
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,1,16384,2,0,14.199964904785157
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,2,1,32768,32,0,3.5825119018554688
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,4,1,32768,16,0,7.2844398498535154
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,1,1,32768,64,0,2.1474639892578127
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,64,1,16384,1,0,30.348751831054688
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,8,1,32768,8,0,15.36881103515625
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,1,16384,1,0,28.894241333007812
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,1,32768,16,0,6.988854217529297
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,16,1,32768,4,0,30.1200439453125
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,1,32768,8,0,14.122256469726562
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,1,32768,32,0,3.5479183197021484
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,64,2,1,1,0,0.013748799264431
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,16,2,1,4,0,0.012969599664211273
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,8,2,1,8,0,0.012937599420547485
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,32,2,1,2,0,0.013361600041389466
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,2,2,1,32,0,0.012958399951457977
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,1,32768,64,0,1.8764671325683593
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,2,1,1,0,0.019888000190258028
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,2,1,2,0,0.019105599820613862
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,2,1,4,0,0.01912800073623657
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,2,1,8,0,0.01892479956150055
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,2,1,16,0,0.019054399430751802
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,2,1,32,0,0.018764799833297728
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,2,1,64,0,0.0180976003408432
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,64,2,16,1,0,0.015777599811553956
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,32,2,16,2,0,0.014921599626541137
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,16,2,16,4,0,0.014393599331378936
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,8,2,16,8,0,0.014369599521160126
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,4,2,16,16,0,0.013724799454212188
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,2,2,16,32,0,0.01316000074148178
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,1,2,16,64,0,0.012868799269199371
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,2,16,1,0,0.021587200462818146
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,2,16,2,0,0.020124800503253937
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,2,16,4,0,0.019724799692630766
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,2,16,8,0,0.0197952002286911
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,2,16,16,0,0.019308799505233766
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,2,16,32,0,0.018681600689888
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,2,16,64,0,0.018435199558734894
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,64,2,32,1,0,0.01536639928817749
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,4,2,1,16,0,0.012859199941158295
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,32,2,32,2,0,0.01528480052947998
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,16,2,32,4,0,0.014046399295330048
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,8,2,32,8,0,0.014159999787807465
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,1,2,1,64,0,0.012990400195121765
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,2,2,32,32,0,0.013583999872207642
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,1,2,32,64,0,0.012972800433635712
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,2,32,1,0,0.0219200000166893
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,2,32,2,0,0.02123039960861206
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,2,32,4,0,0.020371200144290925
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,2,32,8,0,0.02008640021085739
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,2,32,32,0,0.01950560063123703
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,2,32,64,0,0.018985599279403687
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,64,2,64,1,0,0.0159168004989624
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,32,2,64,2,0,0.015244799852371215
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,16,2,64,4,0,0.014731200039386749
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,8,2,64,8,0,0.013996799290180207
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,4,2,64,16,0,0.013944000005722046
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,2,2,64,32,0,0.014006400108337402
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,1,2,64,64,0,0.013704000413417817
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,2,64,1,0,0.022563199698925018
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,2,64,2,0,0.021622399985790252
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,2,64,4,0,0.02114879935979843
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,2,64,8,0,0.021320000290870667
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,2,64,16,0,0.020132799446582795
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,2,64,32,0,0.019971199333667755
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,4,2,32,16,0,0.013849599659442902
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,2,64,64,0,0.019492800533771514
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,64,2,128,1,0,0.025867199897766112
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,32,2,128,2,0,0.018129600584506987
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,2,32,16,0,0.01990399956703186
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,16,2,128,4,0,0.017073599994182585
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,8,2,128,8,0,0.016979199647903443
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,4,2,128,16,0,0.016011199355125426
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,2,2,128,32,0,0.015945599973201753
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,1,2,128,64,0,0.015094399452209473
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,2,128,1,0,0.03541440069675446
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,2,128,4,0,0.02356639951467514
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,2,128,8,0,0.02279199957847595
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,2,128,16,0,0.022092799842357635
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,2,128,32,0,0.02210240066051483
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,2,128,64,0,0.021009600162506102
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,64,2,256,1,0,0.0537775993347168
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,32,2,256,2,0,0.029139199852943422
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,16,2,256,4,0,0.021350400149822236
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,8,2,256,8,0,0.02083040028810501
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,4,2,256,16,0,0.020735999941825865
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,2,2,256,32,0,0.019942399859428406
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,1,2,256,64,0,0.018848000466823576
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,2,256,1,0,0.06355519890785218
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,2,256,2,0,0.03754239976406097
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,2,256,4,0,0.028193598985671996
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,2,256,8,0,0.027595201134681703
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,2,256,16,0,0.02732959985733032
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,2,256,32,0,0.026174399256706237
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,2,256,64,0,0.024908800423145295
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,64,2,512,1,0,0.12281600236892701
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,32,2,512,2,0,0.06699680089950562
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,16,2,512,4,0,0.03713279962539673
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,2,128,2,0,0.024243199825286867
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,8,2,512,8,0,0.03399679958820343
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,4,2,512,16,0,0.03809759914875031
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,2,2,512,32,0,0.030024001002311708
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,1,2,512,64,0,0.025283199548721314
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,2,512,1,0,0.1323456048965454
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,2,512,4,0,0.046537598967552184
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,2,512,8,0,0.04162720143795014
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,1,32768,4,0,28.568057250976562
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,2,512,16,0,0.04755679965019226
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,2,512,64,0,0.03245919942855835
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,2,512,32,0,0.040057599544525146
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,64,2,1024,1,0,0.3265968084335327
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,16,2,1024,4,0,0.092467200756073
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,32,2,1024,2,0,0.18134080171585082
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,4,2,1024,16,0,0.048916798830032346
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,2,2,1024,32,0,0.047819200158119204
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,1,2,1024,64,0,0.034918400645256045
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,2,1024,2,0,0.18288480043411254
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,2,1024,1,0,0.34942400455474854
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,2,1024,4,0,0.10414400100708007
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,2,1024,16,0,0.05889279842376709
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,2,1024,32,0,0.058878397941589354
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,2,1024,64,0,0.04673120081424713
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,2,512,2,0,0.07685120105743408
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,64,2,1536,1,0,0.6325776100158691
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,16,2,1536,4,0,0.17064000368118287
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,8,2,1536,8,0,0.09727200269699096
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,4,2,1536,16,0,0.06614559888839722
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,2,2,1536,32,0,0.07211040258407593
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,1,2,1536,64,0,0.048187199234962466
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,8,2,1024,8,0,0.05357120037078857
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,2,1536,1,0,0.6398896217346192
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,2,1536,2,0,0.3382400035858154
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,2,1024,8,0,0.06517599821090699
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,2,1536,8,0,0.1120576024055481
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,2,1536,16,0,0.08003360033035278
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,2,1536,32,0,0.08710240125656128
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,2,1536,64,0,0.06298559904098511
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,32,2,2048,2,0,0.5351232051849365
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,64,2,2048,1,0,1.0376399993896483
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,16,2,2048,4,0,0.2770319938659668
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,32,2,1536,2,0,0.32546238899230956
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,8,2,2048,8,0,0.14821120500564575
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,2,2,2048,32,0,0.07944959998130799
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,1,2,2048,64,0,0.06553279757499694
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,2,2048,2,0,0.5396031856536865
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,2,2048,4,0,0.2877840042114258
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,2,2048,8,0,0.16386719942092895
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,2,2048,16,0,0.09963039755821228
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,2,1536,4,0,0.18395040035247803
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,2,2048,32,0,0.09354239702224731
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,2,2048,64,0,0.08174880146980286
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,32,2,3072,2,0,1.0796992301940918
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,4,2,2048,16,0,0.0835103988647461
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,16,2,3072,4,0,0.5510479927062988
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,8,2,3072,8,0,0.2944688081741333
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,64,2,3072,1,0,2.137068748474121
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,1,2,3072,64,0,0.10451200008392333
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,2,2048,1,0,1.044961643218994
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,32,1,32768,2,0,60.188934326171875
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,2,3072,2,0,1.0834383964538574
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,2,3072,4,0,0.6436863899230957
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,2,3072,8,0,0.3106031894683838
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,2,3072,1,0,2.1943872451782225
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,2,3072,32,0,0.13564159870147705
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,2,3072,64,0,0.12885440587997438
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,4,2,3072,16,0,0.16883519887924195
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,32,2,4096,2,0,1.8361183166503907
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,2,3072,16,0,0.1966032028198242
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,64,2,4096,1,0,3.8685184478759767
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,8,2,4096,8,0,0.4841360092163086
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,16,2,4096,4,0,1.0014656066894532
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,2,2,4096,32,0,0.14675999879837037
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,1,2,4096,64,0,0.1531823992729187
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,2,2,3072,32,0,0.11583520174026489
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,2,4096,4,0,0.9352160453796386
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,2,4096,2,0,1.8607376098632813
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,2,4096,8,0,0.5001471996307373
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,2,4096,32,0,0.17040480375289918
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,2,4096,16,0,0.2957920074462891
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,2,4096,64,0,0.16732959747314452
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,1,32768,2,0,56.57227783203125
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,4,2,4096,16,0,0.2882832050323486
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,2,4096,1,0,3.7477855682373047
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,2,2,6144,32,0,0.32911200523376466
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,1,2,6144,64,0,0.2240895986557007
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,16,2,6144,4,0,2.0226144790649414
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,8,2,6144,8,0,1.0667183876037598
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,32,2,6144,2,0,4.322588729858398
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,2,6144,4,0,2.071352005004883
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,2,6144,16,0,0.593009614944458
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,4,2,6144,16,0,0.5322976112365723
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,2,6144,8,0,1.1310943603515624
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,2,6144,32,0,0.3518959999084473
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,2,6144,64,0,0.24337279796600342
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,2,6144,2,0,4.263302230834961
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,8,2,8192,8,0,1.837539291381836
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,4,2,8192,16,0,0.9604000091552735
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,2,2,8192,32,0,0.5166224002838135
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,2,6144,1,0,8.395611572265626
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,1,2,8192,64,0,0.28004319667816163
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,64,2,6144,1,0,8.850244903564453
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,32,2,8192,2,0,7.666494750976563
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,16,2,8192,4,0,3.722856140136719
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,2,8192,16,0,0.9225760459899902
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,2,8192,32,0,0.5144288063049316
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,2,8192,8,0,1.9203199386596679
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,2,8192,64,0,0.30614399909973145
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,2,8192,4,0,3.645391845703125
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,2,8192,2,0,7.259458923339844
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,64,2,8192,1,0,15.557284545898437
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,8,2,10240,8,0,2.839950370788574
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,2,2,10240,32,0,0.83514404296875
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,4,2,10240,16,0,1.4907055854797364
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,1,2,10240,64,0,0.4640783786773682
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,2,8192,1,0,14.707421875
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,16,2,10240,4,0,6.037094497680664
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,32,2,10240,2,0,11.839510345458985
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,2,10240,8,0,2.7865087509155275
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,2,10240,16,0,1.510259246826172
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,2,10240,64,0,0.4985231876373291
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,2,10240,32,0,0.8471823692321777
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,2,10240,4,0,6.01366081237793
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,2,10240,2,0,11.540105438232422
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,8,2,12288,8,0,4.3783008575439455
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,64,2,10240,1,0,24.153854370117188
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,16,2,12288,4,0,8.913198089599609
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,2,2,12288,32,0,1.1245280265808106
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,4,2,12288,16,0,2.1620607376098633
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,1,2,12288,64,0,0.6775072097778321
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,2,10240,1,0,23.40612030029297
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,32,2,12288,2,0,17.197830200195312
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,64,1,32768,1,0,118.8342041015625
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,2,12288,16,0,2.1465280532836912
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,2,12288,4,0,8.208646392822265
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,2,12288,64,0,0.6846960067749024
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,2,12288,8,0,4.086681747436524
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,2,12288,32,0,1.1292384147644043
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,1,32768,1,0,112.3868896484375
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,4,2,16384,16,0,3.6995105743408203
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,2,12288,2,0,16.128814697265625
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,8,2,16384,8,0,7.494983673095703
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,1,2,16384,64,0,0.98788480758667
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,2,2,16384,32,0,1.911520004272461
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,64,2,12288,1,0,34.43412780761719
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,16,2,16384,4,0,15.649996948242187
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,2,16384,16,0,3.8715232849121093
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,2,16384,8,0,7.15472640991211
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,2,16384,32,0,1.9265039443969727
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,2,16384,64,0,1.0267439842224122
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,2,12288,1,0,32.629147338867185
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,2,16384,4,0,14.290126037597656
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,32,2,16384,2,0,31.23094787597656
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,2,16384,2,0,28.895294189453125
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,2,2,32768,32,0,7.49688949584961
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,4,2,32768,16,0,15.405039978027343
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,1,2,32768,64,0,3.80675048828125
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,8,2,32768,8,0,30.014785766601562
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,64,2,16384,1,0,61.19816284179687
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,2,16384,1,0,57.62758178710938
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,2,32768,16,0,14.590385437011719
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,16,2,32768,4,0,59.81363525390625
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,2,32768,64,0,3.8329566955566405
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,64,4,1,1,0,0.013891200721263885
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,32,4,1,2,0,0.013305599987506866
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,16,4,1,4,0,0.013176000118255616
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,2,32768,32,0,7.260382080078125
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,8,4,1,8,0,0.013467200100421906
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,4,4,1,16,0,0.013488000631332398
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,2,4,1,32,0,0.013476799428462981
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,1,4,1,64,0,0.013406400382518769
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,4,1,1,0,0.020524799823760986
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,4,1,2,0,0.020204800367355346
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,2,32768,8,0,28.9283203125
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,4,1,8,0,0.020019200444221497
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,4,1,16,0,0.019836799800395967
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,4,1,32,0,0.01964160054922104
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,4,1,64,0,0.019843199849128725
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,64,4,16,1,0,0.016238400340080263
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,32,4,16,2,0,0.01568319946527481
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,16,4,16,4,0,0.014828799664974213
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,8,4,16,8,0,0.014616000652313232
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,4,4,16,16,0,0.014395199716091156
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,2,4,16,32,0,0.01369439959526062
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,1,4,16,64,0,0.013596799969673157
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,4,16,1,0,0.022753599286079406
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,4,16,2,0,0.022023999691009523
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,4,16,4,0,0.02075839936733246
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,4,16,8,0,0.020000000298023225
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,4,16,16,0,0.019657599925994872
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,4,16,32,0,0.019312000274658202
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,4,16,64,0,0.01887039989233017
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,64,4,32,1,0,0.015969599783420562
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,32,4,32,2,0,0.015574400126934052
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,16,4,32,4,0,0.015113599598407745
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,8,4,32,8,0,0.014215999841690063
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,4,4,32,16,0,0.014000000059604644
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,2,4,32,32,0,0.013822400569915771
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,4,1,4,0,0.01890240013599396
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,1,4,32,64,0,0.013228799402713775
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,4,32,1,0,0.022580799460411072
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,4,32,4,0,0.02133920043706894
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,4,32,8,0,0.020070399343967437
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,4,32,16,0,0.019657599925994872
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,4,32,32,0,0.019387200474739075
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,4,32,64,0,0.019172799587249757
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,64,4,64,1,0,0.024835200607776643
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,32,4,64,2,0,0.016468800604343414
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,16,4,64,4,0,0.0158160001039505
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,8,4,64,8,0,0.01547040045261383
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,4,4,64,16,0,0.014377599954605103
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,2,4,64,32,0,0.01446239948272705
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,1,4,64,64,0,0.014228799939155578
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,4,64,1,0,0.03278400003910065
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,4,64,2,0,0.022870400547981264
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,4,64,4,0,0.02236959934234619
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,4,64,8,0,0.02197760045528412
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,4,64,16,0,0.02064319998025894
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,4,64,64,0,0.02003999948501587
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,64,4,128,1,0,0.04832479953765869
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,32,4,128,2,0,0.027302399277687073
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,16,4,128,4,0,0.018272000551223754
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,8,4,128,8,0,0.017641599476337432
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,4,4,128,16,0,0.017692799866199493
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,2,4,128,32,0,0.017107200622558594
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,1,4,128,64,0,0.01606079936027527
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,4,32,2,0,0.02143999934196472
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,4,128,1,0,0.058894401788711546
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,4,128,2,0,0.03575839996337891
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,4,128,4,0,0.024868799746036528
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,4,128,8,0,0.024260799586772918
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,4,128,32,0,0.023361599445343016
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,4,128,64,0,0.02175839990377426
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,64,4,256,1,0,0.09672480225563049
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,32,4,256,2,0,0.05570080280303955
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,16,4,256,4,0,0.033432000875473024
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,8,4,256,8,0,0.022801600396633148
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,4,4,256,16,0,0.0224031999707222
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,2,4,256,32,0,0.022316800057888032
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,1,4,256,64,0,0.02096160054206848
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,4,256,1,0,0.1070255994796753
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,4,256,2,0,0.06616320013999939
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,4,256,4,0,0.041545599699020386
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,4,256,8,0,0.03012160062789917
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,4,256,16,0,0.029595199227333068
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,4,256,32,0,0.0292959988117218
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,4,256,64,0,0.027561599016189577
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,64,4,512,1,0,0.22811200618743896
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,32,4,512,2,0,0.12270560264587402
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,16,4,512,4,0,0.06976640224456787
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,4,128,16,0,0.024003200232982635
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,8,4,512,8,0,0.0546239972114563
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,4,4,512,16,0,0.03657119870185852
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,2,4,512,32,0,0.03989599943161011
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,1,4,512,64,0,0.03272320032119751
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,4,512,1,0,0.23994081020355223
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,4,512,4,0,0.08268160223960877
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,4,512,8,0,0.06748800277709961
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,4,512,16,0,0.04686720073223114
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,4,512,32,0,0.050995200872421265
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,4,512,64,0,0.04354879856109619
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,4,64,32,0,0.02043360024690628
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,64,4,1024,1,0,0.6439311981201172
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,32,4,1024,2,0,0.33097119331359864
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,8,4,1024,8,0,0.09913920164108277
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,16,4,1024,4,0,0.17720479965209962
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,4,4,1024,16,0,0.07836959958076477
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,2,4,1024,32,0,0.055134397745132444
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,1,4,1024,64,0,0.05301759839057922
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,4,1024,1,0,0.652016019821167
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,4,512,2,0,0.13430880308151244
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,4,1024,2,0,0.34760799407958987
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,4,1024,4,0,0.19214400053024291
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,4,1024,8,0,0.11491680145263672
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,4,1024,16,0,0.09556639790534974
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,4,1024,32,0,0.06873760223388672
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,4,1024,64,0,0.06960480213165283
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,32,4,1536,2,0,0.6422431945800782
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,16,4,1536,4,0,0.3322848081588745
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,8,4,1536,8,0,0.1806656002998352
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,4,4,1536,16,0,0.12221280336380005
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,2,4,1536,32,0,0.08816159963607788
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,1,4,1536,64,0,0.08586400151252746
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,2,32768,4,0,56.83748168945313
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,4,1536,1,0,1.2556672096252441
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,4,1536,2,0,0.7327424049377441
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,4,1536,4,0,0.34669759273529055
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,4,1536,8,0,0.21166880130767823
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,4,1536,32,0,0.11160000562667846
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,64,4,1536,1,0,1.2507535934448242
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,4,1536,64,0,0.10796159505844116
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,32,4,2048,2,0,1.0522159576416015
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,16,4,2048,4,0,0.5399824142456054
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,64,4,2048,1,0,2.1254751205444338
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,8,4,2048,8,0,0.28869919776916503
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,2,4,2048,32,0,0.13171039819717406
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,1,4,2048,64,0,0.09376959800720215
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,4,1536,16,0,0.14103519916534424
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,4,2048,2,0,1.0495295524597168
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,4,2048,4,0,0.5534031867980957
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,4,2048,16,0,0.18399840593338013
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,4,2048,1,0,2.0854848861694335
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,4,2048,32,0,0.1524448037147522
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,4,2048,64,0,0.1218832015991211
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,4,4,2048,16,0,0.1723871946334839
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,16,4,3072,4,0,1.0934639930725099
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,32,4,3072,2,0,2.228059196472168
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,8,4,3072,8,0,0.5947711944580079
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,4,4,3072,16,0,0.34563360214233396
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,4,2048,8,0,0.3063103914260864
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,1,4,3072,64,0,0.1601696014404297
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,2,4,3072,32,0,0.21871840953826904
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,64,4,3072,1,0,4.647419357299805
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,4,3072,4,0,1.1468447685241698
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,4,3072,16,0,0.34969439506530764
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,4,3072,8,0,0.6443071842193604
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,4,3072,32,0,0.24116799831390381
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,4,3072,2,0,2.219745635986328
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,4,3072,64,0,0.19162240028381347
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,4,3072,1,0,4.430487823486328
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,8,4,4096,8,0,0.9913552284240723
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,4,4,4096,16,0,0.5223440170288086
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,32,4,4096,2,0,3.9601470947265627
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,2,4,4096,32,0,0.29185919761657714
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,1,4,4096,64,0,0.2530303955078125
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,16,4,4096,4,0,1.9252000808715821
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,4,4096,4,0,1.8483663558959962
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,4,4096,2,0,3.7908592224121094
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,4,4096,8,0,0.9754591941833496
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,4,4096,16,0,0.5835728168487548
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,4,4096,32,0,0.33582239151000975
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,4,4096,64,0,0.2774431943893433
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,64,4,4096,1,0,8.022994995117188
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,4,4096,1,0,7.7559967041015625
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,8,4,6144,8,0,2.147345542907715
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,4,4,6144,16,0,1.1351375579833984
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,16,4,6144,4,0,4.1403968811035154
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,2,4,6144,32,0,0.6290544033050537
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,1,4,6144,64,0,0.4344655990600586
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,32,2,32768,2,0,119.1584228515625
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,32,4,6144,2,0,8.700974273681641
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,4,6144,8,0,2.1577808380126955
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,4,6144,4,0,4.264352035522461
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,4,6144,16,0,1.1378527641296388
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,4,6144,64,0,0.4687056064605713
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,4,6144,32,0,0.6626848220825196
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,4,6144,2,0,8.522647857666016
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,64,4,6144,1,0,17.725230407714843
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,8,4,8192,8,0,3.8793712615966798
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,16,4,8192,4,0,7.762723541259765
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,4,4,8192,16,0,1.9635616302490235
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,4,6144,1,0,17.241799926757814
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,1,4,8192,64,0,0.5771567821502686
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,2,4,8192,32,0,1.0463744163513184
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,2,32768,2,0,113.1752685546875
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,32,4,8192,2,0,15.683169555664062
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,4,8192,16,0,1.9848224639892578
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,4,8192,4,0,7.368510437011719
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,4,8192,32,0,1.0435487747192382
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,4,8192,64,0,0.6363647937774658
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,4,8192,8,0,3.684756851196289
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,4,8192,2,0,15.666226196289063
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,64,4,8192,1,0,30.68419189453125
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,8,4,10240,8,0,5.977143859863281
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,4,4,10240,16,0,3.033075141906738
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,2,4,10240,32,0,1.575551986694336
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,16,4,10240,4,0,11.803553771972656
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,1,4,10240,64,0,0.8848575592041016
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,4,8192,1,0,29.27607116699219
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,32,4,10240,2,0,24.12860107421875
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,4,10240,8,0,5.637329483032227
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,4,10240,16,0,2.9492416381835938
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,4,10240,32,0,1.6149311065673828
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,4,10240,64,0,0.9583552360534668
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,4,10240,4,0,11.919457244873048
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,4,10240,2,0,22.979757690429686
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,8,4,12288,8,0,8.518692779541016
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,64,4,10240,1,0,48.04190673828125
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,2,4,12288,32,0,2.17872314453125
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,16,4,12288,4,0,17.178713989257812
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,1,4,12288,64,0,1.2347488403320312
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,4,4,12288,16,0,4.348839950561524
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,4,10240,1,0,45.861932373046876
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,32,4,12288,2,0,34.85338134765625
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,4,12288,4,0,16.50615997314453
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,4,12288,16,0,4.17225456237793
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,4,12288,8,0,8.122676849365234
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,4,12288,64,0,1.267563247680664
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,4,12288,32,0,2.1776655197143553
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,4,12288,2,0,33.09176330566406
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,64,2,32768,1,0,241.98505859375
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,64,4,12288,1,0,70.16497802734375
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,8,4,16384,8,0,15.421121215820312
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,16,4,16384,4,0,30.297866821289062
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,1,4,16384,64,0,2.036809539794922
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,2,4,16384,32,0,3.8838367462158203
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,4,4,16384,16,0,7.789814758300781
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,2,32768,1,0,230.9039794921875
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,4,12288,1,0,65.57588500976563
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,4,16384,16,0,7.520793914794922
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,4,16384,8,0,15.051174926757813
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,64,8,1,1,0,0.01494240015745163
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,32,8,1,2,0,0.014428800344467163
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,16,8,1,4,0,0.01308320015668869
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,8,8,1,8,0,0.01310880035161972
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,4,8,1,16,0,0.013040000200271606
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,4,16384,64,0,2.095529556274414
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,1,8,1,64,0,0.012891200184822083
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,8,1,1,0,0.0200095996260643
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,8,1,2,0,0.01902880072593689
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,8,1,4,0,0.018934400379657747
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,8,1,8,0,0.018507200479507446
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,8,1,16,0,0.018612800538539885
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,8,1,32,0,0.018641600012779237
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,8,1,64,0,0.01873439997434616
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,64,8,16,1,0,0.016505600512027742
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,32,8,16,2,0,0.015699200332164764
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,16,8,16,4,0,0.015491199493408204
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,8,8,16,8,0,0.014711999893188476
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,4,8,16,16,0,0.013969600200653076
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,2,8,16,32,0,0.013675199449062347
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,1,8,16,64,0,0.013187199831008911
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,8,16,1,0,0.022947199642658234
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,8,16,2,0,0.022433599829673766
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,8,16,4,0,0.021988800168037413
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,8,16,8,0,0.020206399261951447
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,8,16,16,0,0.01974879950284958
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,8,16,32,0,0.01950560063123703
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,8,16,64,0,0.01907680034637451
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,64,8,32,1,0,0.024751999974250795
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,32,8,32,2,0,0.016256000101566314
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,16,8,32,4,0,0.015780800580978395
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,8,8,32,8,0,0.015438400208950043
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,4,8,32,16,0,0.014678399264812469
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,2,8,32,32,0,0.013916799426078796
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,1,8,32,64,0,0.01363680064678192
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,8,32,1,0,0.032739201188087465
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,8,32,2,0,0.022988800704479218
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,8,32,4,0,0.02227199971675873
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,8,32,8,0,0.021988800168037413
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,8,32,16,0,0.020636799931526183
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,8,32,32,0,0.020207999646663664
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,8,32,64,0,0.019475199282169342
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,64,8,64,1,0,0.045607998967170715
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,32,8,64,2,0,0.02463040053844452
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,16,8,64,4,0,0.01681919991970062
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,8,8,64,8,0,0.016598400473594666
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,4,8,64,16,0,0.01640480011701584
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,2,8,64,32,0,0.015435199439525604
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,1,8,64,64,0,0.015003199875354766
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,8,64,1,0,0.05502880215644836
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,8,64,2,0,0.03264000117778778
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,8,64,4,0,0.02383680045604706
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,8,64,8,0,0.02311040014028549
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,8,64,16,0,0.022702400386333466
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,8,64,32,0,0.02179519981145859
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,4,16384,32,0,3.7978847503662108
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,2,8,1,32,0,0.013281600177288055
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,32,8,128,2,0,0.04984959959983826
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,16,8,128,4,0,0.02879360020160675
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,8,8,128,8,0,0.01990240067243576
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,4,8,128,16,0,0.019539199769496918
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,2,8,128,32,0,0.019340799748897554
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,1,8,128,64,0,0.017795200645923614
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,8,128,1,0,0.09473279714584351
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,8,128,2,0,0.060331201553344725
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,32,4,16384,2,0,60.83540649414063
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,8,128,4,0,0.044627198576927186
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,8,128,8,0,0.030027198791503906
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,8,128,16,0,0.0295087993144989
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,8,128,32,0,0.02961440086364746
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,8,128,64,0,0.02744640111923218
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,8,64,64,0,0.021315200626850127
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,64,8,256,1,0,0.1944416046142578
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,16,8,256,4,0,0.06244959831237793
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,4,16384,4,0,29.570559692382812
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,4,8,256,16,0,0.028484800457954408
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,2,8,256,32,0,0.028388801217079162
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,1,8,256,64,0,0.026259198784828186
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,8,256,1,0,0.20112159252166747
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,8,256,2,0,0.11722559928894043
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,8,256,4,0,0.07489600181579589
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,8,256,8,0,0.050755202770233154
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,8,256,16,0,0.03671840131282807
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,8,256,32,0,0.03558399975299835
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,8,256,64,0,0.03448159992694855
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,64,8,512,1,0,0.4465616226196289
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,32,8,512,2,0,0.23471999168395996
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,64,8,128,1,0,0.08449599742889405
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,16,8,512,4,0,0.1286784052848816
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,8,8,512,8,0,0.0936735987663269
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,4,8,512,16,0,0.06296640038490295
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,8,8,256,8,0,0.038710400462150574
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,2,8,512,32,0,0.04267359972000122
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,8,512,2,0,0.248524808883667
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,8,512,1,0,0.458955192565918
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,8,512,4,0,0.14382239580154418
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,8,512,8,0,0.10959199666976929
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,8,512,16,0,0.08090720176696778
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,8,512,64,0,0.06199679970741272
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,32,8,1024,2,0,0.6505663871765137
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,16,8,1024,4,0,0.33895680904388426
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,8,8,1024,8,0,0.18766239881515503
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,4,8,1024,16,0,0.13592000007629396
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,2,8,1024,32,0,0.09610880017280579
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,32,8,256,2,0,0.09851040244102478
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,1,8,1024,64,0,0.06837440133094788
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,1,8,512,64,0,0.04660640060901642
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,8,1024,2,0,0.6697279930114746
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,8,1024,4,0,0.3643887996673584
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,8,512,32,0,0.057171201705932616
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,8,1024,16,0,0.16053760051727295
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,8,1024,1,0,1.281454372406006
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,8,1024,32,0,0.12052160501480103
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,8,1024,64,0,0.09440799951553344
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,16,8,1536,4,0,0.6542575836181641
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,64,8,1024,1,0,1.2696847915649414
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,8,8,1536,8,0,0.35072319507598876
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,32,8,1536,2,0,1.2635552406311035
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,4,8,1536,16,0,0.22930240631103516
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,2,8,1536,32,0,0.1549839973449707
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,64,8,1536,1,0,2.527288055419922
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,8,1536,2,0,1.2917344093322753
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,8,1024,8,0,0.21098239421844484
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,8,1536,4,0,0.7149983882904053
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,8,1536,8,0,0.3827728033065796
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,8,1536,16,0,0.25759038925170896
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,8,1536,32,0,0.19100799560546874
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,8,1536,64,0,0.14833279848098754
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,8,1536,1,0,2.5198240280151367
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,8,8,2048,8,0,0.5732384204864502
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,16,8,2048,4,0,1.065510368347168
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,1,8,1536,64,0,0.12096960544586181
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,4,8,2048,16,0,0.32013440132141113
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,1,8,2048,64,0,0.16472959518432617
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,32,8,2048,2,0,2.169910430908203
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,8,2048,4,0,1.1529871940612793
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,8,2048,2,0,2.088657569885254
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,64,8,2048,1,0,4.326643371582032
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,8,2048,8,0,0.625267219543457
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,8,2048,16,0,0.3505199909210205
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,8,2048,64,0,0.20917119979858398
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,8,2048,32,0,0.27174398899078367
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,2,8,2048,32,0,0.22506399154663087
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,8,2048,1,0,4.264871978759766
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,8,8,3072,8,0,1.131769561767578
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,4,8,3072,16,0,0.6245168209075928
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,2,8,3072,32,0,0.3899935960769653
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,16,8,3072,4,0,2.2542863845825196
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,1,8,3072,64,0,0.27243039608001707
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,32,8,3072,2,0,4.7587120056152346
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,8,3072,4,0,2.227118492126465
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,8,3072,16,0,0.6840000152587891
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,8,3072,8,0,1.2764543533325194
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,8,3072,32,0,0.4486720085144043
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,8,3072,64,0,0.34018559455871583
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,8,3072,2,0,4.555289459228516
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,64,8,3072,1,0,9.546692657470704
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,4,16384,2,0,58.192413330078125
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,4,8,4096,16,0,1.0797504425048827
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,16,8,4096,4,0,4.024201583862305
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,8,8,4096,8,0,2.0565311431884767
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,8,3072,1,0,9.150057220458985
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,2,8,4096,32,0,0.5845695972442627
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,1,8,4096,64,0,0.41933598518371584
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,8,4096,8,0,2.003487968444824
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,32,8,4096,2,0,8.306861114501952
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,8,4096,4,0,3.9173873901367187
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,8,4096,16,0,1.1693504333496094
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,8,4096,32,0,0.6603375911712647
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,8,4096,64,0,0.4889120101928711
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,8,4096,2,0,7.862760162353515
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,64,8,4096,1,0,16.289064025878908
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,4,8,6144,16,0,2.278112030029297
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,2,8,6144,32,0,1.2865839958190919
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,8,8,6144,8,0,4.286439895629883
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,8,4096,1,0,15.747213745117188
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,1,8,6144,64,0,0.7821392059326172
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,16,8,6144,4,0,8.993032073974609
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,8,6144,8,0,4.55389289855957
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,32,8,6144,2,0,18.56848602294922
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,8,6144,4,0,8.795398712158203
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,8,6144,16,0,2.402801513671875
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,8,6144,32,0,1.3506208419799806
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,8,6144,64,0,0.8665375709533691
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,64,4,16384,1,0,120.25946044921875
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,8,6144,2,0,17.704603576660155
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,8,8,8192,8,0,7.928401947021484
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,2,8,8192,32,0,2.039228820800781
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,4,8,8192,16,0,3.968608093261719
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,1,8,8192,64,0,1.1000927925109862
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,64,8,6144,1,0,35.88941345214844
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,16,8,8192,4,0,15.584803771972656
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,8,8192,8,0,7.5991455078125
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,4,16384,1,0,114.55150146484375
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,8,6144,1,0,34.19465942382813
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,8,8192,16,0,3.920632171630859
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,64,16,1,1,0,0.01512800008058548
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,32,16,1,2,0,0.01422239989042282
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,16,16,1,4,0,0.013993600010871887
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,8,16,1,8,0,0.013870400190353394
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,4,16,1,16,0,0.013249599933624267
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,2,16,1,32,0,0.013030399382114411
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,1,16,1,64,0,0.013153600692749023
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,8,8192,64,0,1.1689151763916015
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,16,1,2,0,0.01860480010509491
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,16,1,4,0,0.01844799965620041
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,16,1,8,0,0.01844000071287155
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,16,1,16,0,0.018113599717617036
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,16,1,32,0,0.018164800107479097
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,16,1,64,0,0.0182096004486084
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,64,16,16,1,0,0.0245728000998497
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,32,16,16,2,0,0.016260799765586854
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,16,16,16,4,0,0.015724800527095795
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,8,8192,4,0,15.132997131347656
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,8,16,16,8,0,0.015545600652694702
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,2,16,16,32,0,0.01364160031080246
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,4,16,16,16,0,0.016172799468040466
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,16,16,1,0,0.03701919913291931
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,16,16,2,0,0.023712000250816344
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,16,16,4,0,0.023073600232601167
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,32,8,8192,2,0,31.90672607421875
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,16,16,8,0,0.022932800650596618
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,16,16,32,0,0.021315200626850127
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,16,16,16,0,0.023937599360942842
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,16,16,64,0,0.020392000675201416
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,32,16,32,2,0,0.02534399926662445
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,16,16,32,4,0,0.01732960045337677
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,8,16,32,8,0,0.01690720021724701
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,4,16,32,16,0,0.016760000586509706
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,2,16,32,32,0,0.015503999590873719
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,16,1,1,0,0.019699199497699736
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,1,16,32,64,0,0.015033599734306336
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,16,32,1,0,0.056436800956726076
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,16,32,4,0,0.024246400594711302
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,16,32,8,0,0.02304159998893738
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,16,32,16,0,0.02316959947347641
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,16,32,32,0,0.02168480008840561
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,16,32,64,0,0.02093919962644577
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,64,16,64,1,0,0.0819760024547577
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,32,16,64,2,0,0.04722239971160889
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,16,16,64,4,0,0.02640959918498993
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,1,16,16,64,0,0.01361439973115921
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,4,16,64,16,0,0.017931200563907623
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,2,16,64,32,0,0.018087999522686006
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,1,16,64,64,0,0.01709599941968918
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,16,64,1,0,0.09117599725723266
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,16,64,2,0,0.057339197397232054
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,16,64,4,0,0.03469760119915009
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,16,64,8,0,0.025654399394989015
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,64,16,32,1,0,0.051601600646972653
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,16,64,32,0,0.02513119876384735
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,16,64,64,0,0.02359199970960617
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,64,16,128,1,0,0.1544144034385681
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,8,8192,32,0,2.060166358947754
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,32,16,128,2,0,0.08716639876365662
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,8,16,128,8,0,0.03240160048007965
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,16,16,128,4,0,0.05293599963188171
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,4,16,128,16,0,0.023132799565792082
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,2,16,128,32,0,0.022745600342750548
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,1,16,128,64,0,0.02221280038356781
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,16,128,2,0,0.09909120202064514
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,16,128,4,0,0.06496800184249878
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,16,128,8,0,0.044803199172019956
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,16,128,16,0,0.03258880078792572
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,16,128,32,0,0.032183998823165895
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,8,16,64,8,0,0.01866080015897751
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,16,128,64,0,0.03131519854068756
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,64,16,256,1,0,0.3440959930419922
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,32,16,256,2,0,0.18626400232315063
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,16,16,256,4,0,0.10361280441284179
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,16,64,16,0,0.02510400116443634
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,8,16,256,8,0,0.066048002243042
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,4,16,256,16,0,0.044817599654197696
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,2,16,256,32,0,0.032348799705505374
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,1,16,256,64,0,0.03154560029506683
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,16,32,2,0,0.033155199885368344
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,16,256,1,0,0.3605792045593262
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,16,256,4,0,0.11916160583496094
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,16,256,8,0,0.08272960186004638
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,16,256,16,0,0.06213120222091675
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,16,256,32,0,0.04582079946994781
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,16,256,64,0,0.044675201177597046
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,16,128,1,0,0.16704319715499877
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,16,16,512,4,0,0.2444000005722046
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,32,16,512,2,0,0.45810561180114745
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,64,16,512,1,0,0.8737520217895508
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,2,16,512,32,0,0.07757599949836731
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,1,16,512,64,0,0.05621119737625122
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,8,8192,2,0,30.038323974609376
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,16,512,1,0,0.8841088294982911
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,16,512,2,0,0.5078080177307129
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,16,512,4,0,0.26582720279693606
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,16,512,8,0,0.20463840961456298
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,16,512,16,0,0.13260639905929567
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,16,512,32,0,0.10630719661712647
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,16,512,64,0,0.07976959943771363
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,4,16,512,16,0,0.10926079750061035
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,16,16,1024,4,0,0.6693840026855469
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,32,16,1024,2,0,1.2825920104980468
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,8,16,1024,8,0,0.3641184091567993
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,4,16,1024,16,0,0.25073120594024656
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,2,16,1024,32,0,0.16590399742126466
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,64,16,1024,1,0,2.584087944030762
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,16,256,2,0,0.2
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,16,1024,2,0,1.3172351837158203
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,16,1024,4,0,0.7297632217407226
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,16,1024,8,0,0.40375361442565916
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,16,1024,16,0,0.2887216091156006
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,16,1024,64,0,0.1647312045097351
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,16,1024,32,0,0.20791840553283691
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,16,1024,1,0,2.5116304397583007
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,8,16,512,8,0,0.17237600088119506
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,8,16,1536,8,0,0.7042272090911865
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,4,16,1536,16,0,0.43297600746154785
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,1,16,1024,64,0,0.12480159997940063
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,1,16,1536,64,0,0.1987071990966797
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,32,16,1536,2,0,2.593734359741211
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,16,1536,4,0,1.4343631744384766
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,16,1536,2,0,2.5508127212524414
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,16,16,1536,4,0,1.2976832389831543
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,16,1536,8,0,0.76353440284729
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,16,1536,16,0,0.4918464183807373
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,2,16,1536,32,0,0.28407039642333987
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,16,1536,64,0,0.2594496011734009
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,16,1536,1,0,5.172320175170898
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,16,16,2048,4,0,2.1520320892333986
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,8,16,2048,8,0,1.1301615715026856
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,4,16,2048,16,0,0.6509456157684326
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,2,16,2048,32,0,0.4326591968536377
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,1,16,2048,64,0,0.29688639640808107
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,16,1536,32,0,0.3346240043640137
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,32,16,2048,2,0,4.378478240966797
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,16,2048,8,0,1.270475196838379
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,16,2048,4,0,2.1549823760986326
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,64,8,8192,1,0,63.058538818359374
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,16,2048,16,0,0.6963888168334961
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,64,16,1536,1,0,5.304727935791016
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,16,2048,64,0,0.39272480010986327
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,16,2048,2,0,4.306028747558594
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,64,16,2048,1,0,8.877033233642578
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,4,16,3072,16,0,1.2614144325256347
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,16,2048,32,0,0.4919167995452881
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,2,16,3072,32,0,0.7658080101013184
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,1,16,3072,64,0,0.4833631992340088
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,16,2048,1,0,8.667795562744141
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,16,16,3072,4,0,4.742145538330078
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,8,16,3072,8,0,2.3256383895874024
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,16,3072,8,0,2.481025505065918
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,16,3072,16,0,1.2955936431884765
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,16,3072,32,0,0.9033408164978027
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,32,16,3072,2,0,9.43409423828125
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,16,3072,64,0,0.5938608169555664
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,16,3072,4,0,4.594718551635742
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,8,8192,1,0,60.87241821289062
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,4,16,4096,16,0,2.1133407592773437
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,16,3072,2,0,9.192422485351562
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,2,16,4096,32,0,1.2052000045776368
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,8,16,4096,8,0,4.050791931152344
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,1,16,4096,64,0,0.7636576175689698
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,16,16,4096,4,0,8.077910614013671
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,64,16,3072,1,0,19.30079803466797
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,16,4096,16,0,2.2111440658569337
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,16,4096,8,0,4.089470291137696
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,16,3072,1,0,18.210162353515624
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,16,4096,4,0,7.91809310913086
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,16,4096,32,0,1.279147243499756
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,32,32,1,2,0,0.013635200262069703
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,16,32,1,4,0,0.013606399297714233
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,16,4096,64,0,0.9127696037292481
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,8,32,1,8,0,0.013812799751758576
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,2,32,1,32,0,0.013809600472450256
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,1,32,1,64,0,0.013406400382518769
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,32,1,1,0,0.020500800013542174
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,32,1,2,0,0.019529600441455842
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,32,1,4,0,0.01926400065422058
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,32,1,8,0,0.019182400405406953
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,32,1,16,0,0.019091199338436126
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,32,1,32,0,0.01892479956150055
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,32,1,64,0,0.01903039962053299
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,64,32,16,1,0,0.04629279971122742
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,32,32,16,2,0,0.025563201308250426
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,16,32,16,4,0,0.017582400143146514
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,8,32,16,8,0,0.017153599858283998
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,4,32,16,16,0,0.01643519997596741
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,2,32,16,32,0,0.015276800096035003
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,32,16,4096,2,0,16.34723815917969
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,32,16,1,0,0.06200000047683716
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,32,16,2,0,0.03711679875850678
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,64,32,1,1,0,0.016624000668525696
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,32,16,4,0,0.026804798841476442
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,32,16,8,0,0.023876799643039702
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,32,16,16,0,0.024571199715137482
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,32,16,32,0,0.021964800357818604
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,32,16,64,0,0.022908799350261688
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,64,32,32,1,0,0.08277919888496399
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,32,32,32,2,0,0.049876800179481505
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,16,32,32,4,0,0.02707040011882782
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,4,32,1,16,0,0.013167999684810638
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,4,32,32,16,0,0.018799999356269838
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,2,32,32,32,0,0.018211199343204497
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,1,32,32,64,0,0.016993600130081176
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,32,32,1,0,0.09166240096092224
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,32,32,2,0,0.05791680216789245
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,32,32,4,0,0.03520799875259399
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,32,32,8,0,0.02609120011329651
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,32,32,16,0,0.025489598512649536
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,32,32,32,0,0.024953599274158477
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,32,32,64,0,0.023846399784088135
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,64,32,64,1,0,0.1460976004600525
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,32,32,64,2,0,0.08570399880409241
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,16,32,64,4,0,0.051107197999954224
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,8,32,64,8,0,0.030112001299858093
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,4,32,64,16,0,0.02227199971675873
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,2,32,64,32,0,0.021777600049972534
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,1,32,64,64,0,0.021377600729465485
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,32,64,1,0,0.1584879994392395
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,32,64,4,0,0.0632431983947754
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,1,32,16,64,0,0.014958399534225463
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,32,64,8,0,0.0432096004486084
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,32,64,16,0,0.03168480098247528
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,32,64,64,0,0.03095200061798096
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,64,32,128,1,0,0.29437119960784913
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,32,32,128,2,0,0.1582576036453247
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,16,32,128,4,0,0.09318879842758179
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,8,32,32,8,0,0.019945600628852846
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,8,32,128,8,0,0.05928959846496582
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,4,32,128,16,0,0.04027679860591889
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,1,32,128,64,0,0.029345598816871644
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,32,128,1,0,0.3119312047958374
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,32,128,2,0,0.1745344042778015
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,32,128,4,0,0.11031199693679809
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,32,128,8,0,0.07618240118026734
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,32,128,16,0,0.05754079818725586
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,32,64,2,0,0.09622079730033875
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,32,128,32,0,0.04337919950485229
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,32,128,64,0,0.04198080003261566
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,16,32,256,4,0,0.19420000314712524
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,32,64,32,0,0.03139680027961731
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,32,32,256,2,0,0.36038880348205565
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,8,32,256,8,0,0.1173200011253357
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,4,32,256,16,0,0.07954559922218322
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,2,32,256,32,0,0.059113597869873045
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,64,32,256,1,0,0.6773136138916016
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,1,32,256,64,0,0.04541119933128357
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,32,256,8,0,0.14095360040664673
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,32,256,4,0,0.21847200393676758
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,16,4096,2,0,16.645062255859376
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,32,256,16,0,0.1033951997756958
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,32,256,32,0,0.0837119996547699
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,32,256,1,0,0.6984784126281738
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,32,256,64,0,0.06780959963798523
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,2,32,128,32,0,0.029543998837471008
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,16,32,512,4,0,0.4757376194000244
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,4,32,512,16,0,0.200547194480896
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,32,32,512,2,0,0.9091471672058106
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,2,32,512,32,0,0.1365216016769409
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,64,32,512,1,0,1.7257776260375977
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,32,512,2,0,0.9435664176940918
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,32,512,4,0,0.5164175987243652
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,32,512,8,0,0.3639103889465332
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,32,512,16,0,0.23985280990600585
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,32,512,1,0,1.7616544723510743
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,32,512,32,0,0.176801598072052
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,32,512,64,0,0.14593759775161744
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,8,32,512,8,0,0.3264528036117554
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,1,32,512,64,0,0.10510879755020142
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,16,32,1024,4,0,1.3209199905395508
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,8,32,1024,8,0,0.7119120121002197
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,32,32,1024,2,0,2.6182064056396483
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,2,32,1024,32,0,0.30774240493774413
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,1,32,1024,64,0,0.22497279644012452
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,32,256,2,0,0.3840399980545044
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,64,16,4096,1,0,33.56925659179687
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,64,32,1024,1,0,5.192214584350586
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,32,1024,8,0,0.8528608322143555
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,4,32,1024,16,0,0.47820158004760743
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,32,1024,32,0,0.3932816028594971
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,32,1024,2,0,2.674728012084961
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,32,1024,64,0,0.30056641101837156
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,16,4096,1,0,32.19361877441406
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,32,1024,1,0,5.154595184326172
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,32,1024,4,0,1.3842975616455078
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,16,32,1536,4,0,2.6134096145629884
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,4,32,1536,16,0,0.8708928108215332
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,1,32,1536,64,0,0.35952000617980956
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,32,1024,16,0,0.5808832168579101
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,32,1536,8,0,1.46779203414917
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,32,32,1536,2,0,5.3455665588378904
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,32,1536,4,0,2.664254379272461
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,32,1536,32,0,0.6643695831298828
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,32,1536,16,0,0.9561200141906738
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,8,32,1536,8,0,1.4466431617736817
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,32,1536,64,0,0.477188777923584
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,32,1536,2,0,5.350563049316406
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,8,32,2048,8,0,2.2504512786865236
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,4,32,2048,16,0,1.2329615592956542
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,2,32,2048,32,0,0.833614444732666
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,2,32,1536,32,0,0.519049596786499
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,64,32,1536,1,0,10.884772491455077
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,1,32,2048,64,0,0.5294847965240479
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,16,32,2048,4,0,4.4366191864013675
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,32,2048,16,0,1.4460528373718262
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,32,2048,8,0,2.3589487075805664
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,32,1536,1,0,10.42428970336914
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,64,64,1,1,0,0.0160863995552063
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,32,2048,32,0,0.9662848472595215
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,32,64,1,2,0,0.01499360054731369
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,16,64,1,4,0,0.014567999541759491
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,8,64,1,8,0,0.014483200013637542
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,4,64,1,16,0,0.014532800018787383
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,2,64,1,32,0,0.01419679969549179
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,1,64,1,64,0,0.014545600116252898
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,64,1,1,0,0.02191520035266876
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,64,1,2,0,0.02095839977264404
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,64,1,4,0,0.02022559940814972
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,64,1,8,0,0.020083199441432952
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,64,1,16,0,0.01995680034160614
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,64,1,32,0,0.02028159946203232
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,64,1,64,0,0.019892799854278564
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,32,2048,4,0,4.5283550262451175
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,64,64,16,1,0,0.08534240126609802
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,32,64,16,2,0,0.04940159916877747
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,8,64,16,8,0,0.02005600035190582
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,16,64,16,4,0,0.030928000807762146
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,2,64,16,32,0,0.017854399979114532
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,1,64,16,64,0,0.018603199720382692
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,64,16,1,0,0.09193440079689026
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,64,16,2,0,0.061812800168991086
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,64,16,4,0,0.03650079965591431
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,64,16,8,0,0.028148800134658813
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,64,16,16,0,0.025726398825645445
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,64,16,32,0,0.025326400995254517
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,64,16,64,0,0.024329599738121033
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,32,64,32,2,0,0.08750240206718445
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,16,64,32,4,0,0.05184800028800964
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,8,64,32,8,0,0.031176000833511353
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,4,64,32,16,0,0.022969600558280946
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,2,64,32,32,0,0.0222448006272316
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,1,64,32,64,0,0.02080000042915344
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,64,32,1,0,0.15882400274276734
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,32,2048,64,0,0.6918335914611816
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,64,32,2,0,0.09748160243034362
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,64,32,4,0,0.06375200152397156
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,64,32,8,0,0.0443583995103836
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,64,32,16,0,0.032492798566818235
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,64,32,64,0,0.0301503986120224
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,32,32,2048,2,0,8.865174102783204
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,64,64,64,1,0,0.2822927951812744
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,32,2048,2,0,8.820217895507813
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,32,64,64,2,0,0.15940959453582765
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,16,64,64,4,0,0.09272159934043885
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,4,64,64,16,0,0.03993119895458221
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,8,64,64,8,0,0.06121600270271301
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,2,64,64,32,0,0.02910720109939575
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,1,64,64,64,0,0.02877120077610016
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,64,64,2,0,0.16811519861221313
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,64,64,4,0,0.10697920322418213
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,64,64,1,0,0.30055201053619385
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,64,64,8,0,0.07410399913787842
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,64,64,16,0,0.05602399706840515
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,64,64,32,0,0.042824000120162964
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,64,64,64,0,0.04147039949893951
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,16,64,128,4,0,0.17197279930114745
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,32,64,128,2,0,0.30485920906066893
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,8,64,128,8,0,0.10702240467071533
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,64,64,128,1,0,0.5780704021453857
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,4,64,128,16,0,0.07330080270767211
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,1,64,128,64,0,0.043489599227905275
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,64,128,1,0,0.6064799785614013
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,64,32,32,0,0.03139039874076843
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,64,128,2,0,0.3262752056121826
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,4,64,16,16,0,0.019179199635982514
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,64,128,16,0,0.09737280011177063
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,64,128,32,0,0.0789680004119873
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,64,128,64,0,0.06459519863128663
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,64,32,2048,1,0,17.95552978515625
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,64,64,32,1,0,0.15103360414505004
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,16,64,256,4,0,0.3754319906234741
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,2,64,128,32,0,0.05486239790916443
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,8,64,256,8,0,0.2210927963256836
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,64,64,256,1,0,1.3423855781555176
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,4,64,256,16,0,0.1452448010444641
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,2,64,256,32,0,0.10691360235214234
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,1,64,256,64,0,0.08265439867973327
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,64,256,4,0,0.41629757881164553
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,64,256,2,0,0.7457359790802002
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,64,256,8,0,0.2615200042724609
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,64,256,16,0,0.1850160002708435
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,64,256,32,0,0.1468160033226013
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,64,256,1,0,1.37980318069458
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,64,256,64,0,0.12066080570220947
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,32,2048,1,0,17.513880920410156
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,16,64,512,4,0,0.9327919960021973
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,32,64,256,2,0,0.7663375854492187
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,4,64,512,16,0,0.3802416086196899
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,2,64,512,32,0,0.25361440181732176
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,32,64,512,2,0,1.7952800750732423
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,1,64,512,64,0,0.19049439430236817
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,64,128,4,0,0.19553920030593872
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,64,128,8,0,0.13143520355224608
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,64,512,8,0,0.7065120220184327
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,64,64,512,1,0,3.4504512786865233
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,64,512,16,0,0.45401921272277834
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,64,512,32,0,0.3301215887069702
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,64,512,2,0,1.8550848007202148
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,8,64,512,8,0,0.6690527915954589
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,64,512,1,0,3.489329528808594
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,8,64,1024,8,0,1.4063199996948241
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,1,64,1024,64,0,0.41142878532409666
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,2,64,1024,32,0,0.5877840042114257
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,4,64,1024,16,0,0.9261792182922364
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,16,64,1024,4,0,2.6767648696899413
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,64,512,4,0,1.0053983688354493
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,64,512,64,0,0.2684992074966431
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,64,1024,16,0,1.067195224761963
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,64,1024,32,0,0.731601619720459
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,64,1024,8,0,1.6064111709594726
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,64,128,1,1,0,0.017451199889183044
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,32,128,1,2,0,0.016121600568294526
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,64,1024,64,0,0.5602255821228027
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,8,128,1,8,0,0.015459200739860535
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,2,128,1,32,0,0.015612800419330598
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,4,128,1,16,0,0.01544640064239502
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,1,128,1,64,0,0.015515199303627015
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,128,1,1,0,0.023204800486564637
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,128,1,2,0,0.021939200162887574
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,128,1,4,0,0.02168319970369339
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,128,1,8,0,0.02160159945487976
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,128,1,16,0,0.02136639952659607
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,128,1,64,0,0.021432000398635864
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,64,128,16,1,0,0.14941279888153075
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,32,128,16,2,0,0.08739359974861145
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,16,128,16,4,0,0.05248479843139649
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,8,128,16,8,0,0.03171359896659851
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,4,128,16,16,0,0.023721599578857423
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,2,128,16,32,0,0.02261279970407486
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,1,128,16,64,0,0.021883200109004974
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,128,16,1,0,0.1593791961669922
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,64,1024,2,0,5.402104187011719
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,128,16,2,0,0.09799839854240418
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,128,16,8,0,0.04466719925403595
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,128,16,4,0,0.0677295982837677
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,128,16,16,0,0.03323839902877808
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,128,16,64,0,0.03136320114135742
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,32,64,1024,2,0,5.368420791625977
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,64,128,32,1,0,0.2808128118515015
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,32,128,32,2,0,0.1566383957862854
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,16,128,32,4,0,0.09384959936141968
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,8,128,32,8,0,0.05893440246582031
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,128,1,32,0,0.021740800142288207
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,4,128,32,16,0,0.04005120098590851
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,1,128,32,64,0,0.02860960066318512
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,64,64,1024,1,0,10.651255798339843
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,128,32,2,0,0.1713760018348694
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,128,32,1,0,0.2942255973815918
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,128,32,16,0,0.05636640191078186
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,128,32,4,0,0.11483839750289918
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,128,32,32,0,0.04287680089473724
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,128,32,64,0,0.04292480051517487
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,64,128,64,1,0,0.5372911930084229
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,32,128,64,2,0,0.29280800819396974
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,16,128,64,4,0,0.16717439889907837
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,4,128,64,16,0,0.07154240012168885
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,16,128,1,4,0,0.016238400340080263
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,2,128,64,32,0,0.053518402576446536
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,128,16,32,0,0.0330128014087677
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,1,128,64,64,0,0.04439040124416351
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,64,1024,1,0,10.570419311523438
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,64,1024,4,0,2.764334487915039
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,128,64,1,0,0.5694464206695556
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,2,128,32,32,0,0.029865598678588866
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,128,64,8,0,0.12874560356140136
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,128,64,4,0,0.20094079971313478
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,128,64,16,0,0.09515519738197327
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,128,64,32,0,0.07738720178604126
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,128,64,64,0,0.06639519929885865
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,8,128,128,8,0,0.19936319589614868
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,128,32,8,0,0.07550560235977173
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,16,128,128,4,0,0.3304239988327026
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,32,128,128,2,0,0.591215991973877
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,4,128,128,16,0,0.13540639877319335
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,1,128,128,64,0,0.07844160199165344
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,64,128,128,1,0,1.1565792083740234
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,128,128,4,0,0.37079520225524903
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,128,128,2,0,0.6304512023925781
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,128,128,16,0,0.1762719988822937
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,128,128,8,0,0.24208319187164307
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,128,128,32,0,0.14155679941177368
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,128,128,64,0,0.11618239879608154
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,128,128,1,0,1.1777279853820801
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,8,128,256,8,0,0.4280367851257324
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,128,64,2,0,0.3135983943939209
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,4,128,256,16,0,0.2758336067199707
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,1,128,256,64,0,0.15123519897460938
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,2,128,128,32,0,0.10090399980545044
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,64,128,256,1,0,2.660465621948242
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,8,128,64,8,0,0.10576479434967041
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,128,256,4,0,0.8072992324829101
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,128,256,2,0,1.4728591918945313
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,128,256,8,0,0.5056335926055908
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,128,256,32,0,0.2751935958862305
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,16,128,256,4,0,0.734991979598999
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,128,256,64,0,0.22682559490203857
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,2,128,256,32,0,0.19783519506454467
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,32,128,256,2,0,1.3985887527465821
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,4,128,512,16,0,0.7330368041992188
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,2,128,512,32,0,0.4826528072357178
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,8,128,512,8,0,1.2372799873352052
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,16,128,512,4,0,1.8396335601806642
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,1,128,512,64,0,0.35858080387115476
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,128,256,16,0,0.351198410987854
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,128,256,1,0,2.727369689941406
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,32,128,512,2,0,3.562433624267578
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,128,512,8,0,1.3848447799682617
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,64,256,1,1,0,0.02752000093460083
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,32,256,1,2,0,0.023643200099468232
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,128,512,64,0,0.5075376033782959
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,128,512,32,0,0.6326367855072021
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,16,256,1,4,0,0.022779199481010436
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,2,256,1,32,0,0.022091199457645417
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,4,256,1,16,0,0.022171199321746826
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,1,256,1,64,0,0.02240000069141388
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,256,1,1,0,0.0340175986289978
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,256,1,2,0,0.029524800181388856
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,256,1,4,0,0.02898559868335724
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,256,1,8,0,0.028271999955177308
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,128,512,2,0,3.7037696838378906
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,256,1,32,0,0.02813119888305664
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,256,1,64,0,0.028252801299095152
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,64,256,16,1,0,0.2817296028137207
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,32,256,16,2,0,0.15684640407562256
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,16,256,16,4,0,0.09389920234680176
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,64,128,512,1,0,6.986339569091797
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,4,256,16,16,0,0.03940320014953613
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,1,256,16,64,0,0.035062399506568906
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,2,256,16,32,0,0.036950400471687316
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,256,16,1,0,0.2947119951248169
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,256,16,2,0,0.17527040243148803
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,256,16,4,0,0.10829759836196899
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,128,512,4,0,1.9942495346069335
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,256,16,8,0,0.0763584017753601
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,256,16,32,0,0.051177597045898436
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,128,512,16,0,0.8880816459655761
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,8,256,1,8,0,0.022655999660491942
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,128,512,1,0,6.954296112060547
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,32,256,32,2,0,0.2956223964691162
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,16,256,32,4,0,0.17095839977264404
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,64,256,32,1,0,0.559168004989624
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,8,256,32,8,0,0.10962560176849365
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,2,256,32,32,0,0.053329598903656
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,4,256,32,16,0,0.07368959784507752
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,1,256,32,64,0,0.048332801461219786
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,256,32,8,0,0.1310256004333496
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,256,32,2,0,0.3201280117034912
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,256,32,16,0,0.09685919880867004
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,256,32,32,0,0.07769920229911804
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,256,32,64,0,0.071670401096344
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,8,256,16,8,0,0.05970079898834228
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,16,256,64,4,0,0.32733759880065916
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,32,256,64,2,0,0.595801591873169
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,8,256,64,8,0,0.19682559967041016
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,64,256,64,1,0,1.0798848152160645
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,2,256,64,32,0,0.09996960163116456
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,256,16,16,0,0.05567520260810852
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,1,256,64,64,0,0.08063200116157532
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,256,16,64,0,0.04939199984073639
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,256,64,4,0,0.3633872032165527
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,256,64,2,0,0.6099040031433105
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,256,1,16,0,0.028191998600959778
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,256,64,8,0,0.2385103940963745
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,256,64,32,0,0.1399456024169922
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,256,64,64,0,0.12012319564819336
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,256,32,1,0,0.5624879837036133
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,16,256,128,4,0,0.6457856178283692
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,32,256,128,2,0,1.1685407638549805
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,4,256,64,16,0,0.13619359731674194
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,4,256,128,16,0,0.25402560234069826
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,2,256,128,32,0,0.1879536032676697
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,1,256,128,64,0,0.14570239782333375
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,64,256,128,1,0,2.253099250793457
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,256,64,1,0,1.1206959724426269
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,256,64,16,0,0.1735759973526001
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,256,128,4,0,0.7153567790985107
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,256,32,4,0,0.19209760427474976
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,256,128,2,0,1.2431808471679688
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,256,128,8,0,0.45814080238342286
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,256,128,64,0,0.22361440658569337
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,256,128,1,0,2.329937553405762
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,8,256,256,8,0,0.8314255714416504
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,8,256,128,8,0,0.3826591968536377
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,4,256,256,16,0,0.5287807941436767
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,16,256,256,4,0,1.4404704093933105
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,2,256,256,32,0,0.37850399017333985
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,1,256,256,64,0,0.28280160427093504
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,32,256,256,2,0,2.792728042602539
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,256,256,8,0,0.9802288055419922
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,256,128,16,0,0.3303920030593872
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,256,128,32,0,0.2661679983139038
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,256,256,4,0,1.5825920104980469
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,128,1,1,1,0,0.013247999548912048
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,64,1,1,2,0,0.012883199751377106
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,256,256,16,0,0.6813471794128418
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,1,256,256,64,0,0.4322927951812744
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,8,1,1,16,0,0.012196800112724305
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,4,1,1,32,0,0.01207199990749359
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,2,1,1,64,0,0.012078399956226348
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,1,1,1,0,0.018811200559139252
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,1,1,2,0,0.01852000057697296
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,1,1,4,0,0.017745600640773775
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,1,1,8,0,0.017744000256061553
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,1,1,16,0,0.017744000256061553
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,1,1,32,0,0.01780160069465637
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,1,1,64,0,0.017587199807167053
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,128,1,16,1,0,0.015025599300861359
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,256,256,2,0,2.926252746582031
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,32,1,16,4,0,0.013899199664592743
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,64,256,256,1,0,5.283339309692383
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,8,1,16,16,0,0.013758400082588195
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,16,1,16,8,0,0.013601599633693695
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,2,1,16,64,0,0.012929600477218629
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,4,1,16,32,0,0.013363200426101684
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,1,16,2,0,0.019696000218391418
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,1,16,8,0,0.019764800369739533
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,1,16,16,0,0.01947840005159378
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,1,16,32,0,0.019079999625682832
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,1,16,64,0,0.01875839978456497
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,128,1,32,1,0,0.015636800229549407
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,64,1,32,2,0,0.015167999267578124
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,32,1,32,4,0,0.01409280002117157
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,16,1,32,8,0,0.013910399377346038
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,8,1,32,16,0,0.013870400190353394
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,4,1,32,32,0,0.013851200044155122
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,2,1,32,64,0,0.013702400028705597
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,1,32,1,0,0.021980799734592438
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,1,32,2,0,0.021214400231838227
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,256,256,1,0,5.402305603027344
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,1,32,4,0,0.019939200580120088
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,32,1,1,4,0,0.012323199957609176
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,1,32,16,0,0.019788800179958342
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,16,1,1,8,0,0.01223519966006279
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,1,32,8,0,0.019631999731063842
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,1,32,32,0,0.019313600659370423
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,1,32,64,0,0.019449600577354433
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,128,1,64,1,0,0.015836800634860992
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,64,1,64,2,0,0.015459200739860535
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,8,1,64,16,0,0.01385599970817566
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,64,1,16,2,0,0.013892799615859985
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,2,1,64,64,0,0.013676799833774567
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,1,64,1,0,0.022416000068187714
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,1,64,2,0,0.02170879989862442
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,1,64,4,0,0.020945599675178526
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,1,64,8,0,0.01974080055952072
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,1,64,16,0,0.019815999269485473
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,1,64,32,0,0.01961279958486557
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,1,64,64,0,0.01982080042362213
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,128,1,128,1,0,0.026796799898147584
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,64,1,128,2,0,0.0172447994351387
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,32,1,128,4,0,0.017019200325012206
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,16,1,128,8,0,0.016414399445056915
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,8,1,128,16,0,0.015598399937152863
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,4,1,128,32,0,0.015544000267982482
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,2,1,128,64,0,0.015292799472808838
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,1,128,1,0,0.035729598999023435
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,1,128,2,0,0.023417599499225616
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,1,128,4,0,0.023515200614929198
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,1,128,8,0,0.022126400470733644
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,1,128,16,0,0.021505600214004515
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,1,128,32,0,0.021121600270271303
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,1,128,64,0,0.02109760046005249
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,128,1,256,1,0,0.054099202156066895
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,64,1,256,2,0,0.028502398729324342
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,32,1,256,4,0,0.020931200683116914
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,16,1,256,8,0,0.01990559995174408
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,8,1,256,16,0,0.018904000520706177
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,4,1,256,32,0,0.018887999653816222
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,256,256,32,0,0.52674241065979
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,2,1,256,64,0,0.018675200641155243
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,1,256,1,0,0.06452800035476684
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,1,256,4,0,0.02699039876461029
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,1,256,8,0,0.0263808012008667
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,32,1,64,4,0,0.014745600521564484
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,1,256,32,0,0.02503040134906769
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,4,1,64,32,0,0.01390720009803772
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,1,256,64,0,0.024916799366474153
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,128,1,512,1,0,0.12076640129089355
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,32,1,512,4,0,0.035395199060440065
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,1,16,1,0,0.021011200547218323
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,1,16,4,0,0.019649599492549897
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,8,1,512,16,0,0.03605599999427796
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,2,1,512,64,0,0.024009600281715393
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,1,512,1,0,0.13200639486312865
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,1,512,2,0,0.0769536018371582
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,1,512,4,0,0.04344959855079651
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,1,512,8,0,0.033790400624275206
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,1,512,16,0,0.04236319959163666
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,1,512,32,0,0.035308799147605895
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,1,512,64,0,0.0308896005153656
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,128,1,1024,1,0,0.3273439884185791
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,1,256,2,0,0.03632160127162933
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,64,1,1024,2,0,0.1714192032814026
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,32,1,1024,4,0,0.09198880195617676
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,8,1,1024,16,0,0.040696001052856444
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,4,1,1024,32,0,0.0429504007101059
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,2,1,1024,64,0,0.03187040090560913
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,64,1,512,2,0,0.06670079827308655
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,1,1024,1,0,0.3365423917770386
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,1,1024,2,0,0.18216480016708375
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,1,1024,4,0,0.10181280374526977
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,1,1024,8,0,0.05771200060844421
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,1,1024,16,0,0.0475488007068634
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,1,1024,32,0,0.05101280212402344
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,1,1024,64,0,0.039108800888061526
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,32,1,1536,4,0,0.16861599683761597
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,64,1,1536,2,0,0.3258320093154907
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,16,1,1536,8,0,0.09430400133132935
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,8,1,1536,16,0,0.055060797929763795
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,128,1,1536,1,0,0.6265503883361816
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,4,1,1536,32,0,0.05942879915237427
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,2,1,1536,64,0,0.042454400658607484
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,1,1536,4,0,0.17830079793930054
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,1,1536,2,0,0.33246400356292727
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,1,1536,8,0,0.104476797580719
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,1,1536,16,0,0.06331200003623963
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,1,1536,1,0,0.6364160060882569
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,1,1536,32,0,0.07066720128059387
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,1,1536,64,0,0.05175359845161438
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,32,1,2048,4,0,0.26961119174957277
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,64,1,2048,2,0,0.5279232025146484
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,16,1,2048,8,0,0.14145920276641846
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,1,256,16,0,0.025352001190185547
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,4,1,2048,32,0,0.06756160259246827
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,128,1,2048,1,0,1.0071344375610352
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,16,1,512,8,0,0.027102398872375488
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,1,2048,2,0,0.5319359779357911
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,1,2048,8,0,0.1533360004425049
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,1,2048,4,0,0.2789232015609741
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,1,2048,16,0,0.08835840225219727
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,1,2048,32,0,0.07788479924201966
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,1,2048,64,0,0.07052159905433655
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,64,1,3072,2,0,1.070702362060547
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,16,1,1024,8,0,0.04895359873771667
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,32,1,3072,4,0,0.5444863796234131
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,16,1,64,8,0,0.015041600167751312
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,128,1,3072,1,0,2.0923967361450195
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,8,1,3072,16,0,0.16136319637298585
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,2,1,2048,64,0,0.05847039818763733
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,2,1,3072,64,0,0.08145279884338379
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,1,3072,4,0,0.6273519992828369
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,1,2048,1,0,1.0135151863098144
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,1,3072,2,0,1.0706432342529297
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,1,3072,8,0,0.30649600028991697
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,1,3072,16,0,0.17348959445953369
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,1,3072,32,0,0.11049599647521972
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,1,3072,64,0,0.09952800273895264
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,1,3072,1,0,2.077529525756836
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,32,1,4096,4,0,0.9369824409484864
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,16,1,4096,8,0,0.46683359146118164
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,4,1,4096,32,0,0.13466880321502686
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,2,1,4096,64,0,0.12677279710769654
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,64,1,4096,2,0,1.805855941772461
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,8,1,2048,16,0,0.07617440223693847
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,16,1,3072,8,0,0.2799376010894775
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,1,4096,8,0,0.47673759460449217
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,1,4096,4,0,0.9173392295837403
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,4,1,512,32,0,0.02852639853954315
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,1,4096,32,0,0.14993280172348022
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,1,4096,64,0,0.13916319608688354
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,128,1,4096,1,0,3.915089416503906
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,1,4096,2,0,1.9605951309204102
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,8,1,4096,16,0,0.2590863943099976
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,1,4096,1,0,3.7111263275146484
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,4,1,3072,32,0,0.09819039702415466
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,32,1,6144,4,0,2.0947120666503904
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,8,1,6144,16,0,0.5474527835845947
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,16,1,6144,8,0,0.9939040184020996
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,2,1,6144,64,0,0.20181601047515868
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,1,4096,16,0,0.259278392791748
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,1,6144,8,0,0.9864607810974121
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,64,1,6144,2,0,4.358587265014648
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,1,6144,4,0,2.037094306945801
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,1,6144,16,0,0.5192848205566406
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,1,6144,32,0,0.3436160087585449
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,1,6144,64,0,0.21765921115875245
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,4,1,6144,32,0,0.28987998962402345
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,1,6144,2,0,4.138332748413086
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,4,1,8192,32,0,0.4726895809173584
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,8,1,8192,16,0,0.8894111633300781
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,2,1,8192,64,0,0.2541039943695068
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,16,1,8192,8,0,1.718191909790039
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,128,1,6144,1,0,8.658604431152344
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,32,1,8192,4,0,3.7169921875
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,1,8192,16,0,0.9814127922058106
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,1,8192,8,0,1.8811439514160155
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,1,8192,64,0,0.275547194480896
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,1,8192,32,0,0.5187376022338868
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,1,8192,4,0,3.657134246826172
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,64,1,8192,2,0,7.599282836914062
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,1,6144,1,0,8.261558532714844
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,1,8192,2,0,7.394916534423828
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,16,1,10240,8,0,2.94627685546875
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,8,1,10240,16,0,1.3330047607421875
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,4,1,10240,32,0,0.7735151767730712
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,2,1,10240,64,0,0.4431295871734619
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,32,1,10240,4,0,5.917750549316406
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,128,1,8192,1,0,15.220796203613281
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,1,10240,8,0,2.8996608734130858
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,1,10240,16,0,1.4428144454956056
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,1,10240,32,0,0.7727168083190918
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,1,10240,4,0,5.7031902313232425
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,1,10240,64,0,0.4386591911315918
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,1,8192,1,0,14.689743041992188
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,64,1,10240,2,0,11.825424194335938
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,8,1,12288,16,0,2.080414390563965
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,4,1,12288,32,0,1.0607872009277344
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,2,1,12288,64,0,0.6143727779388428
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,1,10240,2,0,11.483708953857422
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,32,1,12288,4,0,8.455687713623046
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,16,1,12288,8,0,4.271867370605468
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,128,1,10240,1,0,24.19532928466797
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,1,12288,16,0,2.1491743087768556
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,1,12288,32,0,1.0562751770019532
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,1,12288,8,0,3.886548614501953
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,1,10240,1,0,22.33548583984375
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,1,12288,64,0,0.5995471954345704
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,1,12288,4,0,8.700185394287109
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,64,1,12288,2,0,18.137875366210938
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,8,1,16384,16,0,3.753790283203125
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,1,12288,2,0,17.327674865722656
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,4,1,16384,32,0,1.8814031600952148
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,2,1,16384,64,0,0.9380592346191406
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,16,1,16384,8,0,7.499816131591797
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,32,1,16384,4,0,14.941278076171875
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,128,1,12288,1,0,34.923178100585936
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,1,12288,1,0,32.3964599609375
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,1,16384,16,0,3.5710784912109377
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,1,16384,64,0,0.9770463943481446
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,1,16384,4,0,14.27830810546875
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,1,16384,32,0,1.7932416915893554
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,1,16384,8,0,7.1239501953125
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,64,1,16384,2,0,30.618588256835938
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,1,16384,2,0,29.244467163085936
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,4,1,32768,32,0,7.8133697509765625
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,8,1,32768,16,0,14.785903930664062
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,2,1,32768,64,0,3.8192256927490233
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,16,1,32768,8,0,29.321078491210937
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,128,1,16384,1,0,61.512298583984375
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,1,16384,1,0,58.577325439453126
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,1,32768,16,0,14.778398132324218
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,32,1,32768,4,0,59.520330810546874
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,1,32768,64,0,3.5712352752685548
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,128,2,1,1,0,0.014553600549697876
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,64,2,1,2,0,0.013876800239086152
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,32,2,1,4,0,0.01343040019273758
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,16,2,1,8,0,0.013265599310398103
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,8,2,1,16,0,0.013177600502967835
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,4,2,1,32,0,0.013099199533462525
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,2,2,1,64,0,0.013184000551700593
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,2,1,1,0,0.02038719952106476
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,2,1,2,0,0.019332799315452575
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,2,1,4,0,0.01852159947156906
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,1,32768,32,0,7.255265808105468
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,2,1,16,0,0.018990400433540344
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,2,1,32,0,0.01890240013599396
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,2,1,64,0,0.019179199635982514
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,128,2,16,1,0,0.016945600509643555
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,1,32768,8,0,29.1573974609375
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,64,2,16,2,0,0.01675039976835251
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,32,2,16,4,0,0.015939199924468996
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,16,2,16,8,0,0.015542399883270264
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,8,2,16,16,0,0.015432000160217285
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,4,2,16,32,0,0.014838400483131408
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,2,2,16,64,0,0.014299200475215912
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,2,16,1,0,0.023630400002002717
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,2,16,2,0,0.02311840057373047
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,2,16,4,0,0.022174400091171265
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,2,1,8,0,0.01812160015106201
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,2,16,8,0,0.021828800439834595
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,2,16,16,0,0.019832000136375427
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,2,16,64,0,0.01902720034122467
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,128,2,32,1,0,0.015939199924468996
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,64,2,32,2,0,0.015596799552440643
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,32,2,32,4,0,0.015078400075435639
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,16,2,32,8,0,0.015043200552463531
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,8,2,32,16,0,0.013940800726413728
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,4,2,32,32,0,0.01404000073671341
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,2,2,32,64,0,0.013475200533866883
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,2,32,1,0,0.022550399601459502
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,2,32,2,0,0.02200160026550293
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,2,32,4,0,0.021011200547218323
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,2,32,8,0,0.020289599895477295
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,2,32,16,0,0.020001600682735442
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,2,32,32,0,0.01979999989271164
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,2,32,64,0,0.01940000057220459
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,128,2,64,1,0,0.02466239929199219
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,64,2,64,2,0,0.01615840047597885
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,32,2,64,4,0,0.015505599975585937
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,16,2,64,8,0,0.015252800285816192
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,8,2,64,16,0,0.014294399321079254
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,4,2,64,32,0,0.014151999354362487
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,2,2,64,64,0,0.01408960074186325
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,2,64,1,0,0.032118400931358336
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,2,64,2,0,0.02269600033760071
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,2,64,4,0,0.021483199298381807
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,2,64,8,0,0.02110240012407303
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,2,64,16,0,0.021214400231838227
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,2,64,32,0,0.020022399723529816
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,2,64,64,0,0.019966399669647215
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,128,2,128,1,0,0.04812160134315491
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,64,2,128,2,0,0.025065600872039795
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,32,2,128,4,0,0.017660799622535705
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,16,2,128,8,0,0.016760000586509706
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,8,2,128,16,0,0.016774399578571318
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,4,2,128,32,0,0.016264000535011293
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,2,2,128,64,0,0.01592160016298294
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,2,128,1,0,0.0572704017162323
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,2,128,2,0,0.03255040049552917
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,2,128,4,0,0.024031999707221984
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,2,128,8,0,0.02322240024805069
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,2,128,16,0,0.022793599963188173
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,2,128,32,0,0.023003199696540834
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,2,128,64,0,0.02211039960384369
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,128,2,256,1,0,0.09688320159912109
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,64,2,256,2,0,0.0541375994682312
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,32,2,256,4,0,0.02924000024795532
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,16,2,256,8,0,0.021329599618911742
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,8,2,256,16,0,0.02086080014705658
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,4,2,256,32,0,0.020732800662517547
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,2,2,256,64,0,0.02009119987487793
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,2,16,32,0,0.02115360051393509
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,2,256,1,0,0.10722240209579467
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,2,256,2,0,0.0642192006111145
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,2,256,4,0,0.038273599743843076
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,2,256,8,0,0.028112000226974486
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,2,256,16,0,0.027393600344657897
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,2,256,32,0,0.027235201001167296
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,2,256,64,0,0.026604801416397095
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,64,2,512,2,0,0.12177439928054809
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,128,2,512,1,0,0.2278991937637329
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,32,2,512,4,0,0.06695359945297241
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,8,2,512,16,0,0.033583998680114746
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,4,2,512,32,0,0.037406399846076965
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,2,2,512,64,0,0.02975359857082367
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,2,512,2,0,0.1316655993461609
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,2,512,4,0,0.07681599855422974
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,2,512,8,0,0.045372799038887024
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,2,512,16,0,0.040910398960113524
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,2,512,32,0,0.04540640115737915
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,2,512,64,0,0.03728159964084625
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,128,2,1024,1,0,0.6346720218658447
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,64,2,1024,2,0,0.3277343988418579
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,32,2,1024,4,0,0.17197920083999635
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,16,2,1024,8,0,0.09281119704246521
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,8,2,1024,16,0,0.052052801847457884
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,4,2,1024,32,0,0.04873439967632294
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,16,2,512,8,0,0.036873599886894225
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,2,2,1024,64,0,0.045902401208877563
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,2,512,1,0,0.23590080738067626
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,2,1024,2,0,0.3374959945678711
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,2,1024,1,0,0.6380943775177002
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,2,1024,4,0,0.18395520448684693
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,2,1024,16,0,0.06516000032424926
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,2,1024,64,0,0.05805439949035644
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,128,2,1536,1,0,1.2199695587158204
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,64,2,1536,2,0,0.6876192092895508
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,32,2,1536,4,0,0.34778079986572263
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,2,1024,8,0,0.10474239587783814
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,2,1024,32,0,0.05891680121421814
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,16,2,1536,8,0,0.1715183973312378
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,8,2,1536,16,0,0.098198401927948
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,4,2,1536,32,0,0.06606400012969971
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,2,2,1536,64,0,0.07203519940376282
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,2,1536,2,0,0.6434080123901367
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,2,1536,1,0,1.2382927894592286
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,2,1536,4,0,0.33730239868164064
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,2,1536,16,0,0.11208159923553467
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,2,1536,8,0,0.18468639850616456
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,2,1536,32,0,0.07849599719047547
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,2,1536,64,0,0.08618720173835755
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,1,32768,4,0,56.89201049804687
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,64,2,2048,2,0,1.0482576370239258
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,16,2,2048,8,0,0.27487199306488036
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,32,2,2048,4,0,0.5883920192718506
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,8,2,2048,16,0,0.14824800491333007
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,4,2,2048,32,0,0.0866703987121582
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,2,2,2048,64,0,0.0790063977241516
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,2,2048,2,0,1.039140796661377
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,2,2048,4,0,0.5374256134033203
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,2,2048,1,0,2.0740432739257812
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,2,2048,8,0,0.2902911901473999
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,2,2048,64,0,0.09327999949455261
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,2,2048,32,0,0.10466400384902955
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,128,2,2048,1,0,2.0024208068847655
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,64,2,3072,2,0,2.2300447463989257
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,32,2,3072,4,0,1.0775551795959473
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,8,2,3072,16,0,0.30829439163208006
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,16,2,3072,8,0,0.6074079990386962
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,4,2,3072,32,0,0.18050880432128907
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,2,2,3072,64,0,0.1224128007888794
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,2,2048,16,0,0.16229920387268065
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,128,2,3072,1,0,4.692009735107422
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,2,3072,4,0,1.0837136268615724
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,2,3072,16,0,0.3133984088897705
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,2,3072,32,0,0.1922368049621582
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,2,3072,2,0,2.2083808898925783
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,2,3072,1,0,4.37370719909668
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,2,3072,8,0,0.6262224197387696
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,2,3072,64,0,0.13553760051727295
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,16,2,4096,8,0,0.9348447799682618
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,8,2,4096,16,0,0.4871808052062988
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,4,2,4096,32,0,0.27054879665374754
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,64,2,4096,2,0,3.9756702423095702
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,2,2,4096,64,0,0.15368959903717042
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,128,2,4096,1,0,8.001787567138672
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,32,2,4096,4,0,1.8792863845825196
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,2,4096,2,0,3.7857662200927735
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,2,4096,4,0,1.9667503356933593
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,2,4096,32,0,0.3026432037353516
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,2,4096,16,0,0.5461775779724121
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,2,4096,64,0,0.1756399989128113
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,2,4096,1,0,7.72071533203125
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,2,4096,8,0,0.9372415542602539
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,16,2,6144,8,0,2.023740768432617
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,8,2,6144,16,0,1.0683648109436035
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,4,2,6144,32,0,0.5783936023712158
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,32,2,6144,4,0,4.341422271728516
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,2,2,6144,64,0,0.3310159921646118
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,64,1,32768,2,0,119.50084228515625
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,64,2,6144,2,0,8.984436798095704
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,2,6144,8,0,2.1083744049072264
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,2,6144,16,0,1.0870512008666993
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,2,6144,4,0,4.276883316040039
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,2,6144,32,0,0.5884560108184814
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,2,6144,64,0,0.3743423938751221
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,2,6144,2,0,8.508654022216797
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,128,2,6144,1,0,17.705027770996093
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,16,2,8192,8,0,3.839601516723633
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,8,2,8192,16,0,1.923975944519043
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,32,2,8192,4,0,7.801700592041016
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,2,6144,1,0,17.192344665527344
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,2,2,8192,64,0,0.5298880100250244
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,4,2,8192,32,0,0.998316764831543
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,64,2,8192,2,0,15.843254089355469
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,1,32768,2,0,115.9652587890625
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,2,8192,16,0,1.8334896087646484
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,2,8192,8,0,3.667726516723633
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,2,8192,32,0,0.9792256355285645
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,2,8192,64,0,0.5658080101013183
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,2,8192,4,0,8.042947387695312
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,2,8192,2,0,15.545169067382812
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,128,2,8192,1,0,30.872406005859375
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,16,2,10240,8,0,6.109841537475586
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,8,2,10240,16,0,2.985812759399414
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,4,2,10240,32,0,1.4988431930541992
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,32,2,10240,4,0,11.895748901367188
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,2,2,10240,64,0,0.8528047561645508
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,2,8192,1,0,29.477029418945314
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,64,2,10240,2,0,24.29231414794922
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,2,10240,8,0,5.85079345703125
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,2,10240,16,0,2.8969951629638673
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,2,10240,64,0,0.8715871810913086
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,2,10240,32,0,1.5265536308288574
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,2,10240,4,0,11.348187255859376
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,2,10240,2,0,23.36591339111328
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,32,2,12288,4,0,16.944789123535156
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,16,2,12288,8,0,8.755105590820312
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,128,2,10240,1,0,48.89503479003906
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,4,2,12288,32,0,2.148084831237793
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,8,2,12288,16,0,4.45203857421875
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,2,2,12288,64,0,1.1613951683044434
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,2,10240,1,0,46.71565551757813
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,64,2,12288,2,0,34.216796875
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,2,12288,16,0,4.157199859619141
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,2,12288,32,0,2.11419677734375
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,2,12288,8,0,8.25498046875
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,2,12288,64,0,1.1236031532287598
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,2,12288,4,0,16.41956481933594
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,2,12288,2,0,33.50208435058594
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,128,1,32768,1,0,239.4141845703125
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,128,2,12288,1,0,69.00231323242187
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,8,2,16384,16,0,7.602799987792968
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,16,2,16384,8,0,15.593161010742188
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,4,2,16384,32,0,3.838091278076172
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,1,32768,1,0,225.7130615234375
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,2,2,16384,64,0,1.9082544326782227
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,32,2,16384,4,0,31.452224731445312
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,2,16384,16,0,7.3011619567871096
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,2,16384,32,0,3.6939456939697264
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,2,12288,1,0,65.14176025390626
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,2,16384,8,0,14.623149108886718
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,2,16384,64,0,1.9295183181762696
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,64,2,16384,2,0,60.449981689453125
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,2,16384,4,0,28.938540649414062
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,2,16384,2,0,58.164678955078124
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,8,2,32768,16,0,29.54802551269531
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,2,2,32768,64,0,8.31277084350586
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,4,2,32768,32,0,15.46795196533203
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,16,2,32768,8,0,60.21144409179688
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,128,2,16384,1,0,123.12470703125
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,2,16384,1,0,117.61693115234375
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,32,2,32768,4,0,121.90838623046875
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,2,32768,16,0,29.44442138671875
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,2,32768,8,0,58.49320068359375
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,128,4,1,1,0,0.015296000242233276
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,64,4,1,2,0,0.0137472003698349
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,32,4,1,4,0,0.013279999792575835
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,16,4,1,8,0,0.012985600531101227
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,8,4,1,16,0,0.012736000120639801
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,4,4,1,32,0,0.012747199833393097
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,2,4,1,64,0,0.012753599882125854
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,4,1,1,0,0.019595199823379518
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,4,1,2,0,0.018947200477123262
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,4,1,4,0,0.018539200723171233
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,4,1,8,0,0.018199999630451203
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,4,1,16,0,0.017998400330543517
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,4,1,32,0,0.01783519983291626
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,4,1,64,0,0.018006399273872375
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,128,4,16,1,0,0.016011199355125426
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,64,4,16,2,0,0.015526400506496429
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,32,4,16,4,0,0.01518079936504364
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,16,4,16,8,0,0.014177599549293518
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,8,4,16,16,0,0.014102399349212646
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,4,4,16,32,0,0.013742400705814362
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,2,4,16,64,0,0.013166399300098419
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,4,16,1,0,0.022486400604248048
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,4,16,2,0,0.021692800521850585
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,4,16,4,0,0.021652799844741822
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,2,32768,32,0,14.83841552734375
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,4,16,8,0,0.020446400344371795
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,4,16,32,0,0.019575999677181245
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,4,16,64,0,0.019032000005245207
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,128,4,32,1,0,0.024260799586772918
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,2,32768,64,0,7.498448181152344
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,64,4,32,2,0,0.016012799739837647
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,16,4,32,8,0,0.015371200442314149
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,32,4,32,4,0,0.01701119989156723
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,4,4,32,32,0,0.014856000244617463
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,2,4,32,64,0,0.014468799531459808
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,4,32,1,0,0.03397279977798462
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,4,32,2,0,0.022995199263095855
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,4,32,4,0,0.02266400009393692
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,4,32,8,0,0.02205760031938553
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,4,32,16,0,0.02101760059595108
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,4,32,32,0,0.02070080041885376
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,4,32,64,0,0.02040479928255081
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,128,4,64,1,0,0.0463808000087738
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,64,4,64,2,0,0.024719999730587007
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,32,4,64,4,0,0.01685120016336441
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,16,4,64,8,0,0.016412800550460814
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,8,4,64,16,0,0.016344000399112702
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,4,4,64,32,0,0.015057599544525147
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,2,4,64,64,0,0.014988799393177033
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,4,64,1,0,0.05566080212593079
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,4,64,2,0,0.030904000997543334
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,4,64,4,0,0.023025600612163542
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,4,64,8,0,0.022519999742507936
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,4,16,16,0,0.02212799936532974
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,4,64,16,0,0.021646399796009064
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,4,64,32,0,0.02152000069618225
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,4,64,64,0,0.02040639966726303
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,64,4,128,2,0,0.0475488007068634
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,32,4,128,4,0,0.027251198887825012
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,8,4,32,16,0,0.014142400026321411
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,8,4,128,16,0,0.01780479997396469
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,4,4,128,32,0,0.017665599286556245
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,2,4,128,64,0,0.016896000504493712
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,128,4,128,1,0,0.08475840091705322
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,4,128,1,0,0.09497119784355164
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,4,128,2,0,0.058432000875473025
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,4,128,4,0,0.03533599972724914
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,4,128,8,0,0.0248879998922348
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,4,128,16,0,0.024369600415229797
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,4,128,32,0,0.024040000140666963
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,4,128,64,0,0.023228800296783446
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,128,4,256,1,0,0.17712800502777098
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,64,4,256,2,0,0.09676799774169922
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,32,4,256,4,0,0.05557119846343994
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,16,4,256,8,0,0.033287999033927915
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,8,4,256,16,0,0.02268960028886795
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,4,4,256,32,0,0.022260800004005432
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,2,4,256,64,0,0.022123199701309205
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,4,256,1,0,0.18723520040512084
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,4,256,2,0,0.1068511962890625
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,4,256,4,0,0.06604959964752197
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,4,256,8,0,0.04180479943752289
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,4,256,16,0,0.030035200715065002
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,4,256,32,0,0.029388800263404846
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,16,4,128,8,0,0.018244799971580506
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,64,4,512,2,0,0.22708959579467775
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,128,4,512,1,0,0.4295008182525635
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,32,4,512,4,0,0.12238559722900391
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,16,4,512,8,0,0.06923840045928956
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,8,4,512,16,0,0.054364800453186035
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,2,4,512,64,0,0.04015359878540039
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,4,512,1,0,0.44387359619140626
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,4,512,2,0,0.2387903928756714
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,4,512,4,0,0.13423999547958373
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,4,512,8,0,0.08167840242385864
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,4,512,16,0,0.0674560010433197
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,4,512,32,0,0.0466511994600296
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,4,512,64,0,0.051988798379898074
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,4,256,64,0,0.029334399104118346
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,64,4,1024,2,0,0.6390592098236084
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,4,4,512,32,0,0.03686079978942871
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,16,4,1024,8,0,0.1761631965637207
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,32,4,1024,4,0,0.33255040645599365
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,8,4,1024,16,0,0.09945279955863953
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,4,4,1024,32,0,0.07820799946784973
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,2,4,1024,64,0,0.054985600709915164
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,4,1024,2,0,0.6516928195953369
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,4,1024,1,0,1.2519536018371582
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,4,1024,4,0,0.34623839855194094
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,4,1024,16,0,0.11501439809799194
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,4,1024,8,0,0.1934191942214966
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,4,1024,32,0,0.09545599818229675
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,4,1024,64,0,0.07013599872589112
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,128,4,1024,1,0,1.2498448371887207
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,64,4,1536,2,0,1.262294387817383
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,32,4,1536,4,0,0.6378496170043946
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,16,4,1536,8,0,0.3336319923400879
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,4,4,1536,32,0,0.12294559478759766
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,2,4,1536,64,0,0.08823360204696655
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,128,4,1536,1,0,2.544001579284668
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,4,1536,2,0,1.3453568458557128
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,4,1536,4,0,0.6787568092346191
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,4,1536,1,0,2.5203119277954102
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,4,1536,8,0,0.3566287994384766
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,4,1536,16,0,0.20884480476379394
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,4,1536,64,0,0.11419999599456787
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,4,1536,32,0,0.14475040435791015
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,128,4,2048,1,0,4.412883377075195
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,32,4,2048,4,0,1.1278191566467286
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,64,4,2048,2,0,2.079105567932129
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,8,4,2048,16,0,0.30315361022949217
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,16,4,2048,8,0,0.5835040092468262
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,4,4,2048,32,0,0.1682479977607727
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,2,4,2048,64,0,0.13922560214996338
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,8,4,1536,16,0,0.17996959686279296
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,4,2048,4,0,1.0507696151733399
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,4,2048,2,0,2.1452848434448244
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,4,2048,8,0,0.5495872020721435
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,4,2048,32,0,0.1833583950996399
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,4,2048,16,0,0.3222431898117065
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,4,2048,64,0,0.15178879499435424
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,4,2048,1,0,4.241463851928711
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,32,4,3072,4,0,2.3065984725952147
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,64,4,3072,2,0,4.65631217956543
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,16,4,3072,8,0,1.23056001663208
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,8,4,3072,16,0,0.6224431991577148
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,4,4,3072,32,0,0.34790079593658446
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,2,4,3072,64,0,0.22507998943328858
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,128,4,3072,1,0,9.594316864013672
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,4,3072,2,0,4.553260803222656
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,4,3072,4,0,2.2948720932006834
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,4,3072,8,0,1.1792415618896483
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,4,3072,32,0,0.35403521060943605
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,4,3072,16,0,0.6256703853607177
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,4,3072,64,0,0.2430095911026001
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,2,32768,4,0,114.29144287109375
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,4,3072,1,0,9.096331024169922
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,16,4,4096,8,0,1.9381568908691407
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,32,4,4096,4,0,4.043337631225586
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,8,4,4096,16,0,1.0679247856140137
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,4,4,4096,32,0,0.5786479949951172
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,2,4,4096,64,0,0.31131839752197266
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,64,4,4096,2,0,8.080328369140625
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,4,4096,2,0,7.893608093261719
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,128,4,4096,1,0,16.791476440429687
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,4,4096,16,0,1.0991920471191405
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,4,4096,4,0,4.187441635131836
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,4,4096,32,0,0.5566256046295166
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,4,4096,64,0,0.36789119243621826
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,4,4096,1,0,15.782717895507812
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,4,4096,8,0,1.8605600357055665
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,16,4,6144,8,0,4.266233444213867
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,8,4,6144,16,0,2.2496511459350588
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,4,4,6144,32,0,1.175774383544922
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,32,4,6144,4,0,8.970403289794922
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,2,4,6144,64,0,0.6306863784790039
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,64,4,6144,2,0,18.126608276367186
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,4,6144,4,0,8.593280029296874
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,4,6144,2,0,17.11121063232422
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,64,2,32768,2,0,239.2152099609375
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,4,6144,8,0,4.354111862182617
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,4,6144,16,0,2.1375999450683594
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,4,6144,64,0,0.6897999763488769
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,4,6144,32,0,1.190220832824707
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,128,4,6144,1,0,36.39725036621094
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,16,4,8192,8,0,7.8962348937988285
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,4,6144,1,0,34.10313110351562
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,4,4,8192,32,0,1.9086624145507813
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,8,4,8192,16,0,3.937713623046875
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,32,4,8192,4,0,15.611737060546876
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,2,4,8192,64,0,1.0485648155212401
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,64,4,8192,2,0,31.666036987304686
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,4,8192,4,0,15.03763427734375
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,4,8192,16,0,3.5423648834228514
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,4,8192,8,0,7.621766662597656
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,4,8192,64,0,1.0913760185241699
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,4,8192,32,0,1.9741647720336915
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,2,32768,2,0,228.9496337890625
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,4,8192,2,0,30.045211791992188
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,16,4,10240,8,0,12.296495819091797
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,128,4,8192,1,0,63.16401977539063
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,8,4,10240,16,0,6.043521499633789
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,4,4,10240,32,0,3.0180431365966798
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,2,4,10240,64,0,1.5946895599365234
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,32,4,10240,4,0,24.310243225097658
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,4,8192,1,0,60.52430419921875
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,4,10240,8,0,11.912564849853515
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,64,4,10240,2,0,48.42863159179687
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,4,10240,32,0,2.94443359375
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,4,10240,4,0,23.376815795898438
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,4,10240,16,0,5.945188903808594
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,4,10240,64,0,1.6209152221679688
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,4,10240,2,0,46.14744567871094
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,32,4,12288,4,0,35.32583312988281
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,16,4,12288,8,0,17.358859252929687
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,4,4,12288,32,0,4.3394817352294925
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,2,4,12288,64,0,2.2249824523925783
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,8,4,12288,16,0,8.528972625732422
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,128,4,10240,1,0,97.89969482421876
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,4,10240,1,0,93.50986328125
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,64,4,12288,2,0,70.2552490234375
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,4,12288,4,0,33.60588073730469
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,4,12288,16,0,8.542635345458985
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,4,12288,64,0,2.325971221923828
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,4,12288,8,0,16.963380432128908
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,4,12288,32,0,4.318974304199219
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,4,12288,2,0,66.01234130859375
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,128,4,12288,1,0,138.443798828125
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,16,4,16384,8,0,30.748602294921874
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,128,2,32768,1,0,478.856640625
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,32,4,16384,4,0,62.126104736328124
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,2,4,16384,64,0,3.80328483581543
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,4,4,16384,32,0,7.6655120849609375
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,8,4,16384,16,0,15.559024047851562
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,2,32768,1,0,452.902880859375
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,4,12288,1,0,131.4787841796875
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,4,16384,32,0,7.392910766601562
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,4,16384,8,0,29.477139282226563
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,128,8,1,1,0,0.01518079936504364
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,64,8,1,2,0,0.014504000544548035
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,32,8,1,4,0,0.013766400516033173
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,16,8,1,8,0,0.013084800541400909
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,8,8,1,16,0,0.013044799864292144
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,4,8,1,32,0,0.013059200346469879
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,2,8,1,64,0,0.013115200400352477
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,8,1,1,0,0.020291200280189513
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,8,1,2,0,0.019707199931144715
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,8,1,4,0,0.019215999543666838
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,8,1,8,0,0.019249600172042847
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,8,1,16,0,0.01914079934358597
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,8,1,32,0,0.019079999625682832
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,8,1,64,0,0.01897120028734207
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,128,8,16,1,0,0.025443199276924133
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,64,8,16,2,0,0.016628800332546233
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,32,8,16,4,0,0.01607840061187744
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,16,8,16,8,0,0.0152319997549057
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,8,8,16,16,0,0.014511999487876893
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,4,8,16,32,0,0.013870400190353394
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,2,8,16,64,0,0.013441599905490875
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,4,16384,64,0,3.798779296875
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,8,16,1,0,0.03215680122375488
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,8,16,4,0,0.02202560007572174
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,8,16,2,0,0.02412640005350113
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,8,16,8,0,0.021657599508762358
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,8,16,16,0,0.021240000426769257
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,4,16384,16,0,14.504776000976562
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,8,16,32,0,0.019819200038909912
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,128,8,32,1,0,0.04574080109596253
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,8,16,64,0,0.020095999538898467
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,64,8,32,2,0,0.023971199989318848
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,32,8,32,4,0,0.01703999936580658
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,16,8,32,8,0,0.01589120030403137
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,8,8,32,16,0,0.01610880047082901
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,4,8,32,32,0,0.015385599434375763
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,2,8,32,64,0,0.014923200011253357
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,8,32,1,0,0.05432639718055725
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,8,32,2,0,0.03512159883975983
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,8,32,4,0,0.023219199478626253
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,8,32,8,0,0.022951999306678773
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,8,32,32,0,0.0224031999707222
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,8,32,64,0,0.02091200053691864
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,128,8,64,1,0,0.08274719715118409
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,64,8,64,2,0,0.047356799244880676
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,32,8,64,4,0,0.024899199604988098
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,16,8,64,8,0,0.0172992005944252
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,8,8,64,16,0,0.016697600483894348
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,4,8,64,32,0,0.01639840006828308
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,2,8,64,64,0,0.015483200550079346
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,8,64,1,0,0.08986719846725463
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,8,64,2,0,0.056004798412323
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,8,64,4,0,0.03224479854106903
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,8,64,8,0,0.024120000004768372
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,8,64,16,0,0.023137600719928743
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,8,64,32,0,0.023343999683856965
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,8,64,64,0,0.02191839963197708
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,128,8,128,1,0,0.15510720014572144
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,64,8,128,2,0,0.0851631999015808
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,32,8,128,4,0,0.04967359900474548
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,16,8,128,8,0,0.02861439883708954
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,8,8,128,16,0,0.019985599815845488
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,8,32,16,0,0.02178879976272583
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,4,8,128,32,0,0.019547200202941893
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,2,8,128,64,0,0.01934240013360977
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,8,128,1,0,0.1634752035140991
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,8,128,2,0,0.0949679970741272
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,8,128,4,0,0.06036800146102905
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,8,128,16,0,0.02671839892864227
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,8,128,32,0,0.02627519965171814
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,8,128,64,0,0.026291200518608095
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,128,8,256,1,0,0.3352720022201538
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,64,8,256,2,0,0.17884960174560546
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,32,8,256,4,0,0.10000959634780884
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,16,8,256,8,0,0.05884479880332947
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,8,8,256,16,0,0.03725920021533966
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,4,8,256,32,0,0.02589600086212158
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,2,8,256,64,0,0.02577120065689087
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,8,256,1,0,0.34788639545440675
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,8,256,2,0,0.19096800088882446
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,8,256,4,0,0.1116096019744873
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,8,256,8,0,0.07151039838790893
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,8,128,8,0,0.03680639863014221
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,8,256,16,0,0.05119199752807617
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,8,256,32,0,0.03577919900417328
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,128,8,512,1,0,0.8442383766174316
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,32,8,512,4,0,0.23468799591064454
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,64,8,512,2,0,0.4395616054534912
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,16,8,512,8,0,0.12735519409179688
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,8,8,512,16,0,0.09423360228538513
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,4,8,512,32,0,0.06333119869232177
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,2,8,512,64,0,0.04323520064353943
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,8,512,2,0,0.4554351806640625
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,8,512,1,0,0.8541168212890625
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,8,512,4,0,0.2508928060531616
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,8,512,16,0,0.10992480516433716
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,8,512,32,0,0.08027520179748535
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,8,256,64,0,0.03510720133781433
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,8,512,64,0,0.05685439705848694
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,64,8,1024,2,0,1.2593440055847167
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,32,8,1024,4,0,0.6507232189178467
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,128,8,1024,1,0,2.4887983322143556
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,16,8,1024,8,0,0.33967840671539307
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,8,8,1024,16,0,0.20056641101837158
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,8,512,8,0,0.14375679492950438
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,2,8,1024,64,0,0.10253920555114746
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,8,1024,2,0,1.3248703956604004
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,64,4,16384,2,0,123.73580322265624
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,8,1024,4,0,0.6772992134094238
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,8,1024,1,0,2.4685359954833985
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,8,1024,16,0,0.2123392105102539
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,8,1024,8,0,0.43030557632446287
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,8,1024,32,0,0.1652351975440979
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,8,1024,64,0,0.12058240175247192
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,4,16384,4,0,58.33201904296875
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,4,8,1024,32,0,0.13921120166778564
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,8,8,1536,16,0,0.34801440238952636
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,16,8,1536,8,0,0.7180031776428223
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,32,8,1536,4,0,1.2669808387756347
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,2,8,1536,64,0,0.16255040168762208
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,64,8,1536,2,0,2.594998359680176
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,8,1536,2,0,2.574929618835449
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,4,8,1536,32,0,0.22801918983459474
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,8,1536,8,0,0.692844820022583
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,128,8,1536,1,0,5.279595184326172
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,8,1536,16,0,0.3801392078399658
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,8,1536,32,0,0.2632496118545532
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,8,1536,64,0,0.18558080196380616
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,8,1536,1,0,5.036783981323242
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,8,1536,4,0,1.3843567848205567
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,32,8,2048,4,0,2.1691856384277344
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,16,8,2048,8,0,1.0568079948425293
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,4,8,2048,32,0,0.31376960277557375
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,8,8,2048,16,0,0.6130832195281982
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,2,8,2048,64,0,0.2241215944290161
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,64,8,2048,2,0,4.3517311096191404
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,8,2048,4,0,2.2023151397705076
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,8,2048,16,0,0.6255680084228515
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,8,2048,8,0,1.0846128463745117
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,8,2048,32,0,0.35733280181884763
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,8,2048,64,0,0.2619136095046997
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,8,2048,2,0,4.284947204589844
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,128,8,2048,1,0,8.955248260498047
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,8,2048,1,0,8.447705841064453
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,32,8,3072,4,0,4.708454513549805
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,8,8,3072,16,0,1.1584480285644532
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,4,8,3072,32,0,0.6647664070129394
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,2,8,3072,64,0,0.41759519577026366
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,16,8,3072,8,0,2.189630317687988
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,64,8,3072,2,0,9.649667358398437
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,8,3072,4,0,4.614907073974609
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,8,3072,8,0,2.3376495361328127
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,8,3072,16,0,1.216865634918213
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,8,3072,32,0,0.6858128070831299
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,8,3072,64,0,0.460968017578125
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,8,3072,2,0,9.056795501708985
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,128,8,3072,1,0,18.917630004882813
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,16,8,4096,8,0,3.865937423706055
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,32,8,4096,4,0,8.618436431884765
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,8,3072,1,0,18.2779541015625
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,2,8,4096,64,0,0.5852960109710693
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,4,8,4096,32,0,1.0907999992370605
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,8,8,4096,16,0,2.2238832473754884
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,64,8,4096,2,0,16.2541259765625
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,8,4096,8,0,4.032403182983399
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,8,4096,16,0,2.076321601867676
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,8,4096,4,0,7.8166015625
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,8,4096,32,0,1.152286434173584
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,8,4096,64,0,0.6746895790100098
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,4,16384,2,0,116.06552734375
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,8,4096,2,0,15.580642700195312
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,128,8,4096,1,0,33.87996826171875
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,16,8,6144,8,0,8.943231964111328
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,8,8,6144,16,0,4.537192153930664
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,4,8,6144,32,0,2.2172128677368166
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,2,8,6144,64,0,1.2585824012756348
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,8,4096,1,0,31.404354858398438
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,32,8,6144,4,0,18.02703094482422
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,8,6144,8,0,8.693231964111328
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,8,6144,16,0,4.449137496948242
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,64,8,6144,2,0,36.15870971679688
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,8,6144,32,0,2.284659194946289
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,8,6144,4,0,17.675624084472656
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,8,6144,64,0,1.3087632179260253
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,8,6144,2,0,35.17692260742187
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,16,8,8192,8,0,16.05132293701172
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,32,8,8192,4,0,31.554080200195312
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,128,8,6144,1,0,70.99891967773438
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,128,4,16384,1,0,251.48779296875
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,2,8,8192,64,0,2.0704671859741213
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,4,8,8192,32,0,4.034526443481445
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,8,8,8192,16,0,8.258214569091797
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,8,6144,1,0,67.5411376953125
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,8,8192,8,0,15.37706298828125
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,8,8192,16,0,7.6698974609375
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,8,8192,64,0,2.102729606628418
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,128,16,1,1,0,0.015244799852371215
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,64,16,1,2,0,0.014500799775123595
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,32,16,1,4,0,0.013697600364685059
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,16,16,1,8,0,0.013275200128555298
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,8,16,1,16,0,0.013073599338531494
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,4,16,1,32,0,0.013152000308036805
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,2,16,1,64,0,0.012868799269199371
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,16,1,1,0,0.020683200657367708
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,16,1,2,0,0.01943040043115616
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,16,1,4,0,0.018654400110244752
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,16,1,8,0,0.018303999304771425
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,16,1,16,0,0.01841440051794052
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,16,1,32,0,0.018430399894714355
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,16,1,64,0,0.018427200615406036
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,128,16,16,1,0,0.045433598756790164
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,64,16,16,2,0,0.02385759949684143
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,32,16,16,4,0,0.016896000504493712
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,16,16,16,8,0,0.016062399744987486
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,8,16,16,16,0,0.015751999616622925
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,4,16,16,32,0,0.014375999569892883
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,2,16,16,64,0,0.013971200585365296
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,16,16,1,0,0.05491840243339539
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,16,16,2,0,0.030771198868751525
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,16,16,4,0,0.022891199588775633
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,16,16,8,0,0.022008000314235686
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,16,16,16,0,0.02160159945487976
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,16,16,32,0,0.021057599782943727
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,16,16,64,0,0.020230400562286376
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,128,16,32,1,0,0.08032799959182739
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,8,8192,32,0,3.9447216033935546
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,64,16,32,2,0,0.04563199877738953
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,32,16,32,4,0,0.02449440062046051
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,16,16,32,8,0,0.017187200486660004
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,8,16,32,16,0,0.016465599834918975
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,4,16,32,32,0,0.01626240015029907
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,2,16,32,64,0,0.01496639996767044
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,16,32,1,0,0.08909599781036377
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,16,32,2,0,0.055555200576782225
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,16,32,4,0,0.03241280019283295
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,16,32,8,0,0.023764799535274505
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,16,32,16,0,0.022918400168418885
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,16,32,32,0,0.023212799429893495
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,16,32,64,0,0.021465599536895752
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,128,16,64,1,0,0.14427839517593383
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,64,16,64,2,0,0.08208640217781067
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,32,16,64,4,0,0.04683200120925903
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,16,16,64,8,0,0.026305601000785828
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,8,16,64,16,0,0.018859200179576874
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,4,16,64,32,0,0.01808159947395325
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,2,16,64,64,0,0.018062399327754976
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,16,64,1,0,0.15399680137634278
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,16,64,2,0,0.09077759981155395
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,64,8,8192,2,0,62.88590698242187
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,8,8192,4,0,30.0734130859375
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,16,64,8,0,0.038392001390457155
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,16,64,16,0,0.02829119861125946
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,16,64,64,0,0.027425599098205567
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,128,16,128,1,0,0.30082240104675295
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,64,16,128,2,0,0.16142879724502562
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,32,16,128,4,0,0.09024320244789123
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,16,16,128,8,0,0.0547327995300293
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,8,16,128,16,0,0.033379200100898745
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,4,16,128,32,0,0.023795199394226075
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,2,16,128,64,0,0.023681600391864777
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,16,128,1,0,0.3100064039230347
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,16,128,2,0,0.16892000436782836
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,16,128,4,0,0.10080959796905517
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,16,128,8,0,0.06579679846763611
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,16,64,4,0,0.0571727991104126
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,16,128,16,0,0.0452208012342453
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,16,128,32,0,0.03233599960803986
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,16,128,64,0,0.032120001316070554
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,32,16,256,4,0,0.1844591975212097
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,64,16,256,2,0,0.3415600061416626
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,16,64,32,0,0.025411200523376466
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,16,16,256,8,0,0.1029919981956482
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,8,16,256,16,0,0.06595360040664673
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,4,16,256,32,0,0.04506880044937134
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,16,256,2,0,0.3592560052871704
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,16,256,1,0,0.6611120223999023
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,16,256,4,0,0.20151679515838622
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,16,256,8,0,0.11911040544509888
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,16,256,32,0,0.06167200207710266
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,16,256,64,0,0.04612959921360016
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,128,16,512,1,0,1.6521888732910157
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,128,16,256,1,0,0.6512832164764404
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,2,16,256,64,0,0.03190560042858124
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,16,16,512,8,0,0.24250879287719726
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,32,16,512,4,0,0.4596367835998535
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,8,16,512,16,0,0.17043839693069457
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,64,16,512,2,0,0.866427230834961
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,4,16,512,32,0,0.1089967966079712
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,2,16,512,64,0,0.07743200063705444
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,16,256,16,0,0.08220319747924805
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,16,512,8,0,0.2667504072189331
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,16,512,4,0,0.47786879539489746
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,16,512,16,0,0.19398560523986816
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,16,512,32,0,0.13213119506835938
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,16,512,64,0,0.10167200565338134
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,16,512,1,0,1.6830352783203124
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,32,16,1024,4,0,1.2924639701843261
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,64,16,1024,2,0,2.499388885498047
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,16,16,1024,8,0,0.673905611038208
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,8,16,1024,16,0,0.3860127925872803
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,16,512,2,0,0.8933199882507324
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,2,16,1024,64,0,0.17086880207061766
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,4,16384,1,0,235.2298583984375
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,128,16,1024,1,0,5.277107238769531
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,16,1024,8,0,0.7373871803283691
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,16,1024,4,0,1.510919952392578
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,16,1024,2,0,2.569248008728027
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,16,1024,16,0,0.4112351894378662
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,16,1024,64,0,0.2114880084991455
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,16,1024,32,0,0.2978431940078735
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,16,1024,1,0,5.001244735717774
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,32,16,1536,4,0,2.618615913391113
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,16,16,1536,8,0,1.3326224327087401
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,4,16,1024,32,0,0.2568336009979248
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,4,16,1536,32,0,0.4540559768676758
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,2,16,1536,64,0,0.2791167974472046
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,64,16,1536,2,0,5.241996765136719
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,8,16,1536,16,0,0.7277103900909424
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,16,1536,4,0,2.662883186340332
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,16,1536,8,0,1.3323792457580566
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,16,1536,16,0,0.756772804260254
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,16,1536,2,0,5.114523315429688
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,16,1536,32,0,0.4912367820739746
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,16,1536,64,0,0.3366336107254028
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,128,16,1536,1,0,10.879315185546876
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,8,8192,2,0,59.947540283203125
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,16,16,2048,8,0,2.266806411743164
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,16,1536,1,0,10.312359619140626
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,32,16,2048,4,0,4.417812728881836
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,2,16,2048,64,0,0.4467520236968994
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,4,16,2048,32,0,0.6314688205718995
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,8,16,2048,16,0,1.1998047828674316
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,64,16,2048,2,0,9.145649719238282
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,16,2048,8,0,2.1857471466064453
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,16,2048,4,0,4.443824005126953
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,16,2048,16,0,1.2435104370117187
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,16,2048,64,0,0.5188735961914063
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,16,2048,32,0,0.7074096202850342
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,16,2048,2,0,8.678337860107423
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,128,16,2048,1,0,17.746304321289063
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,16,16,3072,8,0,4.774476623535156
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,8,16,3072,16,0,2.385209655761719
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,2,16,3072,64,0,0.7829599857330323
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,4,16,3072,32,0,1.3171456336975098
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,16,2048,1,0,17.845362854003906
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,32,16,3072,4,0,9.568100738525391
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,128,8,8192,1,0,124.5036865234375
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,16,3072,8,0,4.784688186645508
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,64,16,3072,2,0,19.464422607421874
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,16,3072,32,0,1.3633055686950684
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,16,3072,16,0,2.400281524658203
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,16,3072,64,0,0.9525487899780274
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,16,3072,4,0,9.23804168701172
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,16,3072,2,0,18.494406127929686
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,16,16,4096,8,0,8.361888122558593
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,4,16,4096,32,0,2.1133983612060545
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,8,16,4096,16,0,4.226665496826172
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,2,16,4096,64,0,1.2046159744262694
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,32,16,4096,4,0,16.431179809570313
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,128,16,3072,1,0,39.110879516601564
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,16,3072,1,0,36.20821838378906
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,16,4096,8,0,8.099676513671875
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,16,4096,32,0,2.1798831939697267
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,8,8192,1,0,121.98145751953125
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,16,4096,64,0,1.272760009765625
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,16,4096,16,0,4.056654357910157
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,64,32,1,2,0,0.014724799990653991
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,32,32,1,4,0,0.01496479958295822
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,16,32,1,8,0,0.013462400436401368
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,8,32,1,16,0,0.014575999975204468
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,2,32,1,64,0,0.014560000598430633
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,32,1,1,0,0.021928000450134277
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,32,1,2,0,0.02118239998817444
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,32,1,4,0,0.020259200036525725
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,32,1,8,0,0.019787199795246124
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,32,1,16,0,0.01969760060310364
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,32,1,32,0,0.019351999461650848
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,32,1,64,0,0.019358399510383605
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,128,32,16,1,0,0.08193280100822449
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,64,32,16,2,0,0.04698559939861298
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,32,32,16,4,0,0.02557600140571594
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,16,32,16,8,0,0.017791999876499175
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,8,32,16,16,0,0.01748799979686737
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,16,4096,4,0,15.975221252441406
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,4,32,16,32,0,0.016624000668525696
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,64,16,4096,2,0,33.23957824707031
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,2,32,16,64,0,0.01740480065345764
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,32,16,2,0,0.06312159895896911
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,32,16,8,0,0.02712000012397766
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,32,16,16,0,0.025551998615264894
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,32,16,32,0,0.023652799427509308
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,128,32,1,1,0,0.01721920073032379
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,32,16,64,0,0.022819200158119203
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,4,32,1,32,0,0.01340479999780655
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,64,32,32,2,0,0.08459839820861817
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,128,32,32,1,0,0.14368480443954468
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,32,32,32,4,0,0.047679999470710756
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,16,32,32,8,0,0.027723199129104613
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,8,32,32,16,0,0.0191551998257637
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,2,32,32,64,0,0.018824000656604768
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,32,32,2,0,0.09448800086975098
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,32,32,4,0,0.05944799780845642
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,32,32,8,0,0.03603360056877136
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,32,32,16,0,0.026926401257514953
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,32,32,32,0,0.02566719949245453
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,32,32,64,0,0.024907200038433074
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,128,32,64,1,0,0.275984001159668
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,64,32,64,2,0,0.1468287944793701
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,32,16,1,0,0.09139999747276306
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,32,32,64,4,0,0.08560799956321716
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,16,32,64,8,0,0.05116639733314514
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,32,16,4,0,0.03934240043163299
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,4,32,64,32,0,0.022228799760341644
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,2,32,64,64,0,0.021988800168037413
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,32,64,2,0,0.16002720594406128
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,32,64,4,0,0.09642879962921143
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,32,64,8,0,0.06355680227279663
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,32,64,16,0,0.0438832014799118
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,32,64,32,0,0.032420799136161804
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,4,32,32,32,0,0.01849920004606247
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,32,64,64,0,0.03094879984855652
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,32,32,1,0,0.1534127950668335
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,64,32,128,2,0,0.2952768087387085
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,32,32,128,4,0,0.15843199491500853
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,128,32,128,1,0,0.5579584121704102
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,8,32,128,16,0,0.0596448004245758
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,4,32,128,32,0,0.04023039937019348
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,2,32,128,64,0,0.0297791987657547
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,8,32,64,16,0,0.030316799879074097
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,32,128,1,0,0.5765183925628662
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,32,128,2,0,0.31149280071258545
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,32,128,4,0,0.17318880558013916
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,32,64,1,0,0.28251841068267824
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,32,128,8,0,0.10930559635162354
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,32,128,16,0,0.07571679949760438
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,32,128,64,0,0.04346080124378204
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,64,32,256,2,0,0.6736656188964844
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,16,32,128,8,0,0.09381759762763978
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,128,32,256,1,0,1.271720027923584
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,16,32,256,8,0,0.19388320446014404
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,8,32,256,16,0,0.11737120151519775
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,4,32,256,32,0,0.07949439883232116
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,16,4096,2,0,32.176873779296876
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,32,256,1,0,1.3018207550048828
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,32,128,32,0,0.05788800120353699
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,32,256,8,0,0.21695199012756347
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,32,256,4,0,0.38416640758514403
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,32,256,16,0,0.14062880277633666
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,32,256,32,0,0.10338720083236694
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,32,256,2,0,0.7512191772460938
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,32,32,256,4,0,0.35887999534606935
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,2,32,256,64,0,0.05917760133743286
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,32,32,512,4,0,0.9042847633361817
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,64,32,512,2,0,1.7565519332885742
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,4,32,512,32,0,0.2004767894744873
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,2,32,512,64,0,0.13914719820022584
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,128,32,512,1,0,3.370161437988281
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,32,256,64,0,0.08308960199356079
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,32,512,8,0,0.5113887786865234
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,32,512,2,0,1.7635343551635743
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,32,512,4,0,0.9835087776184082
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,32,512,16,0,0.3640399932861328
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,32,512,32,0,0.23969759941101074
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,16,32,512,8,0,0.4706719875335693
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,32,512,64,0,0.17753920555114747
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,32,512,1,0,3.336324691772461
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,8,32,1024,16,0,0.7200687885284424
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,16,32,1024,8,0,1.315230369567871
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,2,32,1024,64,0,0.3076927900314331
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,4,32,1024,32,0,0.47680959701538084
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,32,32,1024,4,0,2.585412788391113
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,8,32,512,16,0,0.32498719692230227
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,64,32,1024,2,0,5.286207962036133
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,32,1024,8,0,1.381873607635498
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,32,1024,4,0,2.701582336425781
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,32,1024,32,0,0.5504415988922119
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,32,1024,16,0,0.8330384254455566
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,32,1024,64,0,0.3932496070861816
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,32,1024,2,0,5.1899982452392575
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,128,32,1024,1,0,10.449049377441407
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,16,32,1536,8,0,2.6795183181762696
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,4,32,1536,32,0,0.9148336410522461
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,8,32,1536,16,0,1.407755184173584
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,32,32,1536,4,0,5.447075271606446
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,32,1024,1,0,10.278939056396485
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,128,16,4096,1,0,67.51004028320312
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,2,32,1536,64,0,0.5280687808990479
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,32,1536,8,0,2.778009605407715
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,64,32,1536,2,0,10.814625549316407
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,32,1536,16,0,1.4637760162353515
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,32,1536,32,0,1.0023072242736817
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,32,1536,4,0,5.286923217773437
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,32,1536,64,0,0.6649024009704589
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,16,4096,1,0,62.821661376953124
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,32,1536,2,0,10.632254028320313
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,16,32,2048,8,0,4.531719970703125
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,2,32,2048,64,0,0.8363375663757324
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,4,32,2048,32,0,1.231811237335205
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,8,32,2048,16,0,2.2969375610351563
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,32,32,2048,4,0,9.101110076904297
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,128,32,1536,1,0,21.74103698730469
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,32,2048,16,0,2.438547134399414
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,32,2048,32,0,1.4058608055114745
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,32,2048,8,0,4.676577758789063
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,32,1536,1,0,20.788914489746094
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,128,64,1,1,0,0.017827199399471284
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,32,64,1,4,0,0.01592160016298294
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,64,64,1,2,0,0.017958399653434754
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,32,2048,64,0,0.93307523727417
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,8,64,1,16,0,0.01611679941415787
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,4,64,1,32,0,0.014022399485111237
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,2,64,1,64,0,0.014796799421310425
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,64,1,1,0,0.022916799783706664
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,64,1,4,0,0.020083199441432952
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,64,1,2,0,0.022145600616931917
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,64,1,8,0,0.019776000082492827
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,64,1,16,0,0.020923200249671935
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,64,1,32,0,0.019753600656986236
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,64,1,64,0,0.020265600085258482
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,32,2048,4,0,8.894232177734375
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,64,64,16,2,0,0.08541120290756225
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,16,64,16,8,0,0.028043198585510253
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,32,64,16,4,0,0.05090720057487488
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,8,64,16,16,0,0.02020000070333481
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,2,64,16,64,0,0.017851200699806214
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,4,64,16,32,0,0.01920959949493408
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,64,16,1,0,0.15886240005493163
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,64,16,2,0,0.09357280135154725
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,64,16,4,0,0.05947840213775635
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,64,16,8,0,0.03684639930725098
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,64,32,2048,2,0,18.472230529785158
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,64,16,32,0,0.028244799375534056
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,16,64,1,8,0,0.01483200043439865
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,128,64,32,1,0,0.2779504060745239
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,64,64,32,2,0,0.14822880029678345
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,32,64,32,4,0,0.08736640214920044
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,16,64,32,8,0,0.051918399333953855
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,128,64,16,1,0,0.14615199565887452
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,8,64,32,16,0,0.030980798602104186
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,4,64,32,32,0,0.022868800163269042
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,2,64,32,64,0,0.02208160012960434
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,64,32,2,0,0.15786880254745483
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,64,32,4,0,0.09719840288162232
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,64,32,8,0,0.06387519836425781
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,64,32,16,0,0.04453440010547638
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,64,32,32,0,0.032553601264953616
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,64,32,64,0,0.0316864013671875
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,64,16,16,0,0.026953598856925963
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,64,64,64,2,0,0.2764607906341553
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,128,64,64,1,0,0.5250959873199463
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,64,16,64,0,0.02579360008239746
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,32,64,64,4,0,0.1538032054901123
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,8,64,64,16,0,0.05867519974708557
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,16,64,64,8,0,0.09182080030441284
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,4,64,64,32,0,0.03910239934921265
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,2,64,64,64,0,0.029183998703956604
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,64,64,4,0,0.1676959991455078
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,64,64,2,0,0.289900803565979
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,64,64,8,0,0.10746079683303833
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,64,64,16,0,0.07410079836845399
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,64,64,32,0,0.05554559826850891
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,64,64,64,0,0.043191999197006226
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,32,64,128,4,0,0.3019920110702515
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,64,32,1,0,0.2822655916213989
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,32,2048,2,0,18.177748107910155
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,16,64,128,8,0,0.1710096001625061
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,8,64,128,16,0,0.10684000253677368
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,2,64,128,64,0,0.05489439964294433
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,4,64,128,32,0,0.07672160267829894
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,64,128,2,0,0.5984623908996582
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,64,128,4,0,0.3504672050476074
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,64,128,16,0,0.13799999952316283
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,64,128,1,0,1.1242032051086426
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,64,128,32,0,0.10107680559158325
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,64,128,64,0,0.07850559949874877
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,64,64,1,0,0.5472976207733155
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,64,64,256,2,0,1.336295986175537
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,32,64,256,4,0,0.7054671764373779
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,8,64,256,16,0,0.2202159881591797
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,128,64,128,1,0,1.09891996383667
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,4,64,256,32,0,0.144924795627594
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,2,64,256,64,0,0.10783200263977051
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,128,64,256,1,0,2.525707244873047
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,64,128,8,0,0.19439680576324464
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,64,256,4,0,0.7469888210296631
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,64,256,8,0,0.4132959842681885
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,64,256,32,0,0.18605120182037355
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,64,256,16,0,0.2632352113723755
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,64,256,64,0,0.14741599559783936
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,64,256,1,0,2.552774429321289
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,64,64,128,2,0,0.5762320041656495
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,16,64,256,8,0,0.3738080024719238
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,16,64,512,8,0,0.9306719779968262
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,8,64,512,16,0,0.6322544097900391
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,4,64,512,32,0,0.378548789024353
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,2,64,512,64,0,0.25459840297698977
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,32,64,512,4,0,1.7888528823852539
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,64,256,2,0,1.3745375633239747
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,64,64,512,2,0,3.45984001159668
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,128,32,2048,1,0,36.500146484375
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,64,512,8,0,1.0019455909729005
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,64,512,16,0,0.7257503986358642
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,64,512,64,0,0.32911200523376466
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,64,512,32,0,0.4873072147369385
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,64,512,2,0,3.496049499511719
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,128,64,512,1,0,6.917851257324219
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,64,512,4,0,1.8710336685180664
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,16,64,1024,8,0,2.7193679809570312
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,64,512,1,0,6.671932983398437
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,4,64,1024,32,0,0.9299807548522949
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,2,64,1024,64,0,0.6173791885375977
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,32,64,1024,4,0,5.396675109863281
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,8,64,1024,16,0,1.4320575714111328
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,32,2048,1,0,35.237030029296875
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,64,1024,16,0,1.5470815658569337
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,64,1024,8,0,2.8099056243896485
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,128,128,1,1,0,0.02783840000629425
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,64,128,1,2,0,0.01709119975566864
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,64,1024,32,0,1.083350372314453
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,32,128,1,4,0,0.016369600594043732
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,64,1024,64,0,0.7299871921539307
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,8,128,1,16,0,0.015647999942302704
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,4,128,1,32,0,0.015118399262428283
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,2,128,1,64,0,0.015887999534606935
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,128,1,1,0,0.03388319909572601
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,64,64,1024,2,0,10.652572631835938
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,64,1024,4,0,5.386296081542969
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,128,1,4,0,0.023281599581241607
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,128,1,8,0,0.022777600586414336
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,128,1,32,0,0.022361600399017335
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,128,1,64,0,0.022603200376033784
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,128,128,16,1,0,0.27500960826873777
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,64,128,16,2,0,0.15029599666595458
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,32,128,16,4,0,0.08793439865112304
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,16,128,16,8,0,0.05300319790840149
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,8,128,16,16,0,0.032014399766922
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,4,128,16,32,0,0.023992000520229338
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,2,128,16,64,0,0.022944000363349915
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,16,128,1,8,0,0.015535999834537507
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,128,1,2,0,0.023332799971103668
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,128,16,2,0,0.16153600215911865
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,128,16,4,0,0.09821280241012573
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,128,16,8,0,0.06533920168876647
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,128,16,16,0,0.04504159986972809
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,128,16,32,0,0.033425599336624146
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,128,16,64,0,0.032287999987602234
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,128,1,16,0,0.022950400412082673
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,64,1024,2,0,10.413092803955077
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,64,128,32,2,0,0.2788095951080322
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,32,128,32,4,0,0.1555600047111511
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,128,128,32,1,0,0.5348735809326172
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,8,128,32,16,0,0.05869439840316772
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,4,128,32,32,0,0.03972159922122955
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,2,128,32,64,0,0.029665601253509522
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,128,32,8,0,0.1087488055229187
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,128,32,4,0,0.16907839775085448
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,128,32,2,0,0.29451038837432864
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,128,32,16,0,0.07502080202102661
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,128,32,1,0,0.5410255908966064
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,64,128,64,2,0,0.5672239780426025
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,128,128,64,1,0,1.0938159942626953
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,32,128,64,4,0,0.3001039981842041
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,8,128,64,16,0,0.1066864013671875
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,16,128,64,8,0,0.17053920030593872
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,4,128,64,32,0,0.07104960083961487
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,2,128,64,64,0,0.05314559936523437
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,128,64,2,0,0.556056022644043
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,16,128,32,8,0,0.09540479779243469
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,128,64,4,0,0.31505119800567627
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,128,64,8,0,0.19139039516448975
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,128,64,16,0,0.12921279668807983
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,128,32,32,0,0.05655999779701233
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,128,64,32,0,0.09532319903373718
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,128,64,64,0,0.07779039740562439
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,32,128,128,4,0,0.5892975807189942
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,128,64,1024,1,0,21.548760986328126
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,128,16,1,0,0.287174391746521
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,16,128,128,8,0,0.3289263963699341
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,4,128,128,32,0,0.13477599620819092
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,2,128,128,64,0,0.1011504054069519
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,128,128,128,1,0,2.16091365814209
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,128,64,1,0,1.0791775703430175
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,128,128,4,0,0.6260591983795166
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,128,32,64,0,0.043428799510002135
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,128,128,8,0,0.3688960075378418
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,128,128,16,0,0.24086399078369142
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,128,128,32,0,0.1750208020210266
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,128,128,64,0,0.14175839424133302
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,64,1024,1,0,20.497639465332032
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,64,128,128,2,0,1.142131233215332
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,8,128,128,16,0,0.21288480758666992
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,32,128,256,4,0,1.3997424125671387
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,4,128,256,32,0,0.27364959716796877
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,8,128,256,16,0,0.42887039184570314
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,2,128,256,64,0,0.19837440252304078
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,16,128,256,8,0,0.7577919960021973
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,128,128,2,0,1.1768575668334962
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,64,128,256,2,0,2.6477712631225585
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,128,256,8,0,0.8089776039123535
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,128,256,32,0,0.3514352083206177
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,128,128,1,0,2.2176944732666017
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,128,256,4,0,1.4724592208862304
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,128,256,64,0,0.27915680408477783
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,128,256,2,0,2.719500732421875
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,128,128,256,1,0,5.0422607421875
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,16,128,512,8,0,1.8528623580932617
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,4,128,512,32,0,0.7423200130462646
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,8,128,512,16,0,1.235198402404785
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,2,128,512,64,0,0.48676481246948244
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,128,256,16,0,0.5014256000518799
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,128,256,1,0,5.099700927734375
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,32,128,512,4,0,3.5955440521240236
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,128,512,32,0,0.8978176116943359
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,128,512,16,0,1.3865344047546386
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,128,512,8,0,1.9919120788574218
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,128,256,1,1,0,0.049747198820114136
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,64,256,1,2,0,0.027432000637054442
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,16,256,1,8,0,0.023001599311828613
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,8,256,1,16,0,0.02218399941921234
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,128,512,64,0,0.633180809020996
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,4,256,1,32,0,0.022121599316596983
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,2,256,1,64,0,0.02232159972190857
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,256,1,2,0,0.03366880118846893
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,64,128,512,2,0,7.054248046875
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,128,512,4,0,3.7189502716064453
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,256,1,8,0,0.030420801043510436
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,256,1,16,0,0.029795199632644653
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,256,1,32,0,0.029100799560546876
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,256,1,64,0,0.029019200801849367
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,128,256,16,1,0,0.5418543815612793
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,64,256,16,2,0,0.2857327938079834
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,32,256,1,4,0,0.02420320063829422
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,16,256,16,8,0,0.09512640237808227
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,128,512,2,0,6.965617370605469
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,8,256,16,16,0,0.05970720052719116
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,256,1,1,0,0.05533599853515625
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,2,256,16,64,0,0.03558239936828613
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,4,256,16,32,0,0.041331198811531064
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,256,1,4,0,0.029758399724960326
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,256,16,2,0,0.2928096055984497
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,256,16,4,0,0.17721920013427733
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,256,16,1,0,0.542464017868042
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,256,16,16,0,0.07580479979515076
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,256,16,32,0,0.057081598043441775
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,256,16,64,0,0.049055999517440795
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,32,256,32,4,0,0.2968384027481079
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,64,256,32,2,0,0.5488255977630615
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,16,256,32,8,0,0.16936960220336914
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,8,256,32,16,0,0.10798879861831664
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,4,256,32,32,0,0.07267040014266968
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,2,256,32,64,0,0.05396320223808289
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,256,32,4,0,0.31696960926055906
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,128,128,512,1,0,13.768865966796875
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,32,256,16,4,0,0.15677759647369385
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,256,32,1,0,1.070308780670166
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,256,32,32,0,0.09652159810066223
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,256,32,16,0,0.13426879644393921
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,256,16,8,0,0.10860639810562134
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,32,256,64,4,0,0.5807263851165771
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,64,256,64,2,0,1.081993579864502
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,16,256,64,8,0,0.3231935977935791
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,128,512,1,0,13.652293395996093
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,8,256,64,16,0,0.19845279455184936
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,4,256,64,32,0,0.1345919966697693
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,128,256,64,1,0,2.073756790161133
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,2,256,64,64,0,0.10274560451507568
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,256,32,8,0,0.1943951964378357
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,256,64,4,0,0.6120319843292237
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,256,64,8,0,0.37480640411376953
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,256,64,16,0,0.23826079368591307
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,256,64,2,0,1.111683177947998
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,256,32,64,0,0.07878080010414124
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,256,64,32,0,0.17556159496307372
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,256,64,64,0,0.14407360553741455
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,8,256,128,16,0,0.38425440788269044
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,256,64,1,0,2.1336912155151366
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,16,256,128,8,0,0.641219186782837
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,4,256,128,32,0,0.25366079807281494
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,32,256,128,4,0,1.1638895988464355
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,128,256,32,1,0,1.061734390258789
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,64,256,128,2,0,2.2543264389038087
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,256,128,8,0,0.7147791862487793
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,256,32,2,0,0.5652703762054443
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,256,128,4,0,1.236638355255127
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,256,128,16,0,0.461411190032959
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,256,128,64,0,0.26611359119415284
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,256,128,2,0,2.344897651672363
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,128,256,128,1,0,4.355766296386719
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,2,256,128,64,0,0.19145439863204955
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,8,256,256,16,0,0.8430848121643066
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,16,256,256,8,0,1.4520768165588378
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,4,256,256,32,0,0.5336271762847901
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,256,128,1,0,4.4086864471435545
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,2,256,256,64,0,0.37901279926300047
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,32,256,256,4,0,2.7915071487426757
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,256,128,32,0,0.328654408454895
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,8,256,256,16,0,0.9823552131652832
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,4,256,256,32,0,0.6760992050170899
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,16,256,256,8,0,1.5835904121398925
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,2,256,256,64,0,0.5295328140258789
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,64,256,256,2,0,5.281355285644532
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,32,256,256,4,0,2.9246288299560548
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,64,256,256,2,0,5.423648071289063
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,float16,128,256,256,1,0,10.132150268554687
SGLang,0.5.9,NVIDIA H200,mla_context,flash_attention,float16,fp8,128,256,256,1,0,10.162429046630859
