framework,version,device,op_name,kernel_source,batch_size,isl,num_heads,num_key_value_heads,head_dim,beam_width,attn_dtype,kv_cache_dtype,step,latency
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,16384,96,1,128,1,float16,fp8,0,33.767816162109376
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,16384,96,4,128,1,float16,fp8,0,33.83320007324219
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,16384,96,2,128,1,float16,fp8,0,33.975103759765624
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,16384,96,2,128,1,fp8,fp8,0,33.89339904785156
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,16384,96,1,128,1,fp8,fp8,0,33.85509338378906
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,16384,96,4,128,1,fp8,fp8,0,34.08707885742187
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,16384,96,8,128,1,float16,fp8,0,34.12190246582031
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,16384,96,8,128,1,fp8,fp8,0,34.17952270507813
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16384,96,96,128,1,float16,fp8,0,17.621649169921874
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16384,96,96,128,1,fp8,fp8,0,17.864102172851563
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16384,96,1,128,1,float16,fp8,0,16.96361389160156
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16384,96,1,128,1,fp8,fp8,0,16.960752868652342
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16384,96,2,128,1,float16,fp8,0,17.102207946777344
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16384,96,2,128,1,fp8,fp8,0,17.09198455810547
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16384,96,1,128,1,float16,float16,0,21.53114471435547
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16384,96,2,128,1,float16,float16,0,21.404255676269532
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16384,96,4,128,1,float16,float16,0,21.648338317871094
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16384,96,4,128,1,float16,fp8,0,16.94568328857422
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16384,96,4,128,1,fp8,fp8,0,16.874256896972657
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,16384,96,1,128,1,float16,float16,0,43.65422668457031
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,16384,96,4,128,1,float16,float16,0,43.90677490234375
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,16384,96,2,128,1,float16,float16,0,43.528070068359376
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16384,96,8,128,1,float16,fp8,0,17.137481689453125
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16384,96,8,128,1,fp8,fp8,0,17.068455505371094
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16384,96,96,128,1,float16,fp8,0,8.814278411865235
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16384,96,96,128,1,fp8,fp8,0,8.789527893066406
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16384,96,1,128,1,float16,fp8,0,8.463521575927734
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16384,96,1,128,1,float16,float16,0,10.95285415649414
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16384,96,1,128,1,fp8,fp8,0,8.486647796630859
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16384,96,2,128,1,float16,float16,0,11.006311798095703
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16384,96,2,128,1,float16,fp8,0,8.452127838134766
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16384,96,8,128,1,float16,float16,0,21.771885681152344
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16384,96,2,128,1,fp8,fp8,0,8.510836791992187
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16384,96,4,128,1,float16,float16,0,11.178040313720704
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16384,96,4,128,1,float16,fp8,0,8.544283294677735
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16384,96,4,128,1,fp8,fp8,0,8.626969909667968
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16384,96,8,128,1,float16,fp8,0,8.520191955566407
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16384,96,8,128,1,float16,float16,0,10.91353759765625
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16384,96,96,128,1,float16,fp8,0,4.497499084472656
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16384,96,96,128,1,fp8,fp8,0,4.472617721557617
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16384,96,8,128,1,fp8,fp8,0,8.50003662109375
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16384,96,1,128,1,float16,fp8,0,4.292044830322266
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16384,96,1,128,1,float16,float16,0,5.447300720214844
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16384,96,1,128,1,fp8,fp8,0,4.27893295288086
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16384,96,2,128,1,float16,fp8,0,4.306240081787109
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16384,96,2,128,1,fp8,fp8,0,4.425065612792968
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16384,96,2,128,1,float16,float16,0,5.434775924682617
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,16384,96,8,128,1,float16,float16,0,43.12900390625
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16384,96,4,128,1,float16,float16,0,5.451609420776367
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16384,96,4,128,1,float16,fp8,0,4.319761657714844
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16384,96,4,128,1,fp8,fp8,0,4.2107696533203125
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16384,96,8,128,1,float16,float16,0,5.558707046508789
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16384,96,8,128,1,float16,fp8,0,4.181265640258789
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16384,96,8,128,1,fp8,fp8,0,4.386745452880859
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16384,96,96,128,1,float16,float16,0,5.626287841796875
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16384,96,96,128,1,float16,float16,0,11.316252899169921
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,12288,96,1,128,1,float16,fp8,0,19.65789794921875
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16384,96,96,128,1,float16,float16,0,22.77640380859375
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,12288,96,1,128,1,fp8,fp8,0,19.683132934570313
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,12288,96,2,128,1,float16,fp8,0,19.577244567871094
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,12288,96,1,128,1,float16,float16,0,24.990676879882812
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,12288,96,2,128,1,fp8,fp8,0,19.744139099121092
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,12288,96,2,128,1,float16,float16,0,24.71026153564453
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,12288,96,4,128,1,float16,float16,0,25.234721374511718
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,12288,96,96,128,1,float16,fp8,0,10.2931884765625
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,12288,96,96,128,1,float16,float16,0,13.365058898925781
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,12288,96,4,128,1,float16,fp8,0,19.66491241455078
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,12288,96,4,128,1,fp8,fp8,0,19.500102233886718
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,12288,96,8,128,1,float16,fp8,0,19.70129852294922
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,12288,96,8,128,1,fp8,fp8,0,19.820091247558594
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,12288,96,8,128,1,float16,float16,0,25.323265075683594
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,12288,96,1,128,1,float16,fp8,0,9.7084716796875
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,12288,96,1,128,1,fp8,fp8,0,9.795289611816406
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,12288,96,1,128,1,float16,float16,0,12.531793975830078
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,12288,96,2,128,1,float16,fp8,0,9.89807357788086
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,12288,96,2,128,1,float16,float16,0,12.579329681396484
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,12288,96,2,128,1,fp8,fp8,0,9.966788482666015
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,12288,96,4,128,1,float16,float16,0,12.632360076904297
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,12288,96,96,128,1,fp8,fp8,0,10.310377502441407
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,12288,96,4,128,1,float16,fp8,0,9.901847839355469
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,12288,96,4,128,1,fp8,fp8,0,9.823554992675781
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,12288,96,96,128,1,float16,float16,0,6.695558166503906
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,12288,96,8,128,1,float16,fp8,0,9.896444702148438
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,12288,96,8,128,1,float16,float16,0,12.41664810180664
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,12288,96,8,128,1,fp8,fp8,0,9.80093765258789
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,12288,96,96,128,1,fp8,fp8,0,5.149163055419922
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,12288,96,96,128,1,float16,fp8,0,5.292657470703125
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,12288,96,1,128,1,float16,fp8,0,4.882947158813477
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,12288,96,1,128,1,float16,float16,0,6.334735870361328
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,12288,96,1,128,1,fp8,fp8,0,4.940225601196289
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,12288,96,2,128,1,float16,fp8,0,4.9494670867919925
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,12288,96,2,128,1,fp8,fp8,0,4.87074089050293
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,12288,96,2,128,1,float16,float16,0,6.238252639770508
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,12288,96,4,128,1,float16,fp8,0,4.901164627075195
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,12288,96,4,128,1,float16,float16,0,6.4773101806640625
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,12288,96,4,128,1,fp8,fp8,0,4.8244270324707035
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,12288,96,96,128,1,float16,float16,0,3.2691585540771486
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,12288,96,96,128,1,float16,fp8,0,2.8366943359375
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,12288,96,8,128,1,float16,float16,0,6.371499252319336
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,12288,96,8,128,1,float16,fp8,0,4.927924728393554
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,12288,96,96,128,1,fp8,fp8,0,2.642038345336914
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,12288,96,8,128,1,fp8,fp8,0,4.851457595825195
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,12288,96,1,128,1,float16,fp8,0,2.4816768646240233
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,12288,96,1,128,1,float16,float16,0,3.1909584045410155
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,12288,96,1,128,1,fp8,fp8,0,2.819704055786133
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,12288,96,2,128,1,float16,fp8,0,2.7199167251586913
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,12288,96,2,128,1,float16,float16,0,3.039476776123047
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,12288,96,2,128,1,fp8,fp8,0,2.4480928421020507
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,12288,96,4,128,1,float16,fp8,0,2.4563039779663085
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,12288,96,4,128,1,float16,float16,0,2.916059112548828
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,12288,96,4,128,1,fp8,fp8,0,2.793814468383789
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,12288,96,8,128,1,float16,fp8,0,2.452262306213379
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,12288,96,8,128,1,fp8,fp8,0,2.773601531982422
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,12288,96,8,128,1,float16,float16,0,2.992086410522461
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,10240,96,1,128,1,float16,fp8,0,13.983253479003906
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,10240,96,1,128,1,fp8,fp8,0,13.774855041503907
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,10240,96,2,128,1,float16,fp8,0,13.815704345703125
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,10240,96,2,128,1,fp8,fp8,0,13.804563903808594
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,10240,96,1,128,1,float16,float16,0,17.825718688964844
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,10240,96,4,128,1,float16,fp8,0,13.718319702148438
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,10240,96,2,128,1,float16,float16,0,17.835499572753907
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,10240,96,4,128,1,float16,float16,0,17.657225036621092
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,10240,96,96,128,1,float16,fp8,0,7.646116638183594
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,10240,96,96,128,1,float16,float16,0,9.571199798583985
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,10240,96,4,128,1,fp8,fp8,0,13.9740234375
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,10240,96,96,128,1,fp8,fp8,0,7.496900939941407
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,10240,96,1,128,1,float16,float16,0,8.794627380371093
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,10240,96,8,128,1,fp8,fp8,0,14.080078125
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,10240,96,8,128,1,float16,fp8,0,14.022691345214843
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,10240,96,8,128,1,float16,float16,0,17.744247436523438
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,10240,96,1,128,1,float16,fp8,0,7.033041381835938
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,10240,96,1,128,1,fp8,fp8,0,6.964529418945313
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,10240,96,2,128,1,float16,fp8,0,6.956646728515625
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,10240,96,2,128,1,float16,float16,0,9.014889526367188
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,10240,96,2,128,1,fp8,fp8,0,6.8908943176269535
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,10240,96,4,128,1,float16,fp8,0,6.960676574707032
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,10240,96,4,128,1,float16,float16,0,9.051728057861329
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,10240,96,4,128,1,fp8,fp8,0,6.941185760498047
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,10240,96,96,128,1,float16,fp8,0,3.6734737396240233
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,10240,96,96,128,1,fp8,fp8,0,3.750254440307617
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,10240,96,8,128,1,float16,fp8,0,6.972395324707032
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,10240,96,96,128,1,float16,float16,0,4.763276672363281
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,10240,96,8,128,1,fp8,fp8,0,7.02408447265625
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,10240,96,8,128,1,float16,float16,0,8.9295166015625
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,10240,96,1,128,1,float16,float16,0,4.433251190185547
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,10240,96,1,128,1,float16,fp8,0,3.4468719482421877
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,10240,96,1,128,1,fp8,fp8,0,3.433176040649414
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,10240,96,2,128,1,float16,fp8,0,3.502920150756836
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,10240,96,2,128,1,fp8,fp8,0,3.6287742614746095
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,10240,96,2,128,1,float16,float16,0,4.375324630737305
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,10240,96,4,128,1,float16,fp8,0,3.6957889556884767
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,10240,96,4,128,1,float16,float16,0,4.374009704589843
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,10240,96,4,128,1,fp8,fp8,0,3.558643341064453
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,10240,96,8,128,1,float16,float16,0,4.310870361328125
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,10240,96,96,128,1,float16,float16,0,2.2538448333740235
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,10240,96,96,128,1,float16,fp8,0,2.283724784851074
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,10240,96,8,128,1,float16,fp8,0,3.4537937164306642
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,10240,96,8,128,1,fp8,fp8,0,3.5180992126464843
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,10240,96,96,128,1,fp8,fp8,0,1.8847152709960937
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,10240,96,1,128,1,float16,float16,0,2.287571144104004
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,10240,96,1,128,1,float16,fp8,0,1.7582000732421874
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,10240,96,1,128,1,fp8,fp8,0,2.0725263595581054
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,10240,96,2,128,1,float16,fp8,0,1.78876953125
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,10240,96,2,128,1,fp8,fp8,0,1.7871583938598632
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,10240,96,2,128,1,float16,float16,0,2.215340805053711
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,10240,96,4,128,1,float16,fp8,0,1.7605520248413087
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,10240,96,4,128,1,float16,float16,0,2.07193603515625
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,10240,96,4,128,1,fp8,fp8,0,1.9996496200561524
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,10240,96,8,128,1,fp8,fp8,0,1.75140323638916
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,10240,96,8,128,1,float16,float16,0,2.018499183654785
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,10240,96,8,128,1,float16,fp8,0,1.7683504104614258
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,8192,96,1,128,1,float16,fp8,0,18.226560974121092
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,8192,96,1,128,1,fp8,fp8,0,18.476681518554688
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,8192,96,2,128,1,float16,fp8,0,18.541822814941405
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,8192,96,2,128,1,fp8,fp8,0,18.368959045410158
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,8192,96,4,128,1,float16,fp8,0,18.1023681640625
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,8192,96,1,128,1,float16,float16,0,23.173544311523436
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,8192,96,2,128,1,float16,float16,0,23.345654296875
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,8192,96,4,128,1,float16,float16,0,23.384571838378907
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,8192,96,96,128,1,float16,fp8,0,9.817562866210938
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,8192,96,96,128,1,float16,float16,0,12.592852783203124
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,8192,96,96,128,1,fp8,fp8,0,9.943988800048828
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,8192,96,4,128,1,fp8,fp8,0,18.446356201171874
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,8192,96,1,128,1,float16,float16,0,11.718389129638672
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,8192,96,8,128,1,float16,fp8,0,18.890298461914064
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,8192,96,8,128,1,fp8,fp8,0,18.635693359375
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,8192,96,8,128,1,float16,float16,0,23.51884307861328
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,8192,96,1,128,1,float16,fp8,0,9.164048004150391
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,8192,96,1,128,1,fp8,fp8,0,9.026844787597657
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,8192,96,2,128,1,float16,float16,0,11.707068634033202
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,8192,96,2,128,1,float16,fp8,0,9.084299468994141
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,8192,96,2,128,1,fp8,fp8,0,9.068478393554688
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,8192,96,4,128,1,float16,fp8,0,9.20804672241211
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,8192,96,4,128,1,float16,float16,0,11.746614074707031
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,8192,96,4,128,1,fp8,fp8,0,9.183977508544922
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,8192,96,96,128,1,float16,fp8,0,4.906350326538086
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,8192,96,8,128,1,float16,fp8,0,9.141480255126954
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,8192,96,96,128,1,float16,float16,0,6.274512100219726
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,8192,96,96,128,1,fp8,fp8,0,4.9743297576904295
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,8192,96,8,128,1,float16,float16,0,11.842427062988282
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,8192,96,8,128,1,fp8,fp8,0,9.1395263671875
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,8192,96,1,128,1,float16,float16,0,5.834195327758789
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,8192,96,1,128,1,fp8,fp8,0,4.532447814941406
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,8192,96,2,128,1,fp8,fp8,0,4.519027328491211
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,8192,96,2,128,1,float16,fp8,0,4.6079246520996096
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,8192,96,2,128,1,float16,float16,0,5.733785629272461
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,8192,96,1,128,1,float16,fp8,0,4.554217529296875
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,8192,96,4,128,1,float16,fp8,0,4.626270294189453
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,8192,96,4,128,1,float16,float16,0,5.750723266601563
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,8192,96,4,128,1,fp8,fp8,0,4.570207977294922
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,8192,96,96,128,1,float16,float16,0,2.9342111587524413
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,8192,96,8,128,1,float16,fp8,0,4.515379333496094
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,8192,96,8,128,1,fp8,fp8,0,4.590726470947265
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,8192,96,8,128,1,float16,float16,0,5.812561416625977
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,8192,96,1,128,1,float16,fp8,0,2.308468818664551
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,8192,96,96,128,1,fp8,fp8,0,2.4621583938598635
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,8192,96,96,128,1,float16,fp8,0,2.487513542175293
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,8192,96,1,128,1,float16,float16,0,2.7636240005493162
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,8192,96,1,128,1,fp8,fp8,0,2.4920352935791015
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,8192,96,2,128,1,float16,float16,0,2.7310752868652344
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,8192,96,4,128,1,float16,fp8,0,2.2761167526245116
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,8192,96,2,128,1,float16,fp8,0,2.2945104598999024
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,8192,96,4,128,1,float16,float16,0,2.834160041809082
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,8192,96,4,128,1,fp8,fp8,0,2.271238327026367
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,8192,96,2,128,1,fp8,fp8,0,2.5381040573120117
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,8192,96,8,128,1,float16,fp8,0,2.3042064666748048
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,8192,96,96,128,1,float16,float16,0,1.464395236968994
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,8192,96,8,128,1,float16,float16,0,2.828108787536621
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,8192,96,96,128,1,float16,fp8,0,1.2639391899108887
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,8192,96,96,128,1,fp8,fp8,0,1.5979567527770997
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,8192,96,8,128,1,fp8,fp8,0,2.502649688720703
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,8192,96,1,128,1,float16,fp8,0,1.1769184112548827
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,8192,96,1,128,1,fp8,fp8,0,1.1755359649658204
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,8192,96,1,128,1,float16,float16,0,1.342692756652832
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,8192,96,2,128,1,float16,float16,0,1.3866527557373047
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,8192,96,2,128,1,fp8,fp8,0,1.166579246520996
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,8192,96,4,128,1,float16,fp8,0,1.1875727653503418
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,8192,96,4,128,1,float16,float16,0,1.3267536163330078
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,8192,96,2,128,1,float16,fp8,0,1.4429951667785645
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,8192,96,8,128,1,float16,fp8,0,1.2796560287475587
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,8192,96,8,128,1,float16,float16,0,1.3452863693237305
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,8192,96,4,128,1,fp8,fp8,0,1.167467212677002
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,8192,96,8,128,1,fp8,fp8,0,1.321337604522705
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,6144,96,1,128,1,float16,fp8,0,10.754937744140625
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,6144,96,1,128,1,fp8,fp8,0,10.692105865478515
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,6144,96,2,128,1,fp8,fp8,0,10.794048309326172
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,6144,96,4,128,1,float16,fp8,0,10.679167938232421
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,6144,96,1,128,1,float16,float16,0,13.320686340332031
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,6144,96,2,128,1,float16,float16,0,13.49268035888672
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,6144,96,4,128,1,float16,float16,0,13.45069580078125
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,6144,96,2,128,1,float16,fp8,0,10.813359832763672
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,6144,96,96,128,1,float16,fp8,0,6.034384155273438
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,6144,96,96,128,1,float16,float16,0,7.54647216796875
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,6144,96,96,128,1,fp8,fp8,0,6.014644622802734
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,6144,96,1,128,1,float16,float16,0,6.649056243896484
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,6144,96,4,128,1,fp8,fp8,0,10.759307098388671
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,6144,96,8,128,1,float16,fp8,0,10.907663726806641
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,6144,96,8,128,1,fp8,fp8,0,10.817926025390625
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,6144,96,8,128,1,float16,float16,0,13.828227233886718
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,6144,96,1,128,1,float16,fp8,0,5.301547241210938
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,6144,96,1,128,1,fp8,fp8,0,5.358950424194336
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,6144,96,2,128,1,fp8,fp8,0,5.302827072143555
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,6144,96,2,128,1,float16,fp8,0,5.404504013061524
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,6144,96,2,128,1,float16,float16,0,6.822289276123047
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,6144,96,4,128,1,float16,fp8,0,5.3237041473388675
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,6144,96,4,128,1,float16,float16,0,6.796894073486328
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,6144,96,4,128,1,fp8,fp8,0,5.480255889892578
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,6144,96,96,128,1,float16,fp8,0,3.127716827392578
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,6144,96,96,128,1,float16,float16,0,3.7258880615234373
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,6144,96,96,128,1,fp8,fp8,0,2.9829343795776366
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,6144,96,8,128,1,float16,fp8,0,5.318950271606445
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,6144,96,8,128,1,float16,float16,0,6.880120086669922
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,6144,96,1,128,1,float16,float16,0,3.336336135864258
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,6144,96,8,128,1,fp8,fp8,0,5.3839599609375
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,6144,96,1,128,1,float16,fp8,0,2.661790466308594
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,6144,96,1,128,1,fp8,fp8,0,2.6847103118896483
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,6144,96,2,128,1,float16,fp8,0,2.689009666442871
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,6144,96,2,128,1,fp8,fp8,0,2.6918384552001955
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,6144,96,2,128,1,float16,float16,0,3.3666702270507813
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,6144,96,4,128,1,fp8,fp8,0,2.678481674194336
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,6144,96,4,128,1,float16,fp8,0,2.870275115966797
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,6144,96,4,128,1,float16,float16,0,3.4206592559814455
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,6144,96,96,128,1,float16,float16,0,1.722275161743164
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,6144,96,8,128,1,float16,fp8,0,2.6846351623535156
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,6144,96,96,128,1,float16,fp8,0,1.698681640625
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,6144,96,8,128,1,float16,float16,0,3.3530174255371095
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,6144,96,96,128,1,fp8,fp8,0,1.532051181793213
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,6144,96,1,128,1,float16,float16,0,1.5445664405822754
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,6144,96,8,128,1,fp8,fp8,0,2.6630191802978516
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,6144,96,1,128,1,float16,fp8,0,1.5757792472839356
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,6144,96,1,128,1,fp8,fp8,0,1.5231552124023438
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,6144,96,2,128,1,float16,float16,0,1.5328463554382323
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,6144,96,2,128,1,fp8,fp8,0,1.6974288940429687
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,6144,96,4,128,1,float16,float16,0,1.5080767631530763
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,6144,96,4,128,1,float16,fp8,0,1.556276798248291
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,6144,96,8,128,1,float16,float16,0,1.5363311767578125
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,6144,96,4,128,1,fp8,fp8,0,1.3597951889038087
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,6144,96,8,128,1,float16,fp8,0,1.393179225921631
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,6144,96,8,128,1,fp8,fp8,0,1.4762559890747071
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,6144,96,96,128,1,float16,float16,0,0.8997664451599121
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,6144,96,96,128,1,float16,fp8,0,0.8971967697143555
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,6144,96,1,128,1,float16,fp8,0,0.7052527904510498
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,6144,96,1,128,1,float16,float16,0,0.8078607559204102
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,6144,96,1,128,1,fp8,fp8,0,0.7144864082336426
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,6144,96,96,128,1,fp8,fp8,0,0.7980991840362549
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,6144,96,2,128,1,float16,float16,0,0.7909503936767578
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,6144,96,2,128,1,float16,fp8,0,0.785916805267334
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,6144,96,2,128,1,fp8,fp8,0,0.7027567863464356
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,6144,96,4,128,1,float16,float16,0,0.8411808013916016
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,6144,96,4,128,1,float16,fp8,0,0.7144976139068604
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,6144,96,8,128,1,float16,float16,0,0.8162688255310059
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,6144,96,4,128,1,fp8,fp8,0,0.7174767971038818
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,6144,96,8,128,1,fp8,fp8,0,0.7032432079315185
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,6144,96,2,128,1,float16,fp8,0,1.389411163330078
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,6144,96,8,128,1,float16,fp8,0,0.723635196685791
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,4096,96,1,128,1,float16,fp8,0,10.125115203857423
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,4096,96,2,128,1,float16,fp8,0,10.157921600341798
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,4096,96,1,128,1,fp8,fp8,0,10.275447845458984
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,4096,96,2,128,1,fp8,fp8,0,10.182494354248046
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,4096,96,1,128,1,float16,float16,0,12.913055419921875
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,4096,96,2,128,1,float16,float16,0,12.707695770263673
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,4096,96,4,128,1,float16,float16,0,12.590471649169922
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,4096,96,4,128,1,float16,fp8,0,10.087083435058593
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,4096,96,96,128,1,float16,fp8,0,5.9727630615234375
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,4096,96,96,128,1,float16,float16,0,7.483700561523437
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,4096,96,4,128,1,fp8,fp8,0,10.307266998291016
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,4096,96,8,128,1,float16,fp8,0,10.297309112548827
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,4096,96,8,128,1,fp8,fp8,0,10.451274871826172
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,4096,96,96,128,1,fp8,fp8,0,5.961372756958008
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,4096,96,8,128,1,float16,float16,0,13.034796142578125
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,4096,96,1,128,1,float16,float16,0,6.311283111572266
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,4096,96,1,128,1,float16,fp8,0,5.101124954223633
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,4096,96,1,128,1,fp8,fp8,0,5.157900619506836
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,4096,96,2,128,1,float16,fp8,0,5.134790420532227
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,4096,96,2,128,1,fp8,fp8,0,5.159131240844727
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,4096,96,2,128,1,float16,float16,0,6.287329483032226
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,4096,96,4,128,1,float16,fp8,0,5.142919921875
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,4096,96,4,128,1,fp8,fp8,0,5.204891204833984
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,4096,96,4,128,1,float16,float16,0,6.375193786621094
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,4096,96,96,128,1,float16,fp8,0,3.006319999694824
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,4096,96,96,128,1,float16,float16,0,3.639708709716797
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,4096,96,8,128,1,float16,float16,0,6.433092498779297
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,4096,96,8,128,1,float16,fp8,0,5.124209594726563
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,4096,96,8,128,1,fp8,fp8,0,5.17957763671875
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,4096,96,96,128,1,fp8,fp8,0,2.9355136871337892
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,4096,96,1,128,1,float16,float16,0,3.013080024719238
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,4096,96,1,128,1,float16,fp8,0,2.6989728927612306
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,4096,96,2,128,1,fp8,fp8,0,2.585335922241211
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,4096,96,1,128,1,fp8,fp8,0,2.5423776626586916
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,4096,96,2,128,1,float16,fp8,0,2.7832111358642577
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,4096,96,2,128,1,float16,float16,0,3.0601007461547853
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,4096,96,4,128,1,float16,float16,0,3.0872800827026365
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,4096,96,4,128,1,float16,fp8,0,2.538870429992676
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,4096,96,4,128,1,fp8,fp8,0,2.900569534301758
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,4096,96,96,128,1,float16,float16,0,1.7068815231323242
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,4096,96,8,128,1,fp8,fp8,0,2.540140724182129
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,4096,96,1,128,1,float16,float16,0,1.4692031860351562
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,4096,96,96,128,1,float16,fp8,0,1.6707279205322265
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,4096,96,96,128,1,fp8,fp8,0,1.7176752090454102
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,4096,96,1,128,1,float16,fp8,0,1.3109647750854492
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,4096,96,1,128,1,fp8,fp8,0,1.4587488174438477
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,4096,96,2,128,1,float16,float16,0,1.4744671821594237
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,4096,96,2,128,1,float16,fp8,0,1.4710991859436036
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,4096,96,4,128,1,float16,fp8,0,1.2917280197143555
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,4096,96,4,128,1,float16,float16,0,1.4700927734375
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,4096,96,2,128,1,fp8,fp8,0,1.2979087829589844
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,4096,96,4,128,1,fp8,fp8,0,1.325766372680664
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,4096,96,8,128,1,float16,float16,0,2.929118347167969
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,4096,96,96,128,1,float16,float16,0,0.9393376350402832
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,4096,96,8,128,1,float16,fp8,0,1.3339327812194823
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,4096,96,8,128,1,float16,float16,0,1.5651935577392577
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,4096,96,96,128,1,fp8,fp8,0,0.7709936141967774
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,4096,96,8,128,1,fp8,fp8,0,1.2915375709533692
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,4096,96,96,128,1,float16,fp8,0,0.763588809967041
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,4096,96,1,128,1,float16,float16,0,0.718996810913086
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,4096,96,1,128,1,float16,fp8,0,0.8007696151733399
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,4096,96,2,128,1,float16,fp8,0,0.6683695793151856
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,4096,96,2,128,1,fp8,fp8,0,0.6667615890502929
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,4096,96,2,128,1,float16,float16,0,0.8071344375610352
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,4096,96,1,128,1,fp8,fp8,0,0.6810527801513672
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,4096,96,4,128,1,float16,fp8,0,0.668555212020874
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,4096,96,8,128,1,float16,fp8,0,0.6676159858703613
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,4096,96,4,128,1,fp8,fp8,0,0.7928624153137207
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,4096,96,8,128,1,fp8,fp8,0,0.770739221572876
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,4096,96,96,128,1,float16,fp8,0,0.4055952072143555
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,4096,96,96,128,1,float16,float16,0,0.44877920150756834
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,4096,96,96,128,1,fp8,fp8,0,0.4046944141387939
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,4096,96,1,128,1,float16,fp8,0,0.37943520545959475
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,4096,96,2,128,1,float16,fp8,0,0.3541071891784668
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,4096,96,2,128,1,fp8,fp8,0,0.3669408082962036
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,4096,96,2,128,1,float16,float16,0,0.380020809173584
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,4096,96,4,128,1,float16,float16,0,0.37837920188903806
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,4096,96,4,128,1,float16,fp8,0,0.3678992033004761
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,4096,96,8,128,1,float16,float16,0,0.38162078857421877
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,4096,96,4,128,1,fp8,fp8,0,0.3535583972930908
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,4096,96,8,128,1,fp8,fp8,0,0.35633599758148193
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,4096,96,8,128,1,float16,fp8,0,2.5553152084350588
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,4096,96,4,128,1,float16,float16,0,0.7390048027038574
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,4096,96,8,128,1,float16,float16,0,0.7373023986816406
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,4096,96,1,128,1,float16,float16,0,0.38657279014587403
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,4096,96,1,128,1,fp8,fp8,0,0.35484158992767334
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,4096,96,8,128,1,float16,fp8,0,0.36765921115875244
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,3072,96,1,128,1,float16,fp8,0,6.10235824584961
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,3072,96,1,128,1,fp8,fp8,0,6.097723388671875
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,3072,96,1,128,1,float16,float16,0,7.436787414550781
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,3072,96,2,128,1,float16,fp8,0,6.092265701293945
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,3072,96,2,128,1,float16,float16,0,7.490806579589844
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,3072,96,2,128,1,fp8,fp8,0,6.102212905883789
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,3072,96,4,128,1,float16,float16,0,7.411772918701172
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,3072,96,96,128,1,float16,fp8,0,3.642108917236328
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,3072,96,4,128,1,float16,fp8,0,6.1636192321777346
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,3072,96,96,128,1,float16,float16,0,4.409740829467774
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,3072,96,4,128,1,fp8,fp8,0,6.1443359375
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,3072,96,8,128,1,float16,fp8,0,6.103457641601563
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,3072,96,8,128,1,fp8,fp8,0,6.221012878417969
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,3072,96,96,128,1,fp8,fp8,0,3.808078384399414
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,3072,96,8,128,1,float16,float16,0,7.705574035644531
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,3072,96,1,128,1,float16,fp8,0,3.0877599716186523
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,3072,96,1,128,1,float16,float16,0,3.5768768310546877
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,3072,96,1,128,1,fp8,fp8,0,3.1886192321777345
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,3072,96,2,128,1,float16,fp8,0,3.072494316101074
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,3072,96,2,128,1,float16,float16,0,3.7048736572265626
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,3072,96,2,128,1,fp8,fp8,0,3.083875274658203
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,3072,96,4,128,1,float16,float16,0,3.6114017486572267
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,3072,96,4,128,1,float16,fp8,0,3.2369136810302734
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,3072,96,4,128,1,fp8,fp8,0,3.0982656478881836
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,3072,96,8,128,1,float16,fp8,0,3.0819536209106446
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,3072,96,96,128,1,float16,float16,0,2.3241968154907227
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,3072,96,96,128,1,float16,fp8,0,1.854043197631836
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,3072,96,8,128,1,float16,float16,0,3.704822540283203
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,3072,96,1,128,1,float16,float16,0,1.696945571899414
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,3072,96,8,128,1,fp8,fp8,0,3.0783647537231444
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,3072,96,1,128,1,float16,fp8,0,1.5588047981262207
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,3072,96,96,128,1,fp8,fp8,0,2.1930095672607424
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,3072,96,1,128,1,fp8,fp8,0,1.5565199851989746
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,3072,96,2,128,1,float16,fp8,0,1.5490415573120118
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,3072,96,2,128,1,float16,float16,0,1.8458160400390624
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,3072,96,2,128,1,fp8,fp8,0,1.7103103637695312
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,3072,96,4,128,1,fp8,fp8,0,1.5496159553527833
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,3072,96,8,128,1,float16,float16,0,1.7151056289672852
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,3072,96,4,128,1,float16,fp8,0,1.5734432220458985
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,3072,96,8,128,1,float16,fp8,0,1.5511072158813477
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,3072,96,96,128,1,float16,float16,0,1.2046208381652832
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,3072,96,96,128,1,float16,fp8,0,0.9662591934204101
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,3072,96,8,128,1,fp8,fp8,0,1.5522303581237793
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,3072,96,96,128,1,fp8,fp8,0,0.9384832382202148
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,3072,96,1,128,1,float16,fp8,0,0.7899919986724854
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,3072,96,1,128,1,float16,float16,0,0.9418720245361328
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,3072,96,1,128,1,fp8,fp8,0,0.7921872138977051
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,3072,96,2,128,1,float16,fp8,0,0.793284797668457
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,3072,96,2,128,1,float16,float16,0,0.9447551727294922
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,3072,96,2,128,1,fp8,fp8,0,0.7939839839935303
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,3072,96,4,128,1,float16,float16,0,0.8569328308105468
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,3072,96,4,128,1,float16,fp8,0,0.7902143955230713
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,3072,96,8,128,1,float16,float16,0,0.8579456329345703
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,3072,96,96,128,1,float16,float16,0,0.5447743892669678
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,3072,96,8,128,1,float16,fp8,0,0.7933040142059327
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,3072,96,96,128,1,float16,fp8,0,0.4908048152923584
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,3072,96,4,128,1,fp8,fp8,0,0.8516672134399415
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,3072,96,8,128,1,fp8,fp8,0,0.8728015899658204
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,3072,96,1,128,1,float16,float16,0,0.44211039543151853
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,3072,96,1,128,1,fp8,fp8,0,0.4134672164916992
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,3072,96,2,128,1,float16,float16,0,0.45934238433837893
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,3072,96,2,128,1,float16,fp8,0,0.42336320877075195
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,3072,96,2,128,1,fp8,fp8,0,0.4305600166320801
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,3072,96,4,128,1,float16,fp8,0,0.4141727924346924
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,3072,96,4,128,1,fp8,fp8,0,0.4295519828796387
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,3072,96,4,128,1,float16,float16,0,1.932267189025879
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,3072,96,8,128,1,float16,fp8,0,0.4283279895782471
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,3072,96,4,128,1,float16,float16,0,0.4492800235748291
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,3072,96,96,128,1,float16,float16,0,0.2901936054229736
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,3072,96,96,128,1,float16,fp8,0,0.26216959953308105
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,3072,96,1,128,1,float16,float16,0,0.2452064037322998
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,3072,96,96,128,1,fp8,fp8,0,0.2672575950622559
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,3072,96,1,128,1,float16,fp8,0,0.2287951946258545
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,3072,96,1,128,1,fp8,fp8,0,0.2243824005126953
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,3072,96,2,128,1,float16,float16,0,0.235315203666687
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,3072,96,2,128,1,fp8,fp8,0,0.22765278816223145
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,3072,96,4,128,1,float16,float16,0,0.23605599403381347
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,3072,96,4,128,1,float16,fp8,0,0.2252351999282837
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,3072,96,4,128,1,fp8,fp8,0,0.22607200145721434
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,3072,96,8,128,1,float16,float16,0,0.2378688097000122
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,3072,96,8,128,1,float16,fp8,0,0.22551839351654052
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,3072,96,8,128,1,fp8,fp8,0,0.22597119808197022
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,3072,96,96,128,1,fp8,fp8,0,0.48563518524169924
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,3072,96,1,128,1,float16,fp8,0,0.41234397888183594
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,3072,96,8,128,1,float16,float16,0,0.4488255977630615
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,3072,96,8,128,1,fp8,fp8,0,0.4128416061401367
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,3072,96,2,128,1,float16,fp8,0,0.2362895965576172
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,2048,96,1,128,1,float16,fp8,0,6.187212753295898
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,2048,96,1,128,1,fp8,fp8,0,6.185108947753906
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,2048,96,1,128,1,float16,float16,0,7.349180603027344
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,2048,96,2,128,1,float16,fp8,0,6.178551864624024
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,2048,96,2,128,1,float16,float16,0,7.292332458496094
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,2048,96,2,128,1,fp8,fp8,0,6.16992301940918
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,2048,96,4,128,1,float16,fp8,0,6.186883163452149
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,2048,96,4,128,1,float16,float16,0,7.341361236572266
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,2048,96,4,128,1,fp8,fp8,0,6.182174301147461
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,2048,96,96,128,1,float16,fp8,0,3.868220901489258
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,2048,96,96,128,1,float16,float16,0,4.657331085205078
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,2048,96,1,128,1,float16,float16,0,3.6369487762451174
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,2048,96,8,128,1,float16,fp8,0,6.210004806518555
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,2048,96,96,128,1,fp8,fp8,0,3.884041595458984
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,2048,96,8,128,1,float16,float16,0,7.5927581787109375
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,2048,96,8,128,1,fp8,fp8,0,6.1775470733642575
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,2048,96,1,128,1,float16,fp8,0,3.105580711364746
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,2048,96,1,128,1,fp8,fp8,0,3.0981216430664062
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,2048,96,2,128,1,float16,fp8,0,3.1053535461425783
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,2048,96,2,128,1,fp8,fp8,0,3.10689754486084
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,2048,96,2,128,1,float16,float16,0,3.680652618408203
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,2048,96,4,128,1,float16,fp8,0,3.1025455474853514
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,2048,96,4,128,1,float16,float16,0,3.7834896087646483
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,2048,96,4,128,1,fp8,fp8,0,3.1046335220336916
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,2048,96,96,128,1,float16,fp8,0,1.9708751678466796
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,2048,96,8,128,1,float16,float16,0,3.6580478668212892
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,2048,96,96,128,1,float16,float16,0,2.423017692565918
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,2048,96,96,128,1,fp8,fp8,0,2.117945671081543
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,2048,96,8,128,1,float16,fp8,0,3.100315284729004
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,2048,96,8,128,1,fp8,fp8,0,3.1066335678100585
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,2048,96,1,128,1,float16,float16,0,1.7080511093139648
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,2048,96,1,128,1,float16,fp8,0,1.573204803466797
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,2048,96,1,128,1,fp8,fp8,0,1.7234064102172852
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,2048,96,2,128,1,float16,fp8,0,1.573692798614502
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,2048,96,2,128,1,fp8,fp8,0,1.5625391960144044
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,2048,96,2,128,1,float16,float16,0,1.8296672821044921
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,2048,96,4,128,1,float16,float16,0,1.6522640228271483
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,2048,96,4,128,1,float16,fp8,0,1.562764835357666
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,2048,96,4,128,1,fp8,fp8,0,1.7171056747436524
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,2048,96,96,128,1,float16,fp8,0,0.9881423950195313
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,2048,96,96,128,1,float16,float16,0,1.196183967590332
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,2048,96,1,128,1,float16,float16,0,0.8378992080688477
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,2048,96,8,128,1,float16,float16,0,1.7216175079345704
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,2048,96,8,128,1,float16,fp8,0,1.584812831878662
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,2048,96,8,128,1,fp8,fp8,0,1.5679375648498535
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,2048,96,1,128,1,float16,fp8,0,0.8143088340759277
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,2048,96,1,128,1,fp8,fp8,0,0.7942048072814941
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,2048,96,2,128,1,float16,fp8,0,0.8684224128723145
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,2048,96,2,128,1,float16,float16,0,0.9131168365478516
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,2048,96,2,128,1,fp8,fp8,0,0.913088035583496
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,2048,96,4,128,1,fp8,fp8,0,0.8006560325622558
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,2048,96,4,128,1,float16,float16,0,0.8439423561096191
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,2048,96,4,128,1,float16,fp8,0,0.7953775882720947
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,2048,96,1,128,1,float16,float16,0,0.4247280120849609
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,2048,96,96,128,1,float16,fp8,0,0.5093535900115966
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,2048,96,96,128,1,float16,float16,0,0.5920176029205322
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,2048,96,96,128,1,fp8,fp8,0,0.5446864128112793
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,2048,96,8,128,1,float16,fp8,0,0.8132944107055664
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,2048,96,8,128,1,float16,float16,0,0.8560735702514648
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,2048,96,8,128,1,fp8,fp8,0,0.9100959777832032
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,2048,96,1,128,1,float16,fp8,0,0.4114511966705322
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,2048,96,2,128,1,float16,float16,0,0.4406911849975586
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,2048,96,2,128,1,float16,fp8,0,0.4272831916809082
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,2048,96,1,128,1,fp8,fp8,0,0.4107664108276367
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,2048,96,4,128,1,float16,float16,0,0.4303391933441162
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,2048,96,4,128,1,float16,fp8,0,0.4366464138031006
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,2048,96,4,128,1,fp8,fp8,0,0.41136960983276366
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,2048,96,8,128,1,float16,fp8,0,0.41800317764282224
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,2048,96,96,128,1,float16,float16,0,0.2933903932571411
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,2048,96,8,128,1,fp8,fp8,0,0.4120512008666992
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,2048,96,96,128,1,fp8,fp8,0,0.2685487985610962
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,2048,96,1,128,1,float16,float16,0,0.2293008089065552
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,2048,96,1,128,1,float16,fp8,0,0.22923359870910645
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,2048,96,1,128,1,fp8,fp8,0,0.21976799964904786
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,2048,96,2,128,1,float16,float16,0,0.22549760341644287
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,2048,96,2,128,1,float16,fp8,0,0.2234639883041382
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,2048,96,2,128,1,fp8,fp8,0,0.2234976053237915
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,2048,96,4,128,1,float16,float16,0,0.22782559394836427
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,2048,96,96,128,1,fp8,fp8,0,1.1423680305480957
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,2048,96,4,128,1,float16,fp8,0,0.2194943904876709
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,2048,96,4,128,1,fp8,fp8,0,0.22073280811309814
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,2048,96,8,128,1,float16,float16,0,0.23299040794372558
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,2048,96,8,128,1,float16,fp8,0,0.21999680995941162
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,2048,96,96,128,1,float16,float16,0,0.16185120344161988
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,2048,96,8,128,1,fp8,fp8,0,0.21979680061340331
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,2048,96,1,128,1,float16,float16,0,0.12779840230941772
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,2048,96,96,128,1,fp8,fp8,0,0.15134719610214234
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,2048,96,1,128,1,float16,fp8,0,0.12206079959869384
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,2048,96,1,128,1,fp8,fp8,0,0.12289760112762452
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,2048,96,2,128,1,float16,float16,0,0.12770719528198243
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,2048,96,2,128,1,float16,fp8,0,0.12445759773254395
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,2048,96,2,128,1,fp8,fp8,0,0.12206399440765381
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,2048,96,4,128,1,float16,float16,0,0.1293071985244751
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,2048,96,96,128,1,float16,fp8,0,0.14929280281066895
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,2048,96,4,128,1,fp8,fp8,0,0.12437119483947753
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,2048,96,8,128,1,float16,float16,0,0.13029760122299194
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,2048,96,8,128,1,float16,fp8,0,0.12286560535430908
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,2048,96,8,128,1,fp8,fp8,0,0.12320799827575683
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,2048,96,4,128,1,float16,fp8,0,0.12202880382537842
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,2048,96,2,128,1,fp8,fp8,0,0.4133440017700195
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,2048,96,8,128,1,float16,float16,0,0.44661917686462405
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,2048,96,96,128,1,float16,fp8,0,0.27669920921325686
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1536,96,1,128,1,float16,fp8,0,3.9168495178222655
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1536,96,1,128,1,fp8,fp8,0,3.918854522705078
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1536,96,1,128,1,float16,float16,0,4.450022506713867
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1536,96,2,128,1,float16,fp8,0,3.9142303466796875
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1536,96,2,128,1,fp8,fp8,0,3.907316970825195
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1536,96,4,128,1,float16,fp8,0,3.9094608306884764
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1536,96,4,128,1,float16,float16,0,4.536548614501953
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1536,96,2,128,1,float16,float16,0,4.367545700073242
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1536,96,96,128,1,float16,float16,0,2.910558319091797
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1536,96,8,128,1,float16,float16,0,4.381711959838867
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1536,96,8,128,1,float16,fp8,0,4.019327926635742
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1536,96,96,128,1,float16,fp8,0,2.54663200378418
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1536,96,8,128,1,fp8,fp8,0,3.9111743927001954
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1536,96,96,128,1,fp8,fp8,0,2.6532608032226563
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1536,96,1,128,1,float16,float16,0,2.068428802490234
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1536,96,1,128,1,fp8,fp8,0,1.9664304733276368
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1536,96,2,128,1,float16,fp8,0,1.9669008255004883
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1536,96,1,128,1,float16,fp8,0,2.1329504013061524
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1536,96,2,128,1,float16,float16,0,2.0942399978637694
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1536,96,2,128,1,fp8,fp8,0,1.9687711715698242
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1536,96,4,128,1,fp8,fp8,0,3.913915252685547
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1536,96,4,128,1,float16,float16,0,2.279230308532715
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1536,96,4,128,1,float16,fp8,0,1.9657695770263672
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1536,96,4,128,1,fp8,fp8,0,1.9678239822387695
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1536,96,96,128,1,float16,float16,0,1.4314751625061035
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1536,96,8,128,1,fp8,fp8,0,1.9663824081420898
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1536,96,8,128,1,float16,float16,0,2.1374319076538084
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1536,96,96,128,1,float16,fp8,0,1.2864336013793944
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1536,96,1,128,1,float16,float16,0,1.0379088401794434
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1536,96,1,128,1,float16,fp8,0,0.9955904006958007
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1536,96,96,128,1,fp8,fp8,0,1.3686464309692383
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1536,96,1,128,1,fp8,fp8,0,1.005020809173584
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1536,96,2,128,1,float16,float16,0,1.0415967941284179
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1536,96,2,128,1,fp8,fp8,0,0.9936047554016113
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1536,96,2,128,1,float16,fp8,0,1.1268320083618164
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1536,96,4,128,1,float16,fp8,0,0.9953904151916504
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1536,96,4,128,1,float16,float16,0,1.0733951568603515
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1536,96,8,128,1,float16,float16,0,1.046457576751709
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1536,96,96,128,1,float16,float16,0,0.7346720218658447
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1536,96,8,128,1,float16,fp8,0,1.0547679901123046
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1536,96,8,128,1,fp8,fp8,0,0.9937904357910157
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1536,96,96,128,1,float16,fp8,0,0.6553823947906494
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1536,96,96,128,1,fp8,fp8,0,0.7578239917755127
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1536,96,1,128,1,float16,float16,0,0.5600368022918701
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1536,96,1,128,1,float16,fp8,0,0.5157087802886963
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1536,96,1,128,1,fp8,fp8,0,0.5235424041748047
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1536,96,2,128,1,float16,float16,0,0.5196208000183106
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1536,96,2,128,1,float16,fp8,0,0.5092527866363525
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1536,96,4,128,1,float16,float16,0,0.5655807971954345
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1536,96,4,128,1,float16,fp8,0,0.5116288185119628
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1536,96,4,128,1,fp8,fp8,0,0.5110015869140625
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1536,96,2,128,1,fp8,fp8,0,0.5326303958892822
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1536,96,8,128,1,float16,float16,0,0.5316336154937744
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1536,96,96,128,1,float16,float16,0,0.3691663980484009
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1536,96,96,128,1,float16,fp8,0,0.3409071922302246
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1536,96,8,128,1,fp8,fp8,0,0.5336527824401855
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1536,96,1,128,1,float16,float16,0,0.26991839408874513
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1536,96,96,128,1,fp8,fp8,0,0.351200008392334
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1536,96,1,128,1,float16,fp8,0,0.2667232036590576
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1536,96,1,128,1,fp8,fp8,0,0.2661616086959839
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1536,96,2,128,1,float16,float16,0,0.2779695987701416
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1536,96,2,128,1,float16,fp8,0,0.26590240001678467
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1536,96,2,128,1,fp8,fp8,0,0.27546238899230957
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1536,96,4,128,1,float16,float16,0,0.2722287893295288
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1536,96,4,128,1,float16,fp8,0,0.2663840055465698
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1536,96,4,128,1,fp8,fp8,0,0.27437601089477537
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1536,96,8,128,1,float16,float16,0,0.2760351896286011
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1536,96,8,128,1,float16,fp8,0,0.27654080390930175
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1536,96,96,128,1,float16,float16,0,0.1960911989212036
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1536,96,96,128,1,float16,fp8,0,0.1840016007423401
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1536,96,1,128,1,float16,float16,0,0.1500815987586975
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1536,96,1,128,1,float16,fp8,0,0.1449280023574829
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1536,96,1,128,1,fp8,fp8,0,0.145687997341156
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1536,96,2,128,1,float16,float16,0,0.14981759786605836
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1536,96,4,128,1,fp8,fp8,0,1.0107328414916992
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1536,96,2,128,1,float16,fp8,0,0.14477920532226562
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1536,96,2,128,1,fp8,fp8,0,0.1453744053840637
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1536,96,4,128,1,float16,float16,0,0.14984480142593384
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1536,96,4,128,1,float16,fp8,0,0.14405280351638794
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1536,96,8,128,1,float16,float16,0,0.15207040309906006
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1536,96,4,128,1,fp8,fp8,0,0.14731040000915527
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1536,96,8,128,1,float16,fp8,0,0.14517279863357543
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1536,96,96,128,1,float16,float16,0,0.10924479961395264
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1536,96,96,128,1,fp8,fp8,0,0.10225759744644165
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1536,96,1,128,1,float16,float16,0,0.08645439743995667
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1536,96,1,128,1,float16,fp8,0,0.08231040239334106
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1536,96,1,128,1,fp8,fp8,0,0.08356959819793701
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1536,96,2,128,1,float16,float16,0,0.08623840212821961
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1536,96,2,128,1,float16,fp8,0,0.08363839983940125
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1536,96,2,128,1,fp8,fp8,0,0.08250880241394043
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1536,96,4,128,1,float16,float16,0,0.08684639930725098
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1536,96,4,128,1,float16,fp8,0,0.08235039710998535
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1536,96,4,128,1,fp8,fp8,0,0.08372160196304321
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1536,96,8,128,1,float16,float16,0,0.08704800009727479
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1536,96,8,128,1,float16,fp8,0,0.08266879916191101
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1536,96,8,128,1,float16,fp8,0,2.0781152725219725
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1536,96,8,128,1,fp8,fp8,0,0.0824512004852295
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1536,96,8,128,1,float16,fp8,0,0.5096831798553467
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1536,96,8,128,1,fp8,fp8,0,0.26702558994293213
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1536,96,96,128,1,fp8,fp8,0,0.1815567970275879
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1536,96,8,128,1,fp8,fp8,0,0.14569920301437378
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1536,96,96,128,1,float16,fp8,0,0.10275520086288452
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,1024,96,1,128,1,float16,fp8,0,4.237260818481445
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,1024,96,1,128,1,float16,float16,0,4.621388626098633
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,1024,96,2,128,1,float16,float16,0,4.481913757324219
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,1024,96,1,128,1,fp8,fp8,0,4.231068801879883
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,1024,96,2,128,1,float16,fp8,0,4.2389568328857425
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,1024,96,2,128,1,fp8,fp8,0,4.228582382202148
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,1024,96,4,128,1,float16,fp8,0,4.233478546142578
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,1024,96,4,128,1,float16,float16,0,4.693603134155273
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,1024,96,4,128,1,fp8,fp8,0,4.231436920166016
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,1024,96,8,128,1,fp8,fp8,0,4.224619293212891
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1024,96,96,128,1,float16,fp8,0,2.8877775192260744
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1024,96,96,128,1,float16,float16,0,3.2958526611328125
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,1024,96,8,128,1,float16,float16,0,4.882772827148438
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1024,96,96,128,1,fp8,fp8,0,2.9845903396606444
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1024,96,1,128,1,float16,float16,0,2.159297561645508
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1024,96,1,128,1,float16,fp8,0,2.124964714050293
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1024,96,1,128,1,fp8,fp8,0,2.1251935958862305
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1024,96,2,128,1,float16,float16,0,2.3228271484375
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1024,96,2,128,1,float16,fp8,0,2.2616159439086916
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1024,96,2,128,1,fp8,fp8,0,2.124928092956543
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,1024,96,8,128,1,float16,fp8,0,4.290249633789062
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1024,96,4,128,1,float16,float16,0,2.4048959732055666
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1024,96,4,128,1,float16,fp8,0,2.1226383209228517
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1024,96,4,128,1,fp8,fp8,0,2.123148727416992
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1024,96,8,128,1,float16,fp8,0,2.1388015747070312
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1024,96,8,128,1,float16,float16,0,2.3514495849609376
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1024,96,96,128,1,float16,float16,0,1.6097055435180665
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1024,96,8,128,1,fp8,fp8,0,2.1239295959472657
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1024,96,1,128,1,float16,float16,0,1.068400001525879
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1024,96,96,128,1,fp8,fp8,0,1.4522656440734862
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1024,96,1,128,1,float16,fp8,0,1.0734784126281738
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1024,96,1,128,1,fp8,fp8,0,1.0739359855651855
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1024,96,96,128,1,float16,fp8,0,1.5230400085449218
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1024,96,2,128,1,float16,float16,0,1.1985919952392579
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1024,96,2,128,1,float16,fp8,0,1.1129103660583497
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1024,96,2,128,1,fp8,fp8,0,1.0708271980285644
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1024,96,4,128,1,float16,float16,0,1.1352383613586425
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1024,96,4,128,1,float16,fp8,0,1.0710543632507323
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1024,96,4,128,1,fp8,fp8,0,1.0727279663085938
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1024,96,8,128,1,float16,fp8,0,1.0869999885559083
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1024,96,96,128,1,float16,float16,0,0.8135536193847657
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1024,96,8,128,1,fp8,fp8,0,1.1392911911010741
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1024,96,1,128,1,float16,float16,0,0.5433728218078613
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1024,96,96,128,1,fp8,fp8,0,0.7698400020599365
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1024,96,1,128,1,float16,fp8,0,0.5461840152740478
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1024,96,1,128,1,fp8,fp8,0,0.5499919891357422
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1024,96,2,128,1,float16,float16,0,0.5453824043273926
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1024,96,2,128,1,float16,fp8,0,0.5586143970489502
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1024,96,2,128,1,fp8,fp8,0,0.5449711799621582
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1024,96,4,128,1,float16,float16,0,0.5789904117584228
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1024,96,4,128,1,float16,fp8,0,0.5458640098571778
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1024,96,4,128,1,fp8,fp8,0,0.5454991817474365
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1024,96,8,128,1,float16,float16,0,0.5635615825653076
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1024,96,96,128,1,float16,float16,0,0.4198607921600342
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1024,96,8,128,1,fp8,fp8,0,0.54508957862854
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1024,96,96,128,1,fp8,fp8,0,0.3790575981140137
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1024,96,8,128,1,float16,fp8,0,0.5513152122497559
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1024,96,1,128,1,float16,float16,0,0.28661279678344725
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1024,96,1,128,1,float16,fp8,0,0.28807840347290037
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1024,96,1,128,1,fp8,fp8,0,0.2821471929550171
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1024,96,2,128,1,float16,float16,0,0.2823856115341187
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1024,96,2,128,1,float16,fp8,0,0.2837968111038208
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1024,96,2,128,1,fp8,fp8,0,0.28597919940948485
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1024,96,4,128,1,float16,fp8,0,0.2826495885848999
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1024,96,4,128,1,fp8,fp8,0,0.28280799388885497
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1024,96,8,128,1,float16,float16,0,0.29034080505371096
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1024,96,8,128,1,float16,fp8,0,0.28344800472259524
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1024,96,8,128,1,fp8,fp8,0,0.28223519325256347
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1024,96,96,128,1,float16,fp8,0,0.2010576009750366
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1024,96,1,128,1,float16,float16,0,0.15259840488433837
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1024,96,96,128,1,fp8,fp8,0,0.20052800178527833
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1024,96,1,128,1,float16,fp8,0,0.15124000310897828
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1024,96,1,128,1,fp8,fp8,0,0.150491201877594
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1024,96,2,128,1,float16,float16,0,0.15337280035018921
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1024,96,2,128,1,float16,fp8,0,0.1509392023086548
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1024,96,8,128,1,float16,float16,0,1.101257610321045
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1024,96,2,128,1,fp8,fp8,0,0.1505679965019226
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1024,96,4,128,1,float16,float16,0,0.15507999658584595
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1024,96,4,128,1,float16,fp8,0,0.1514240026473999
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1024,96,96,128,1,float16,fp8,0,0.7371391773223877
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1024,96,8,128,1,float16,float16,0,0.15693919658660888
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1024,96,8,128,1,float16,fp8,0,0.15227999687194824
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1024,96,8,128,1,fp8,fp8,0,0.15198080539703368
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1024,96,96,128,1,float16,float16,0,0.12009919881820678
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1024,96,96,128,1,fp8,fp8,0,0.11047199964523316
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1024,96,1,128,1,float16,float16,0,0.08686720132827759
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1024,96,1,128,1,float16,fp8,0,0.08454239964485169
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1024,96,1,128,1,fp8,fp8,0,0.08466079831123352
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1024,96,2,128,1,float16,float16,0,0.08671360015869141
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1024,96,96,128,1,float16,fp8,0,0.10925439596176148
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1024,96,4,128,1,float16,float16,0,0.08797760009765625
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1024,96,4,128,1,float16,fp8,0,0.08387200236320495
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1024,96,4,128,1,fp8,fp8,0,0.0843936026096344
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1024,96,2,128,1,float16,fp8,0,0.08468959927558899
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1024,96,8,128,1,float16,float16,0,0.08934080004692077
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1024,96,8,128,1,float16,fp8,0,0.08474720120429993
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1024,96,96,128,1,float16,float16,0,0.0693552017211914
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1024,96,96,128,1,float16,fp8,0,0.06315680146217346
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1024,96,96,128,1,fp8,fp8,0,0.06306399703025818
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1024,96,1,128,1,float16,float16,0,0.05276640057563782
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1024,96,1,128,1,float16,fp8,0,0.05070880055427551
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1024,96,96,128,1,float16,fp8,0,0.37889599800109863
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1024,96,1,128,1,fp8,fp8,0,0.05130400061607361
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1024,96,2,128,1,float16,float16,0,0.05249119997024536
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1024,96,2,128,1,float16,fp8,0,0.05087360143661499
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1024,96,2,128,1,fp8,fp8,0,0.050507199764251706
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1024,96,4,128,1,float16,fp8,0,0.050780802965164185
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1024,96,4,128,1,fp8,fp8,0,0.050811201333999634
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1024,96,8,128,1,float16,float16,0,0.05316799879074097
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1024,96,4,128,1,float16,float16,0,0.05262879729270935
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1024,96,8,128,1,float16,fp8,0,0.050809597969055174
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1024,96,8,128,1,fp8,fp8,0,0.05097439885139465
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1024,96,4,128,1,float16,float16,0,0.29007198810577395
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1024,96,96,128,1,float16,float16,0,0.2168191909790039
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1024,96,4,128,1,fp8,fp8,0,0.15104639530181885
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1024,96,2,128,1,fp8,fp8,0,0.08407199978828431
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1024,96,8,128,1,fp8,fp8,0,0.08465759754180908
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,512,96,1,128,1,float16,float16,0,3.2540447235107424
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,512,96,1,128,1,float16,fp8,0,3.2549823760986327
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,512,96,1,128,1,fp8,fp8,0,3.255788803100586
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,512,96,2,128,1,float16,float16,0,3.2570526123046877
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,512,96,2,128,1,float16,fp8,0,3.2534046173095703
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,512,96,2,128,1,fp8,fp8,0,3.252339172363281
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,512,96,4,128,1,float16,float16,0,3.262736129760742
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,512,96,4,128,1,float16,fp8,0,3.25302734375
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,512,96,4,128,1,fp8,fp8,0,3.2868385314941406
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,512,96,8,128,1,float16,fp8,0,3.242679977416992
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,512,96,8,128,1,float16,float16,0,3.3390560150146484
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,512,96,8,128,1,fp8,fp8,0,3.2903934478759767
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,512,96,96,128,1,float16,float16,0,2.6461280822753905
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,512,96,96,128,1,float16,fp8,0,2.380415916442871
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,512,96,1,128,1,float16,float16,0,1.5922127723693849
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,512,96,1,128,1,float16,fp8,0,1.6380191802978517
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,512,96,96,128,1,fp8,fp8,0,2.4187007904052735
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,512,96,1,128,1,fp8,fp8,0,1.6354015350341797
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,512,96,2,128,1,float16,float16,0,1.6881872177124024
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,512,96,2,128,1,float16,fp8,0,1.6330047607421876
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,512,96,4,128,1,float16,float16,0,1.6117168426513673
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,512,96,2,128,1,fp8,fp8,0,1.6768144607543944
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,512,96,4,128,1,float16,fp8,0,1.6844575881958008
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,512,96,4,128,1,fp8,fp8,0,1.6340240478515624
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,512,96,8,128,1,float16,float16,0,1.6519088745117188
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,512,96,96,128,1,float16,float16,0,1.3125807762145996
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,512,96,8,128,1,float16,fp8,0,1.628887939453125
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,512,96,8,128,1,fp8,fp8,0,1.6530960083007813
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,512,96,1,128,1,float16,float16,0,0.8086383819580079
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,512,96,96,128,1,float16,fp8,0,1.2105135917663574
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,512,96,1,128,1,float16,fp8,0,0.8267775535583496
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,512,96,96,128,1,fp8,fp8,0,1.2028863906860352
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,512,96,1,128,1,fp8,fp8,0,0.8748255729675293
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,512,96,2,128,1,float16,fp8,0,0.8235103607177734
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,512,96,2,128,1,fp8,fp8,0,0.8240943908691406
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,512,96,4,128,1,float16,float16,0,0.8187439918518067
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,512,96,4,128,1,float16,fp8,0,0.8253999710083008
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,512,96,4,128,1,fp8,fp8,0,0.8249296188354492
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,512,96,8,128,1,float16,float16,0,0.8348624229431152
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,512,96,8,128,1,float16,fp8,0,0.8448351860046387
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,512,96,96,128,1,float16,float16,0,0.6625999927520752
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,512,96,8,128,1,fp8,fp8,0,0.8226991653442383
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,512,96,96,128,1,float16,fp8,0,0.6188144207000732
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,512,96,1,128,1,float16,fp8,0,0.42073922157287597
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,512,96,1,128,1,float16,float16,0,0.41060800552368165
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,512,96,1,128,1,fp8,fp8,0,0.42487201690673826
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,512,96,96,128,1,fp8,fp8,0,0.606663990020752
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,512,96,2,128,1,float16,float16,0,0.4102960109710693
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,512,96,2,128,1,fp8,fp8,0,0.4198304176330566
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,512,96,2,128,1,float16,fp8,0,0.4194672107696533
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,512,96,4,128,1,float16,float16,0,0.41685757637023924
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,512,96,4,128,1,float16,fp8,0,0.42013120651245117
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,512,96,8,128,1,float16,fp8,0,0.4194143772125244
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,512,96,8,128,1,fp8,fp8,0,0.41932001113891604
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,512,96,96,128,1,float16,float16,0,0.3392672061920166
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,512,96,1,128,1,float16,float16,0,0.2139280080795288
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,512,96,8,128,1,float16,float16,0,0.42499680519104005
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,512,96,1,128,1,float16,fp8,0,0.21772799491882325
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,512,96,2,128,1,float16,float16,0,0.21446080207824708
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,512,96,1,128,1,fp8,fp8,0,0.2177839994430542
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,512,96,2,128,1,fp8,fp8,0,0.21805601119995116
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,512,96,4,128,1,float16,float16,0,0.2163952112197876
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,512,96,4,128,1,float16,fp8,0,0.21734719276428222
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,512,96,4,128,1,fp8,fp8,0,0.21827518939971924
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,512,96,8,128,1,float16,float16,0,0.2216655969619751
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,512,96,8,128,1,float16,fp8,0,0.2176448106765747
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,512,96,8,128,1,fp8,fp8,0,0.2180943965911865
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,512,96,96,128,1,float16,fp8,0,0.16414400339126586
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,512,96,96,128,1,fp8,fp8,0,0.16475839614868165
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,512,96,1,128,1,float16,float16,0,0.11604640483856202
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,512,96,1,128,1,float16,fp8,0,0.11751199960708618
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,512,96,2,128,1,float16,float16,0,0.11754239797592163
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,512,96,2,128,1,float16,fp8,0,0.117467200756073
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,512,96,2,128,1,fp8,fp8,0,0.11753920316696168
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,512,96,4,128,1,float16,float16,0,0.11832159757614136
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,512,96,4,128,1,float16,fp8,0,0.11736639738082885
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,512,96,4,128,1,fp8,fp8,0,0.11738879680633545
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,512,96,8,128,1,float16,float16,0,0.12043039798736573
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,512,96,4,128,1,fp8,fp8,0,0.4223055839538574
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,512,96,8,128,1,float16,fp8,0,0.11784640550613404
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,512,96,8,128,1,fp8,fp8,0,0.1172144055366516
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,512,96,96,128,1,float16,float16,0,0.10020480155944825
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,512,96,96,128,1,float16,fp8,0,0.3269167900085449
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,512,96,96,128,1,fp8,fp8,0,0.0911520004272461
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,512,96,1,128,1,float16,float16,0,0.06706560254096985
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,512,96,1,128,1,float16,fp8,0,0.06568480134010315
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,512,96,2,128,1,float16,fp8,0,0.21809279918670654
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,512,96,1,128,1,fp8,fp8,0,0.06578559875488281
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,512,96,2,128,1,float16,float16,0,0.06705120205879211
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,512,96,2,128,1,float16,fp8,0,0.06599519848823547
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,512,96,2,128,1,fp8,fp8,0,0.06593440175056457
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,512,96,4,128,1,float16,float16,0,0.0682479977607727
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,512,96,4,128,1,float16,fp8,0,0.06598719954490662
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,512,96,4,128,1,fp8,fp8,0,0.06602720022201539
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,512,96,8,128,1,float16,float16,0,0.06932640075683594
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,512,96,8,128,1,float16,fp8,0,0.06599199771881104
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,512,96,8,128,1,fp8,fp8,0,0.0657920002937317
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,512,96,96,128,1,float16,float16,0,0.05654240250587463
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,512,96,96,128,1,float16,fp8,0,0.05081440210342407
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,512,96,96,128,1,fp8,fp8,0,0.05122079849243164
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,512,96,96,128,1,float16,float16,0,0.17910879850387573
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,512,96,1,128,1,float16,fp8,0,0.039131200313568114
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,512,96,1,128,1,fp8,fp8,0,0.039073601365089417
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,512,96,2,128,1,float16,float16,0,0.03923520147800445
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,512,96,2,128,1,float16,fp8,0,0.03903999924659729
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,512,96,2,128,1,float16,float16,0,0.8290176391601562
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,512,96,2,128,1,fp8,fp8,0,0.03912799954414368
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,512,96,1,128,1,float16,float16,0,0.039241600036621097
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,512,96,4,128,1,fp8,fp8,0,0.0391184002161026
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,512,96,8,128,1,float16,float16,0,0.040403199195861814
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,512,96,8,128,1,float16,fp8,0,0.03914400041103363
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,512,96,8,128,1,fp8,fp8,0,0.039059200882911684
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,512,96,96,128,1,float16,float16,0,0.03299359977245331
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,512,96,96,128,1,float16,fp8,0,0.03299359977245331
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,512,96,96,128,1,fp8,fp8,0,0.032979199290275575
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,512,96,1,128,1,float16,float16,0,0.027167999744415285
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,512,96,1,128,1,float16,fp8,0,0.026859200000762938
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,512,96,1,128,1,fp8,fp8,0,0.026926401257514953
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,512,96,2,128,1,float16,fp8,0,0.026875200867652892
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,512,96,2,128,1,fp8,fp8,0,0.027003198862075806
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,512,96,4,128,1,float16,float16,0,0.02794240117073059
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,512,96,4,128,1,float16,fp8,0,0.02688480019569397
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,512,96,4,128,1,fp8,fp8,0,0.02683199942111969
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,512,96,8,128,1,float16,float16,0,0.028019198775291444
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,512,96,8,128,1,float16,fp8,0,0.026820799708366393
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,512,96,8,128,1,fp8,fp8,0,0.026844799518585205
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,512,96,96,128,1,float16,fp8,0,0.09045119881629944
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,256,96,1,128,1,float16,float16,0,1.3081680297851563
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,256,96,1,128,1,float16,fp8,0,1.3715231895446778
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,512,96,4,128,1,float16,fp8,0,0.0390608012676239
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,512,96,4,128,1,float16,float16,0,0.039724799990653994
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,512,96,2,128,1,float16,float16,0,0.027825599908828734
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,256,96,1,128,1,fp8,fp8,0,1.374342441558838
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,256,96,2,128,1,float16,float16,0,1.3078800201416017
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,256,96,2,128,1,float16,fp8,0,1.3696880340576172
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,512,96,96,128,1,fp8,fp8,0,0.31268959045410155
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,256,96,4,128,1,float16,float16,0,1.3275728225708008
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,256,96,2,128,1,fp8,fp8,0,1.3695232391357421
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,256,96,4,128,1,float16,fp8,0,1.3704159736633301
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,256,96,4,128,1,fp8,fp8,0,1.3914112091064452
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,256,96,8,128,1,float16,float16,0,1.3658368110656738
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,256,96,8,128,1,float16,fp8,0,1.3689616203308106
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,256,96,96,128,1,float16,float16,0,1.1530863761901855
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,256,96,8,128,1,fp8,fp8,0,1.3686944007873536
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,256,96,96,128,1,float16,fp8,0,1.0633328437805176
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,256,96,1,128,1,float16,float16,0,0.6603839874267579
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,256,96,96,128,1,fp8,fp8,0,1.0633184432983398
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,256,96,1,128,1,float16,fp8,0,0.6942527770996094
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,256,96,1,128,1,fp8,fp8,0,0.7009776115417481
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,256,96,2,128,1,float16,float16,0,0.6618080139160156
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,256,96,2,128,1,fp8,fp8,0,0.6923903942108154
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,256,96,4,128,1,float16,float16,0,0.6698463916778564
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,256,96,4,128,1,float16,fp8,0,0.6923840045928955
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,256,96,8,128,1,float16,float16,0,0.693393611907959
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,256,96,4,128,1,fp8,fp8,0,0.6925631999969483
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,256,96,8,128,1,float16,fp8,0,0.6922832012176514
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,256,96,8,128,1,fp8,fp8,0,0.6919439792633056
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,256,96,96,128,1,float16,float16,0,0.5845871925354004
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,256,96,1,128,1,float16,float16,0,0.33766560554504393
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,256,96,96,128,1,float16,fp8,0,0.5389039993286133
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,256,96,1,128,1,float16,fp8,0,0.3528559923171997
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,256,96,1,128,1,fp8,fp8,0,0.35265278816223145
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,256,96,2,128,1,float16,float16,0,0.3375040054321289
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,256,96,2,128,1,float16,fp8,0,0.35189759731292725
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,256,96,2,128,1,fp8,fp8,0,0.35249600410461424
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,256,96,4,128,1,float16,fp8,0,0.35203359127044676
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,256,96,4,128,1,fp8,fp8,0,0.35163679122924807
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,256,96,8,128,1,float16,float16,0,0.35139999389648435
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,256,96,8,128,1,float16,fp8,0,0.3519887924194336
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,256,96,8,128,1,fp8,fp8,0,0.35193281173706054
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,256,96,96,128,1,float16,float16,0,0.2988352060317993
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,256,96,96,128,1,float16,fp8,0,0.27500319480895996
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,512,96,1,128,1,fp8,fp8,0,0.11694400310516358
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,256,96,1,128,1,float16,float16,0,0.17541120052337647
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,256,96,1,128,1,float16,fp8,0,0.18097440004348755
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,256,96,1,128,1,fp8,fp8,0,0.1811087965965271
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,256,96,2,128,1,float16,float16,0,0.17530080080032348
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,256,96,2,128,1,float16,fp8,0,0.18091679811477662
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,256,96,2,128,1,fp8,fp8,0,0.18122559785842896
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,256,96,4,128,1,float16,float16,0,0.17782880067825318
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,256,96,4,128,1,float16,fp8,0,0.18122559785842896
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,256,96,8,128,1,float16,float16,0,0.1829584002494812
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,256,96,8,128,1,float16,fp8,0,0.18180160522460936
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,256,96,8,128,1,fp8,fp8,0,0.1812351942062378
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,256,96,96,128,1,float16,float16,0,0.15947680473327636
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,256,96,2,128,1,float16,fp8,0,0.6932479858398437
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,256,96,4,128,1,fp8,fp8,0,0.18073439598083496
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,256,96,96,128,1,float16,fp8,0,0.1459887981414795
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,256,96,1,128,1,float16,float16,0,0.0970960021018982
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,256,96,1,128,1,float16,fp8,0,0.09883520007133484
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,256,96,2,128,1,float16,float16,0,0.09739360213279724
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,256,96,2,128,1,float16,fp8,0,0.09900959730148315
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,256,96,2,128,1,fp8,fp8,0,0.0987936019897461
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,256,96,4,128,1,float16,fp8,0,0.09921119809150696
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,256,96,4,128,1,float16,float16,0,0.09871680140495301
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,256,96,4,128,1,fp8,fp8,0,0.09898719787597657
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,256,96,8,128,1,float16,fp8,0,0.09943040013313294
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,256,96,8,128,1,fp8,fp8,0,0.09921280145645142
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,256,96,96,128,1,fp8,fp8,0,0.5407584190368653
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,256,96,8,128,1,float16,float16,0,0.10083999633789062
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,256,96,96,128,1,float16,float16,0,0.08896960020065307
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,256,96,96,128,1,float16,fp8,0,0.0805184006690979
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,256,96,96,128,1,fp8,fp8,0,0.08188959956169128
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,256,96,1,128,1,float16,fp8,0,0.05550079941749573
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,256,96,1,128,1,fp8,fp8,0,0.055731201171875
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,256,96,4,128,1,float16,float16,0,0.34303200244903564
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,256,96,2,128,1,float16,float16,0,0.056032001972198486
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,256,96,2,128,1,float16,fp8,0,0.05573599934577942
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,256,96,2,128,1,fp8,fp8,0,0.05576800107955933
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,256,96,4,128,1,float16,float16,0,0.05755680203437805
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,256,96,4,128,1,float16,fp8,0,0.05589119791984558
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,256,96,4,128,1,fp8,fp8,0,0.055764800310134886
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,256,96,8,128,1,float16,float16,0,0.058689600229263304
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,256,96,8,128,1,float16,fp8,0,0.05580000281333923
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,256,96,8,128,1,fp8,fp8,0,0.05603839755058289
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,256,96,96,128,1,float16,fp8,0,0.04383679926395416
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,256,96,1,128,1,float16,float16,0,0.03283360004425049
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,256,96,1,128,1,float16,fp8,0,0.033137598633766176
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,256,96,96,128,1,fp8,fp8,0,0.27526240348815917
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,256,96,2,128,1,float16,float16,0,0.03267039954662323
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,256,96,2,128,1,float16,fp8,0,0.0324176013469696
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,256,96,2,128,1,fp8,fp8,0,0.032574400305747986
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,256,96,4,128,1,float16,float16,0,0.032892799377441405
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,256,96,4,128,1,float16,fp8,0,0.031472000479698184
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,256,96,4,128,1,fp8,fp8,0,0.03178240060806274
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,256,96,8,128,1,float16,float16,0,0.032974401116371156
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,256,96,96,128,1,fp8,fp8,0,0.14562879800796508
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,256,96,8,128,1,float16,fp8,0,0.03197920024394989
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,256,96,8,128,1,fp8,fp8,0,0.032576000690460204
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,256,96,1,128,1,fp8,fp8,0,0.0987119972705841
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,256,96,96,128,1,float16,fp8,0,0.028915199637413024
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,256,96,96,128,1,fp8,fp8,0,0.028915199637413024
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,256,96,1,128,1,float16,float16,0,0.022812800109386445
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,256,96,1,128,1,float16,fp8,0,0.022735999524593355
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,256,96,1,128,1,fp8,fp8,0,0.02276639938354492
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,256,96,2,128,1,float16,float16,0,0.02277279943227768
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,256,96,2,128,1,float16,fp8,0,0.022737599909305573
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,256,96,2,128,1,fp8,fp8,0,0.023387199640274046
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,256,96,4,128,1,float16,float16,0,0.0236272007226944
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,256,96,4,128,1,float16,fp8,0,0.023046399652957916
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,256,96,4,128,1,fp8,fp8,0,0.02332639992237091
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,256,96,8,128,1,float16,float16,0,0.024827200174331664
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,256,96,8,128,1,fp8,fp8,0,0.023160000145435334
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,256,96,96,128,1,float16,float16,0,0.018695999681949616
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,256,96,96,128,1,float16,fp8,0,0.018670399487018586
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,256,96,96,128,1,fp8,fp8,0,0.018636800348758698
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,256,96,1,128,1,float16,float16,0,0.016620799899101257
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,256,96,1,128,1,float16,float16,0,0.05610719919204712
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,256,96,1,128,1,fp8,fp8,0,0.014596800506114959
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,256,96,2,128,1,float16,float16,0,0.01655679941177368
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,256,96,2,128,1,float16,fp8,0,0.014646400511264802
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,256,96,1,128,1,float16,fp8,0,0.015113599598407745
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,256,96,2,128,1,fp8,fp8,0,0.01464959979057312
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,256,96,4,128,1,float16,float16,0,0.016624000668525696
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,256,96,4,128,1,float16,fp8,0,0.01488959938287735
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,256,96,4,128,1,fp8,fp8,0,0.01552480012178421
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,256,96,96,128,1,float16,float16,0,0.04928640127182007
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,256,96,8,128,1,float16,fp8,0,0.016607999801635742
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,256,96,96,128,1,fp8,fp8,0,0.043219199776649474
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,256,96,1,128,1,fp8,fp8,0,0.0325408011674881
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,128,96,1,128,1,float16,float16,0,0.7993599891662597
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,128,96,1,128,1,float16,fp8,0,0.8321087837219239
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,128,96,1,128,1,fp8,fp8,0,0.8324015617370606
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,128,96,2,128,1,float16,float16,0,0.7987967967987061
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,256,96,96,128,1,float16,float16,0,0.02784479856491089
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,128,96,2,128,1,float16,fp8,0,0.8304592132568359
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,128,96,4,128,1,float16,float16,0,0.8091584205627441
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,256,96,8,128,1,float16,fp8,0,0.02335519939661026
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,128,96,4,128,1,fp8,fp8,0,0.8302463531494141
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,128,96,8,128,1,float16,float16,0,0.8275792121887207
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,128,96,8,128,1,float16,fp8,0,0.8293519973754883
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,128,96,96,128,1,float16,float16,0,0.6480432033538819
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,128,96,8,128,1,fp8,fp8,0,0.8289600372314453
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,256,96,8,128,1,float16,float16,0,0.016467200219631196
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,256,96,8,128,1,fp8,fp8,0,0.015372799336910247
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,128,96,1,128,1,float16,float16,0,0.4066671848297119
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,128,96,1,128,1,float16,fp8,0,0.421665620803833
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,128,96,1,128,1,fp8,fp8,0,0.4218592166900635
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,128,96,2,128,1,float16,float16,0,0.40653600692749026
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,128,96,2,128,1,float16,fp8,0,0.42206878662109376
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,128,96,2,128,1,fp8,fp8,0,0.4216288089752197
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,128,96,4,128,1,float16,float16,0,0.41049761772155763
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,128,96,4,128,1,float16,fp8,0,0.4209104061126709
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,128,96,4,128,1,fp8,fp8,0,0.42187042236328126
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,128,96,8,128,1,float16,float16,0,0.4200911998748779
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,128,96,8,128,1,float16,fp8,0,0.4205935955047607
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,128,96,8,128,1,fp8,fp8,0,0.4204063892364502
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,128,96,96,128,1,float16,float16,0,0.33086400032043456
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,128,96,96,128,1,float16,fp8,0,0.3092144012451172
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,128,96,1,128,1,float16,float16,0,0.20983200073242186
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,128,96,1,128,1,float16,fp8,0,0.2160207986831665
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,128,96,2,128,1,fp8,fp8,0,0.8314319610595703
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,128,96,1,128,1,fp8,fp8,0,0.21579999923706056
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,128,96,4,128,1,float16,fp8,0,0.8294320106506348
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,128,96,2,128,1,float16,float16,0,0.21017119884490967
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,128,96,2,128,1,fp8,fp8,0,0.21601600646972657
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,128,96,4,128,1,float16,float16,0,0.2116463899612427
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,128,96,4,128,1,float16,fp8,0,0.21594719886779784
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,128,96,4,128,1,fp8,fp8,0,0.2159503936767578
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,128,96,8,128,1,float16,float16,0,0.21626079082489014
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,128,96,8,128,1,float16,fp8,0,0.2159791946411133
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,128,96,8,128,1,fp8,fp8,0,0.2161087989807129
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,128,96,96,128,1,float16,float16,0,0.1708799958229065
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,128,96,96,128,1,float16,fp8,0,0.16001280546188354
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,128,96,96,128,1,fp8,fp8,0,0.15992799997329712
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,128,96,1,128,1,float16,float16,0,0.11099840402603149
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,128,96,1,128,1,float16,fp8,0,0.11296800374984742
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,128,96,1,128,1,fp8,fp8,0,0.11292480230331421
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,128,96,96,128,1,float16,fp8,0,0.6038527965545655
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,128,96,2,128,1,float16,float16,0,0.11064159870147705
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,128,96,2,128,1,float16,fp8,0,0.11283199787139893
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,128,96,2,128,1,fp8,fp8,0,0.11304639577865601
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,128,96,96,128,1,fp8,fp8,0,0.6056000232696533
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,128,96,4,128,1,float16,float16,0,0.11239360570907593
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,128,96,8,128,1,float16,float16,0,0.11417280435562134
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,128,96,8,128,1,fp8,fp8,0,0.11311839818954468
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,128,96,8,128,1,float16,fp8,0,0.11305279731750488
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,128,96,96,128,1,float16,fp8,0,0.08622239828109741
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,128,96,96,128,1,fp8,fp8,0,0.08631839752197265
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,128,96,1,128,1,float16,float16,0,0.061768001317977904
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,128,96,1,128,1,float16,fp8,0,0.061710399389266965
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,128,96,1,128,1,fp8,fp8,0,0.06157919764518738
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,128,96,2,128,1,float16,float16,0,0.06226400136947632
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,128,96,2,128,1,float16,fp8,0,0.061596798896789554
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,128,96,2,128,1,fp8,fp8,0,0.06165120005607605
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,128,96,4,128,1,float16,float16,0,0.06271679997444153
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,128,96,4,128,1,float16,fp8,0,0.06213279962539673
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,128,96,4,128,1,fp8,fp8,0,0.061710399389266965
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,128,96,8,128,1,float16,float16,0,0.06402559876441956
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,128,96,8,128,1,float16,fp8,0,0.06161919832229614
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,128,96,8,128,1,fp8,fp8,0,0.061660802364349364
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,128,96,96,128,1,float16,float16,0,0.05171520113945007
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,128,96,96,128,1,float16,fp8,0,0.04852960109710693
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,128,96,96,128,1,fp8,fp8,0,0.0484607994556427
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,128,96,1,128,1,float16,float16,0,0.03544319868087768
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,128,96,1,128,1,float16,fp8,0,0.03629119992256165
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,128,96,1,128,1,fp8,fp8,0,0.03669919967651367
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,128,96,2,128,1,float16,float16,0,0.03516480028629303
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,128,96,2,128,1,float16,fp8,0,0.036904001235961915
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,128,96,96,128,1,fp8,fp8,0,0.30912320613861083
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,128,96,4,128,1,float16,float16,0,0.0366703987121582
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,128,96,4,128,1,float16,fp8,0,0.03698239922523498
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,128,96,4,128,1,fp8,fp8,0,0.03701280057430267
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,128,96,8,128,1,float16,float16,0,0.03683359920978546
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,128,96,8,128,1,float16,fp8,0,0.036878401041030885
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,128,96,2,128,1,float16,fp8,0,0.21573119163513182
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,128,96,96,128,1,float16,float16,0,0.026855999231338502
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,128,96,96,128,1,float16,fp8,0,0.026881599426269533
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,128,96,96,128,1,fp8,fp8,0,0.027827200293540955
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,128,96,1,128,1,float16,float16,0,0.021878400444984437
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,128,96,1,128,1,float16,fp8,0,0.022756800055503845
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,128,96,1,128,1,fp8,fp8,0,0.022598400712013245
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,128,96,2,128,1,float16,float16,0,0.022643199563026427
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,128,96,2,128,1,fp8,fp8,0,0.022627200186252593
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,128,96,4,128,1,float16,float16,0,0.022467200458049775
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,128,96,4,128,1,float16,fp8,0,0.021427200734615327
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,128,96,4,128,1,fp8,fp8,0,0.022457599639892578
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,128,96,8,128,1,float16,float16,0,0.02269120067358017
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,128,96,8,128,1,float16,fp8,0,0.02245279997587204
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,128,96,8,128,1,fp8,fp8,0,0.022433599829673766
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,128,96,96,128,1,float16,float16,0,0.018673600256443025
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,128,96,96,128,1,float16,fp8,0,0.01866399943828583
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,128,96,96,128,1,fp8,fp8,0,0.018644799292087556
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,128,96,1,128,1,float16,float16,0,0.016638399660587312
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,128,96,1,128,1,float16,fp8,0,0.016606399416923524
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,128,96,1,128,1,fp8,fp8,0,0.016630400717258454
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,128,96,4,128,1,float16,fp8,0,0.11281759738922119
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,128,96,2,128,1,float16,fp8,0,0.01664000004529953
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,128,96,4,128,1,fp8,fp8,0,0.11294879913330078
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,128,96,2,128,1,fp8,fp8,0,0.01655679941177368
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,128,96,4,128,1,float16,float16,0,0.01661919951438904
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,128,96,4,128,1,float16,fp8,0,0.016569599509239197
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,128,96,4,128,1,fp8,fp8,0,0.01656640022993088
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,128,96,8,128,1,float16,float16,0,0.01659200042486191
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,128,96,8,128,1,float16,fp8,0,0.01658560037612915
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,128,96,8,128,1,fp8,fp8,0,0.01655520051717758
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,128,96,96,128,1,float16,float16,0,0.014563199877738953
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,128,96,96,128,1,float16,fp8,0,0.012694400548934937
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,128,96,96,128,1,fp8,fp8,0,0.012478400021791458
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,128,96,1,128,1,float16,float16,0,0.012516799569129943
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,128,96,1,128,1,float16,fp8,0,0.01250080019235611
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,128,96,2,128,1,float16,fp8,0,0.012408000230789185
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,128,96,2,128,1,float16,float16,0,0.012444800138473511
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,128,96,2,128,1,fp8,fp8,0,0.012425599992275238
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,128,96,4,128,1,float16,float16,0,0.011691199988126755
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,128,96,4,128,1,float16,fp8,0,0.012508800625801087
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,128,96,4,128,1,fp8,fp8,0,0.012433599680662155
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,128,96,8,128,1,float16,float16,0,0.012444800138473511
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,128,96,8,128,1,float16,fp8,0,0.012505599856376648
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,128,96,8,128,1,fp8,fp8,0,0.0124719999730587
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,64,96,1,128,1,float16,float16,0,0.6131792068481445
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,128,96,2,128,1,fp8,fp8,0,0.036268800497055054
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,128,96,8,128,1,fp8,fp8,0,0.03691200017929077
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,64,96,1,128,1,float16,fp8,0,0.6310783863067627
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,64,96,1,128,1,fp8,fp8,0,0.6304160118103027
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,128,96,2,128,1,float16,fp8,0,0.022511999309062957
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,64,96,2,128,1,float16,float16,0,0.6144864082336425
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,64,96,2,128,1,float16,fp8,0,0.6308447837829589
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,64,96,2,128,1,fp8,fp8,0,0.6301136016845703
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,64,96,4,128,1,float16,float16,0,0.6184576034545899
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,128,96,2,128,1,float16,float16,0,0.016582399606704712
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,64,96,4,128,1,float16,fp8,0,0.6305856227874755
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,128,96,96,128,1,float16,float16,0,0.0921392023563385
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,64,96,4,128,1,fp8,fp8,0,0.6300191879272461
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,64,96,8,128,1,float16,float16,0,0.6268608093261718
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,128,96,1,128,1,fp8,fp8,0,0.011388800293207168
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,64,96,96,128,1,float16,float16,0,0.4331071853637695
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,64,96,8,128,1,float16,fp8,0,0.6305744171142578
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,64,96,8,128,1,fp8,fp8,0,0.6305280208587647
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,64,96,96,128,1,float16,fp8,0,0.41297760009765627
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,64,96,96,128,1,fp8,fp8,0,0.41246719360351564
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,64,96,1,128,1,float16,fp8,0,0.32036800384521485
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,64,96,1,128,1,fp8,fp8,0,0.3206671953201294
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,64,96,2,128,1,float16,fp8,0,0.32181439399719236
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,64,96,2,128,1,fp8,fp8,0,0.32027199268341067
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,64,96,4,128,1,float16,float16,0,0.3156271934509277
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,64,96,4,128,1,float16,fp8,0,0.3201855897903442
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,64,96,4,128,1,fp8,fp8,0,0.3201776027679443
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,64,96,8,128,1,float16,float16,0,0.32031199932098386
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,64,96,8,128,1,float16,fp8,0,0.3198944091796875
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,64,96,96,128,1,float16,float16,0,0.22237279415130615
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,64,96,8,128,1,fp8,fp8,0,0.3205391883850098
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,64,96,96,128,1,float16,fp8,0,0.21156799793243408
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,64,96,96,128,1,fp8,fp8,0,0.21271679401397706
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,64,96,1,128,1,float16,float16,0,0.16360479593276978
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,64,96,1,128,1,fp8,fp8,0,0.1656607985496521
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,64,96,2,128,1,float16,float16,0,0.1629807949066162
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,64,96,2,128,1,float16,fp8,0,0.16627520322799683
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,64,96,4,128,1,float16,float16,0,0.1647487998008728
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,64,96,1,128,1,float16,fp8,0,0.16518239974975585
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,64,96,4,128,1,float16,fp8,0,0.1654288053512573
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,64,96,4,128,1,fp8,fp8,0,0.16575839519500732
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,64,96,8,128,1,float16,float16,0,0.16649919748306274
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,64,96,8,128,1,float16,fp8,0,0.1664512038230896
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,64,96,8,128,1,fp8,fp8,0,0.16605600118637084
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,64,96,96,128,1,float16,fp8,0,0.11122080087661743
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,64,96,96,128,1,fp8,fp8,0,0.11128959655761719
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,64,96,1,128,1,float16,float16,0,0.08781279921531678
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,64,96,1,128,1,float16,fp8,0,0.08678079843521118
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,64,96,1,128,1,fp8,fp8,0,0.08682720065116882
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,64,96,2,128,1,float16,float16,0,0.08775039911270141
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,64,96,2,128,1,float16,fp8,0,0.08675519824028015
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,64,96,2,128,1,fp8,fp8,0,0.086735999584198
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,64,96,4,128,1,float16,float16,0,0.0887167990207672
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,64,96,4,128,1,float16,fp8,0,0.08688160181045532
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,64,96,4,128,1,fp8,fp8,0,0.08708959817886353
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,64,96,8,128,1,float16,float16,0,0.08987200260162354
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,64,96,8,128,1,float16,fp8,0,0.0870639979839325
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,64,96,96,128,1,float16,float16,0,0.06423360109329224
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,64,96,96,128,1,float16,fp8,0,0.060254400968551634
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,64,96,96,128,1,fp8,fp8,0,0.06042400002479553
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,64,96,1,128,1,float16,float16,0,0.04816800057888031
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,64,96,1,128,1,float16,fp8,0,0.04845280051231384
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,64,96,1,128,1,fp8,fp8,0,0.04893920123577118
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,64,96,2,128,1,float16,float16,0,0.048153600096702574
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,64,96,2,128,1,float16,fp8,0,0.0493151992559433
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,64,96,2,128,1,fp8,fp8,0,0.047942399978637695
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,64,96,4,128,1,float16,float16,0,0.04910080134868622
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,64,96,4,128,1,float16,fp8,0,0.048107200860977174
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,64,96,4,128,1,fp8,fp8,0,0.04905279874801636
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,64,96,1,128,1,float16,float16,0,0.3132704019546509
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,64,96,8,128,1,fp8,fp8,0,0.04843840003013611
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,64,96,2,128,1,float16,float16,0,0.3137727975845337
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,64,96,96,128,1,float16,float16,0,0.03495199978351593
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,64,96,96,128,1,float16,fp8,0,0.03502880036830902
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,64,96,96,128,1,fp8,fp8,0,0.03519200086593628
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,64,96,1,128,1,float16,float16,0,0.029598399996757507
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,64,96,1,128,1,float16,fp8,0,0.028809601068496705
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,64,96,2,128,1,float16,float16,0,0.02892799973487854
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,64,96,2,128,1,float16,fp8,0,0.02889440059661865
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,64,96,2,128,1,fp8,fp8,0,0.028830400109291075
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,64,96,4,128,1,float16,float16,0,0.028884801268577575
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,64,96,4,128,1,float16,fp8,0,0.02881920039653778
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,64,96,4,128,1,fp8,fp8,0,0.02886880040168762
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,64,96,2,128,1,fp8,fp8,0,0.16588799953460692
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,64,96,8,128,1,float16,float16,0,0.02886880040168762
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,64,96,8,128,1,float16,fp8,0,0.029120001196861266
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,64,96,8,128,1,fp8,fp8,0,0.02892000079154968
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,64,96,96,128,1,float16,float16,0,0.02115360051393509
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,64,96,96,128,1,float16,fp8,0,0.020707200467586517
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,64,96,96,128,1,fp8,fp8,0,0.02072640061378479
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,64,96,1,128,1,float16,float16,0,0.01863519996404648
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,64,96,96,128,1,float16,float16,0,0.1178704023361206
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,64,96,1,128,1,fp8,fp8,0,0.018644799292087556
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,64,96,2,128,1,float16,float16,0,0.018632000684738158
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,64,96,2,128,1,float16,fp8,0,0.01855040043592453
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,64,96,2,128,1,fp8,fp8,0,0.018676799535751343
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,64,96,4,128,1,float16,float16,0,0.018692800402641298
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,64,96,4,128,1,float16,fp8,0,0.018544000387191773
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,64,96,4,128,1,fp8,fp8,0,0.018668800592422485
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,64,96,8,128,1,float16,float16,0,0.01860159933567047
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,64,96,1,128,1,float16,fp8,0,0.01858240067958832
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,64,96,8,128,1,fp8,fp8,0,0.018648000061511995
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,64,96,96,128,1,float16,fp8,0,0.015852800011634825
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,64,96,96,128,1,fp8,fp8,0,0.016568000614643096
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,64,96,8,128,1,fp8,fp8,0,0.0869264006614685
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,64,96,1,128,1,float16,float16,0,0.01456640064716339
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,64,96,96,128,1,float16,float16,0,0.016598400473594666
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,64,96,1,128,1,float16,fp8,0,0.01451359987258911
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,64,96,1,128,1,fp8,fp8,0,0.014537599682807923
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,64,96,2,128,1,float16,float16,0,0.014507199823856353
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,64,96,2,128,1,float16,fp8,0,0.014534400403499603
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,64,96,4,128,1,float16,float16,0,0.014526399970054626
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,64,96,4,128,1,float16,fp8,0,0.014532800018787383
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,64,96,4,128,1,fp8,fp8,0,0.014555199444293976
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,64,96,8,128,1,float16,fp8,0,0.014500799775123595
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,64,96,2,128,1,fp8,fp8,0,0.014526399970054626
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,64,96,8,128,1,fp8,fp8,0,0.014539200067520141
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,64,96,96,128,1,float16,fp8,0,0.012441600114107132
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,64,96,96,128,1,fp8,fp8,0,0.012406399846076966
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,64,96,8,128,1,float16,float16,0,0.04922879934310913
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,64,96,1,128,1,float16,float16,0,0.010491199791431427
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,64,96,1,128,1,float16,fp8,0,0.010468800365924836
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,64,96,8,128,1,float16,fp8,0,0.049091199040412904
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,64,96,2,128,1,float16,float16,0,0.010502400249242783
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,64,96,2,128,1,float16,fp8,0,0.010438399761915207
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,64,96,2,128,1,fp8,fp8,0,0.010435199737548828
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,64,96,4,128,1,float16,float16,0,0.010516799986362457
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,64,96,4,128,1,float16,fp8,0,0.010435199737548828
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,64,96,4,128,1,fp8,fp8,0,0.01043040007352829
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,64,96,8,128,1,float16,float16,0,0.01043040007352829
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,64,96,8,128,1,float16,fp8,0,0.010515200346708298
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,64,96,1,128,1,fp8,fp8,0,0.029281601309776306
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,32,96,1,128,1,float16,float16,0,0.5234208106994629
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,32,96,1,128,1,float16,fp8,0,0.5390592098236084
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,32,96,1,128,1,fp8,fp8,0,0.5387360095977783
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,32,96,2,128,1,float16,float16,0,0.5242784023284912
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,64,96,8,128,1,float16,fp8,0,0.018607999384403228
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,32,96,2,128,1,float16,fp8,0,0.5374112129211426
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,32,96,2,128,1,fp8,fp8,0,0.5395311832427978
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,32,96,4,128,1,float16,float16,0,0.5252831935882568
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,32,96,4,128,1,float16,fp8,0,0.5377871990203857
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,64,96,8,128,1,float16,float16,0,0.014528000354766845
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,32,96,4,128,1,fp8,fp8,0,0.5376815795898438
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,32,96,8,128,1,float16,float16,0,0.529643201828003
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,32,96,8,128,1,float16,fp8,0,0.5369487762451172
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,32,96,8,128,1,fp8,fp8,0,0.5375711917877197
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,32,96,96,128,1,float16,fp8,0,0.3207103967666626
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,64,96,1,128,1,fp8,fp8,0,0.010476800054311753
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,32,96,96,128,1,fp8,fp8,0,0.3214560031890869
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,64,96,96,128,1,float16,float16,0,0.01250080019235611
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,32,96,1,128,1,float16,float16,0,0.26823840141296384
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,32,96,1,128,1,float16,fp8,0,0.27499680519104003
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,32,96,1,128,1,fp8,fp8,0,0.27488319873809813
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,32,96,2,128,1,float16,float16,0,0.267572808265686
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,32,96,2,128,1,float16,fp8,0,0.2747024059295654
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,32,96,2,128,1,fp8,fp8,0,0.2741760015487671
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,64,96,8,128,1,fp8,fp8,0,0.0104592002928257
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,32,96,4,128,1,float16,float16,0,0.26904160976409913
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,32,96,4,128,1,float16,fp8,0,0.27438080310821533
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,32,96,4,128,1,fp8,fp8,0,0.2748447895050049
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,32,96,8,128,1,float16,float16,0,0.2710799932479858
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,32,96,96,128,1,float16,fp8,0,0.16568319797515868
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,32,96,96,128,1,float16,float16,0,0.16974719762802123
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,32,96,8,128,1,fp8,fp8,0,0.2741663932800293
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,32,96,1,128,1,float16,float16,0,0.1408800005912781
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,32,96,1,128,1,float16,fp8,0,0.14192160367965698
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,32,96,1,128,1,fp8,fp8,0,0.14202719926834106
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,32,96,2,128,1,float16,float16,0,0.1415135979652405
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,32,96,2,128,1,float16,fp8,0,0.14220319986343383
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,32,96,2,128,1,fp8,fp8,0,0.14222079515457153
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,32,96,4,128,1,float16,float16,0,0.14096959829330444
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,32,96,4,128,1,float16,fp8,0,0.14171839952468873
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,32,96,4,128,1,fp8,fp8,0,0.1422160029411316
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,32,96,8,128,1,float16,float16,0,0.14315999746322633
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,32,96,96,128,1,float16,float16,0,0.0898256003856659
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,32,96,8,128,1,float16,fp8,0,0.14175519943237305
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,32,96,96,128,1,float16,fp8,0,0.08804640173912048
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,32,96,8,128,1,fp8,fp8,0,0.14219520092010499
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,32,96,96,128,1,fp8,fp8,0,0.0875536024570465
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,32,96,1,128,1,float16,float16,0,0.07529119849205017
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,32,96,1,128,1,fp8,fp8,0,0.07536479830741882
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,32,96,1,128,1,float16,fp8,0,0.07500479817390442
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,32,96,2,128,1,float16,fp8,0,0.07548800110816956
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,32,96,4,128,1,float16,float16,0,0.07583039999008179
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,32,96,4,128,1,float16,fp8,0,0.07499679923057556
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,32,96,4,128,1,fp8,fp8,0,0.07526400089263915
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,32,96,8,128,1,float16,float16,0,0.0766752004623413
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,32,96,8,128,1,float16,fp8,0,0.07513440251350403
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,32,96,96,128,1,float16,float16,0,0.04905279874801636
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,32,96,8,128,1,fp8,fp8,0,0.07602559924125671
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,32,96,96,128,1,float16,fp8,0,0.04750880002975464
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,32,96,96,128,1,fp8,fp8,0,0.04782559871673584
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,32,96,1,128,1,float16,float16,0,0.043305599689483644
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,32,96,1,128,1,float16,fp8,0,0.042166399955749514
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,32,96,1,128,1,fp8,fp8,0,0.04226559996604919
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,32,96,2,128,1,float16,float16,0,0.043156799674034116
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,32,96,2,128,1,float16,fp8,0,0.042849600315093994
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,32,96,4,128,1,float16,float16,0,0.04323039948940277
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,32,96,4,128,1,fp8,fp8,0,0.04311679899692535
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,32,96,4,128,1,float16,fp8,0,0.04302720129489899
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,32,96,8,128,1,float16,float16,0,0.04328640103340149
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,32,96,8,128,1,float16,fp8,0,0.042628800868988036
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,32,96,8,128,1,fp8,fp8,0,0.043007999658584595
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,32,96,96,128,1,float16,float16,0,0.028935998678207397
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,32,96,96,128,1,float16,fp8,0,0.02893120050430298
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,32,96,96,128,1,float16,float16,0,0.3253216028213501
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,32,96,1,128,1,float16,float16,0,0.02675040066242218
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,32,96,1,128,1,float16,fp8,0,0.026027199625968934
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,32,96,1,128,1,fp8,fp8,0,0.026023998856544495
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,32,96,2,128,1,float16,float16,0,0.026817598938941957
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,32,96,8,128,1,float16,fp8,0,0.27483038902282714
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,32,96,96,128,1,fp8,fp8,0,0.16682080030441285
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,32,96,2,128,1,fp8,fp8,0,0.026825600862503053
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,32,96,4,128,1,float16,float16,0,0.026841598749160766
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,32,96,4,128,1,float16,fp8,0,0.02683520019054413
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,32,96,2,128,1,float16,fp8,0,0.026840001344680786
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,32,96,4,128,1,fp8,fp8,0,0.02680320143699646
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,32,96,8,128,1,float16,float16,0,0.026833599805831908
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,32,96,8,128,1,float16,fp8,0,0.02683840095996857
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,32,96,96,128,1,float16,float16,0,0.019415999948978423
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,32,96,96,128,1,float16,fp8,0,0.0186256006360054
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,32,96,96,128,1,fp8,fp8,0,0.018622399866580965
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,32,96,1,128,1,float16,float16,0,0.016652800142765045
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,32,96,1,128,1,float16,fp8,0,0.016697600483894348
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,32,96,1,128,1,fp8,fp8,0,0.01658719927072525
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,32,96,2,128,1,float16,float16,0,0.01659200042486191
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,32,96,2,128,1,float16,fp8,0,0.016542400419712066
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,32,96,2,128,1,fp8,fp8,0,0.07518079876899719
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,32,96,2,128,1,fp8,fp8,0,0.01663679927587509
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,32,96,4,128,1,float16,float16,0,0.01658879965543747
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,32,96,4,128,1,float16,fp8,0,0.01653439998626709
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,32,96,4,128,1,fp8,fp8,0,0.016607999801635742
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,32,96,8,128,1,float16,float16,0,0.016575999557971954
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,32,96,8,128,1,float16,fp8,0,0.01658560037612915
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,32,96,8,128,1,fp8,fp8,0,0.01659200042486191
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,32,96,96,128,1,float16,fp8,0,0.014417600631713868
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,32,96,96,128,1,fp8,fp8,0,0.014452800154685974
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,32,96,1,128,1,float16,float16,0,0.012430399656295776
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,32,96,1,128,1,float16,fp8,0,0.012521600723266602
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,32,96,1,128,1,fp8,fp8,0,0.012505599856376648
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,32,96,2,128,1,float16,float16,0,0.013016000390052795
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,32,96,2,128,1,float16,fp8,0,0.012415999919176102
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,32,96,2,128,1,fp8,fp8,0,0.01295199990272522
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,32,96,4,128,1,float16,float16,0,0.012479999661445617
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,32,96,2,128,1,fp8,fp8,0,0.04294880032539368
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,32,96,4,128,1,float16,fp8,0,0.013457599282264709
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,32,96,4,128,1,fp8,fp8,0,0.012521600723266602
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,32,96,8,128,1,float16,float16,0,0.013441599905490875
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,32,96,8,128,1,float16,fp8,0,0.01342719942331314
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,32,96,8,128,1,fp8,fp8,0,0.0125231996178627
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,32,96,96,128,1,float16,float16,0,0.01247360035777092
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,32,96,96,128,1,float16,fp8,0,0.010513599961996078
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,32,96,96,128,1,fp8,fp8,0,0.010451199859380722
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,32,96,1,128,1,float16,float16,0,0.010407999902963639
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,32,96,1,128,1,float16,fp8,0,0.010478399693965912
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,32,96,1,128,1,fp8,fp8,0,0.010424000024795533
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,32,96,2,128,1,float16,float16,0,0.010451199859380722
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,32,96,2,128,1,float16,fp8,0,0.010476800054311753
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,32,96,4,128,1,float16,float16,0,0.010428799688816071
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,32,96,4,128,1,float16,fp8,0,0.01043199971318245
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,32,96,4,128,1,fp8,fp8,0,0.01037440001964569
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,32,96,8,128,1,float16,float16,0,0.010507199913263321
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,32,96,8,128,1,float16,fp8,0,0.010401599854230881
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,32,96,8,128,1,fp8,fp8,0,0.010366400331258773
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,32,96,96,128,1,fp8,fp8,0,0.02892799973487854
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,16,96,1,128,1,float16,float16,0,0.5041215896606446
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,16,96,1,128,1,float16,fp8,0,0.48811678886413573
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,16,96,1,128,1,fp8,fp8,0,0.48756961822509765
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,16,96,2,128,1,float16,float16,0,0.5049808025360107
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,16,96,2,128,1,float16,fp8,0,0.4880815982818604
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,16,96,2,128,1,fp8,fp8,0,0.4872896194458008
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,32,96,8,128,1,fp8,fp8,0,0.026840001344680786
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,16,96,4,128,1,float16,fp8,0,0.4879216194152832
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,16,96,4,128,1,fp8,fp8,0,0.48755202293395994
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,32,96,2,128,1,float16,float16,0,0.07591840028762817
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,16,96,8,128,1,float16,fp8,0,0.4880159854888916
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,32,96,96,128,1,float16,float16,0,0.01454080045223236
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,16,96,8,128,1,float16,float16,0,0.5075215816497802
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,16,96,96,128,1,fp8,fp8,0,0.27243518829345703
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,16,96,1,128,1,float16,float16,0,0.2593024015426636
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,16,96,1,128,1,float16,fp8,0,0.24855999946594237
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,16,96,96,128,1,float16,fp8,0,0.27200000286102294
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,16,96,1,128,1,fp8,fp8,0,0.24871840476989746
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,16,96,2,128,1,float16,float16,0,0.25921599864959716
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,16,96,2,128,1,float16,fp8,0,0.247491192817688
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,16,96,2,128,1,fp8,fp8,0,0.24854719638824463
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,32,96,2,128,1,fp8,fp8,0,0.010451199859380722
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,16,96,4,128,1,float16,fp8,0,0.24886720180511473
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,16,96,4,128,1,fp8,fp8,0,0.2485935926437378
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,16,96,8,128,1,float16,float16,0,0.2617088079452515
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,16,96,8,128,1,float16,fp8,0,0.24824318885803223
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,16,96,96,128,1,float16,float16,0,0.14929920434951782
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,16,96,8,128,1,fp8,fp8,0,0.2489567995071411
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,16,96,96,128,1,float16,fp8,0,0.14104479551315308
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,16,96,96,128,1,fp8,fp8,0,0.14151519536972046
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,16,96,1,128,1,float16,float16,0,0.13536800146102906
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,16,96,1,128,1,float16,fp8,0,0.1286463975906372
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,16,96,1,128,1,fp8,fp8,0,0.128603196144104
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,16,96,2,128,1,float16,float16,0,0.13524800539016724
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,16,96,2,128,1,float16,fp8,0,0.12930560111999512
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,16,96,2,128,1,fp8,fp8,0,0.12850719690322876
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,16,96,4,128,1,float16,float16,0,0.13615200519561768
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,16,96,4,128,1,float16,fp8,0,0.1288175940513611
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,16,96,4,128,1,fp8,fp8,0,0.12876479625701903
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,16,96,8,128,1,float16,fp8,0,0.12884000539779664
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,16,96,8,128,1,fp8,fp8,0,0.12829760313034058
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,16,96,96,128,1,float16,fp8,0,0.07417759895324708
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,16,96,96,128,1,float16,float16,0,0.08201119899749756
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,16,96,96,128,1,fp8,fp8,0,0.07444639801979065
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,16,96,1,128,1,float16,fp8,0,0.06861600279808044
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,16,96,1,128,1,fp8,fp8,0,0.06839039921760559
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,16,96,4,128,1,float16,float16,0,0.505785608291626
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,16,96,2,128,1,float16,float16,0,0.07322239875793457
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,16,96,2,128,1,float16,fp8,0,0.06838560104370117
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,16,96,2,128,1,fp8,fp8,0,0.06849120259284973
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,16,96,4,128,1,float16,float16,0,0.07360000014305115
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,16,96,4,128,1,float16,fp8,0,0.06871839761734008
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,16,96,4,128,1,fp8,fp8,0,0.06871039867401123
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,16,96,8,128,1,float16,float16,0,0.07411999702453613
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,16,96,96,128,1,float16,float16,0,0.2875888109207153
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,16,96,8,128,1,float16,fp8,0,0.06869279742240905
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,16,96,8,128,1,fp8,fp8,0,0.48786239624023436
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,16,96,8,128,1,fp8,fp8,0,0.0685375988483429
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,16,96,96,128,1,float16,float16,0,0.044249600172042845
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,16,96,96,128,1,float16,fp8,0,0.042510399222373964
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,16,96,96,128,1,fp8,fp8,0,0.04229759871959686
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,16,96,1,128,1,float16,float16,0,0.04192320108413696
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,16,96,1,128,1,float16,fp8,0,0.03914879858493805
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,16,96,1,128,1,fp8,fp8,0,0.039110401272773744
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,16,96,2,128,1,float16,float16,0,0.042243200540542605
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,16,96,2,128,1,float16,fp8,0,0.03911679983139038
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,16,96,2,128,1,fp8,fp8,0,0.03916319906711578
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,16,96,4,128,1,float16,float16,0,0.041859200596809386
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,16,96,4,128,1,fp8,fp8,0,0.03922240138053894
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,16,96,4,128,1,float16,fp8,0,0.03912160098552704
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,16,96,8,128,1,float16,float16,0,0.0412992000579834
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,16,96,8,128,1,float16,fp8,0,0.03912320137023926
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,16,96,8,128,1,fp8,fp8,0,0.0391184002161026
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,16,96,96,128,1,float16,fp8,0,0.02581599950790405
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,16,96,96,128,1,fp8,fp8,0,0.02651839852333069
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,16,96,1,128,1,fp8,fp8,0,0.024780799448490144
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,16,96,2,128,1,float16,float16,0,0.025059199333190917
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,16,96,2,128,1,float16,fp8,0,0.02476799935102463
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,16,96,4,128,1,float16,float16,0,0.2597631931304932
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,16,96,1,128,1,float16,float16,0,0.025808000564575197
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,16,96,2,128,1,fp8,fp8,0,0.024718399345874786
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,16,96,4,128,1,float16,float16,0,0.02629440128803253
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,16,96,1,128,1,float16,fp8,0,0.02481440007686615
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,16,96,4,128,1,fp8,fp8,0,0.02481919974088669
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,16,96,8,128,1,float16,float16,0,0.026372799277305604
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,16,96,8,128,1,fp8,fp8,0,0.024748800694942473
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16,96,96,128,1,float16,float16,0,0.01867839992046356
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16,96,96,128,1,float16,fp8,0,0.01653279960155487
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16,96,96,128,1,fp8,fp8,0,0.016604800522327424
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16,96,1,128,1,float16,float16,0,0.016564799845218657
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,16,96,4,128,1,float16,fp8,0,0.024799999594688416
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16,96,1,128,1,fp8,fp8,0,0.01656160056591034
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16,96,2,128,1,float16,float16,0,0.016577599942684172
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16,96,2,128,1,fp8,fp8,0,0.016571199893951415
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16,96,4,128,1,float16,float16,0,0.016596800088882445
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,16,96,8,128,1,float16,float16,0,0.13599200248718263
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16,96,4,128,1,float16,fp8,0,0.0166143998503685
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16,96,4,128,1,fp8,fp8,0,0.01647839993238449
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16,96,8,128,1,float16,float16,0,0.01664479970932007
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16,96,8,128,1,float16,fp8,0,0.0165120005607605
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16,96,8,128,1,fp8,fp8,0,0.01656160056591034
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16,96,96,128,1,float16,float16,0,0.014561599493026734
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16,96,96,128,1,float16,fp8,0,0.01252640038728714
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16,96,96,128,1,fp8,fp8,0,0.012520000338554382
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16,96,1,128,1,float16,float16,0,0.01250240057706833
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,16,96,1,128,1,float16,float16,0,0.07334880232810974
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16,96,1,128,1,float16,fp8,0,0.012486399710178375
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16,96,1,128,1,fp8,fp8,0,0.012516799569129943
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16,96,2,128,1,float16,fp8,0,0.01249760016798973
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16,96,2,128,1,float16,float16,0,0.012532800436019897
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16,96,2,128,1,fp8,fp8,0,0.012467200309038163
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16,96,4,128,1,float16,float16,0,0.01249919980764389
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16,96,4,128,1,fp8,fp8,0,0.012518399953842163
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16,96,8,128,1,float16,float16,0,0.012508800625801087
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16,96,8,128,1,float16,fp8,0,0.012452799826860428
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16,96,8,128,1,fp8,fp8,0,0.012484800070524216
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16,96,96,128,1,float16,float16,0,0.012428800016641617
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16,96,96,128,1,fp8,fp8,0,0.010441599786281586
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16,96,1,128,1,float16,float16,0,0.010420800000429154
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16,96,1,128,1,float16,fp8,0,0.01045600026845932
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16,96,1,128,1,fp8,fp8,0,0.010446400195360184
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16,96,2,128,1,float16,float16,0,0.010435199737548828
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16,96,2,128,1,float16,fp8,0,0.010406400263309478
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16,96,2,128,1,fp8,fp8,0,0.010372799634933472
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16,96,4,128,1,float16,float16,0,0.010422399640083313
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16,96,4,128,1,float16,fp8,0,0.010435199737548828
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16,96,4,128,1,fp8,fp8,0,0.010372799634933472
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16,96,8,128,1,float16,float16,0,0.010422399640083313
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,16,96,96,128,1,float16,float16,0,0.02839680016040802
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16,96,8,128,1,float16,fp8,0,0.010425599664449692
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16,96,8,128,1,fp8,fp8,0,0.010500799864530563
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,1,96,1,128,1,float16,float16,0,0.5008639812469482
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,16,96,8,128,1,float16,fp8,0,0.024851199984550477
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,1,96,1,128,1,float16,fp8,0,0.47057762145996096
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16,96,1,128,1,float16,fp8,0,0.016475200653076172
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,1,96,1,128,1,fp8,fp8,0,0.47039361000061036
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,1,96,2,128,1,float16,float16,0,0.5000095844268799
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,1,96,2,128,1,float16,fp8,0,0.4703343868255615
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,1,96,2,128,1,fp8,fp8,0,0.4702432155609131
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,1,96,4,128,1,float16,float16,0,0.49985599517822266
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,1,96,4,128,1,float16,fp8,0,0.4708255767822266
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16,96,4,128,1,float16,fp8,0,0.01252640038728714
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,1,96,8,128,1,float16,float16,0,0.49987521171569826
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,1,96,8,128,1,float16,fp8,0,0.4701888084411621
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,1,96,96,128,1,float16,float16,0,0.2577023983001709
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,1,96,96,128,1,float16,fp8,0,0.24153599739074708
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16,96,96,128,1,float16,fp8,0,0.01040000021457672
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,1,96,96,128,1,fp8,fp8,0,0.24134719371795654
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,1,96,1,128,1,float16,float16,0,0.25599839687347414
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,1,96,1,128,1,float16,fp8,0,0.24012160301208496
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,1,96,1,128,1,fp8,fp8,0,0.23975999355316163
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,1,96,2,128,1,float16,float16,0,0.25552959442138673
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,1,96,2,128,1,float16,fp8,0,0.24039199352264404
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,1,96,2,128,1,fp8,fp8,0,0.24039039611816407
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,1,96,4,128,1,float16,float16,0,0.25730719566345217
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,1,96,4,128,1,float16,fp8,0,0.2405776023864746
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,1,96,4,128,1,fp8,fp8,0,0.24002559185028077
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,1,96,8,128,1,float16,float16,0,0.2552175998687744
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,1,96,8,128,1,float16,fp8,0,0.2404848098754883
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1,96,96,128,1,float16,float16,0,0.13654719591140746
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,1,96,8,128,1,fp8,fp8,0,0.24028160572052001
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1,96,96,128,1,float16,fp8,0,0.12625279426574706
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1,96,96,128,1,fp8,fp8,0,0.12637439966201783
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1,96,1,128,1,float16,float16,0,0.13460320234298706
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1,96,1,128,1,float16,fp8,0,0.12533440589904785
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1,96,1,128,1,fp8,fp8,0,0.12516000270843505
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1,96,2,128,1,float16,float16,0,0.13455359935760497
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1,96,2,128,1,float16,fp8,0,0.12569119930267333
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1,96,2,128,1,fp8,fp8,0,0.12565120458602905
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1,96,4,128,1,float16,float16,0,0.13376480340957642
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1,96,4,128,1,float16,fp8,0,0.12533440589904785
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1,96,4,128,1,fp8,fp8,0,0.12586079835891723
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1,96,8,128,1,float16,float16,0,0.13392159938812256
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1,96,8,128,1,fp8,fp8,0,0.12545759677886964
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16,96,2,128,1,float16,fp8,0,0.01656000018119812
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1,96,96,128,1,float16,float16,0,0.07483839988708496
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1,96,96,128,1,float16,fp8,0,0.0678384006023407
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1,96,96,128,1,fp8,fp8,0,0.06769279837608337
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1,96,1,128,1,float16,float16,0,0.07248640060424805
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1,96,1,128,1,float16,fp8,0,0.06751840114593506
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1,96,2,128,1,float16,float16,0,0.07270240187644958
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1,96,2,128,1,float16,fp8,0,0.06718559861183167
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1,96,4,128,1,float16,fp8,0,0.06759999990463257
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1,96,4,128,1,float16,float16,0,0.07267040014266968
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1,96,4,128,1,fp8,fp8,0,0.06760799884796143
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1,96,8,128,1,float16,float16,0,0.07263519763946533
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1,96,8,128,1,float16,fp8,0,0.06752960085868835
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1,96,8,128,1,fp8,fp8,0,0.06767039895057678
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1,96,96,128,1,float16,float16,0,0.04324640035629272
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1,96,96,128,1,float16,fp8,0,0.03893760144710541
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1,96,1,128,1,float16,float16,0,0.0411871999502182
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1,96,96,128,1,fp8,fp8,0,0.038940799236297605
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1,96,1,128,1,float16,fp8,0,0.03833760023117065
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1,96,1,128,1,fp8,fp8,0,0.03835200071334839
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1,96,2,128,1,float16,float16,0,0.04142720103263855
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,1,96,4,128,1,fp8,fp8,0,0.4704864025115967
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1,96,2,128,1,fp8,fp8,0,0.038343998789787295
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1,96,4,128,1,float16,float16,0,0.041412800550460815
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1,96,4,128,1,float16,fp8,0,0.03824479877948761
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1,96,8,128,1,float16,float16,0,0.04152320027351379
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1,96,8,128,1,float16,fp8,0,0.0378928005695343
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1,96,8,128,1,fp8,fp8,0,0.039155200123786926
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1,96,96,128,1,float16,float16,0,0.026855999231338502
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1,96,96,128,1,float16,fp8,0,0.02433760017156601
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,1,96,8,128,1,fp8,fp8,0,0.4711328029632568
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1,96,4,128,1,fp8,fp8,0,0.03864159882068634
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1,96,1,128,1,float16,float16,0,0.02481119930744171
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1,96,1,128,1,float16,fp8,0,0.024672000110149382
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1,96,1,128,1,fp8,fp8,0,0.0231904000043869
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1,96,2,128,1,float16,float16,0,0.024854399263858795
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1,96,2,128,1,float16,fp8,0,0.024560000002384185
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1,96,2,128,1,fp8,fp8,0,0.023496000468730925
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1,96,4,128,1,float16,fp8,0,0.024596799910068513
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1,96,4,128,1,fp8,fp8,0,0.02336000055074692
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1,96,8,128,1,float16,float16,0,0.02484479993581772
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1,96,8,128,1,float16,fp8,0,0.023342399299144743
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1,96,8,128,1,fp8,fp8,0,0.024799999594688416
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1,96,96,128,1,float16,float16,0,0.018572799861431122
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1,96,96,128,1,float16,fp8,0,0.016628800332546233
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1,96,96,128,1,fp8,fp8,0,0.0165120005607605
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1,96,1,128,1,float16,float16,0,0.016655999422073364
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1,96,1,128,1,float16,fp8,0,0.01648640036582947
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1,96,1,128,1,fp8,fp8,0,0.01462559998035431
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1,96,2,128,1,float16,fp8,0,0.015095999836921692
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1,96,2,128,1,fp8,fp8,0,0.01464959979057312
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1,96,4,128,1,float16,float16,0,0.016590400040149687
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1,96,4,128,1,float16,fp8,0,0.014537599682807923
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1,96,4,128,1,fp8,fp8,0,0.014672000706195832
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1,96,8,128,1,float16,float16,0,0.016548800468444824
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1,96,8,128,1,float16,fp8,0,0.015062400698661804
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1,96,8,128,1,fp8,fp8,0,0.016574400663375854
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1,96,96,128,1,float16,float16,0,0.014931200444698334
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1,96,8,128,1,float16,fp8,0,0.12568800449371337
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1,96,96,128,1,float16,fp8,0,0.012505599856376648
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1,96,96,128,1,fp8,fp8,0,0.012535999715328216
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1,96,1,128,1,float16,float16,0,0.012894399464130402
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1,96,1,128,1,fp8,fp8,0,0.06707680225372314
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1,96,1,128,1,float16,fp8,0,0.012588800489902496
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1,96,1,128,1,fp8,fp8,0,0.012491200119256973
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1,96,2,128,1,float16,fp8,0,0.012561599910259246
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1,96,2,128,1,fp8,fp8,0,0.01242400035262108
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1,96,2,128,1,float16,float16,0,0.012545600533485413
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1,96,4,128,1,float16,fp8,0,0.01255200058221817
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1,96,4,128,1,fp8,fp8,0,0.0124719999730587
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1,96,8,128,1,float16,float16,0,0.012510399520397186
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1,96,8,128,1,float16,fp8,0,0.012479999661445617
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1,96,8,128,1,fp8,fp8,0,0.012561599910259246
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1,96,96,128,1,float16,float16,0,0.012532800436019897
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1,96,96,128,1,fp8,fp8,0,0.024766400456428528
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1,96,96,128,1,float16,fp8,0,0.01043360009789467
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1,96,96,128,1,fp8,fp8,0,0.010491199791431427
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1,96,1,128,1,float16,float16,0,0.010436800122261048
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1,96,1,128,1,float16,fp8,0,0.010396800190210342
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1,96,1,128,1,fp8,fp8,0,0.010476800054311753
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1,96,2,128,1,float16,float16,0,0.010416000336408614
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1,96,2,128,1,float16,fp8,0,0.010476800054311753
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1,96,2,128,1,fp8,fp8,0,0.010449600219726563
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1,96,4,128,1,float16,float16,0,0.024775999784469604
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1,96,4,128,1,float16,float16,0,0.010438399761915207
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1,96,4,128,1,float16,fp8,0,0.010355199873447418
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1,96,4,128,1,fp8,fp8,0,0.010416000336408614
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1,96,8,128,1,float16,fp8,0,0.010462400317192078
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1,96,8,128,1,fp8,fp8,0,0.010443200170993806
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1,96,2,128,1,float16,float16,0,0.016510400176048278
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1,96,2,128,1,fp8,fp8,0,0.06779839992523193
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1,96,4,128,1,float16,float16,0,0.012540799379348756
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1,96,2,128,1,float16,fp8,0,0.03824639916419983
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1,96,8,128,1,float16,float16,0,0.01048479974269867
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,16384,64,1,128,1,float16,fp8,0,22.574461364746092
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,16384,64,1,128,1,fp8,fp8,0,22.424984741210938
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,16384,64,2,128,1,float16,fp8,0,22.418896484375
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,16384,64,2,128,1,fp8,fp8,0,22.463870239257812
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,16384,64,4,128,1,float16,fp8,0,22.50479736328125
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,16384,64,1,128,1,float16,float16,0,28.72230224609375
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,16384,64,2,128,1,float16,float16,0,28.893890380859375
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,16384,64,4,128,1,float16,float16,0,28.86443176269531
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16384,64,64,128,1,float16,fp8,0,11.606279754638672
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16384,64,64,128,1,fp8,fp8,0,11.662493133544922
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,16384,64,4,128,1,fp8,fp8,0,22.989533996582033
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16384,64,1,128,1,float16,float16,0,14.476341247558594
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,16384,64,8,128,1,float16,fp8,0,22.674462890625
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,16384,64,8,128,1,fp8,fp8,0,22.632501220703126
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,16384,64,8,128,1,float16,float16,0,29.073541259765626
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16384,64,1,128,1,float16,fp8,0,11.459059143066407
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16384,64,1,128,1,fp8,fp8,0,11.500462341308594
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16384,64,2,128,1,float16,fp8,0,11.314794921875
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16384,64,2,128,1,fp8,fp8,0,11.323670196533204
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16384,64,2,128,1,float16,float16,0,14.819876098632813
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16384,64,4,128,1,float16,float16,0,14.649137878417969
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16384,64,4,128,1,float16,fp8,0,11.240106964111328
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16384,64,4,128,1,fp8,fp8,0,11.270848083496094
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16384,64,64,128,1,float16,fp8,0,5.927371215820313
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16384,64,64,128,1,fp8,fp8,0,5.94529914855957
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16384,64,8,128,1,float16,fp8,0,11.449420928955078
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16384,64,8,128,1,fp8,fp8,0,11.434860992431641
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16384,64,8,128,1,float16,float16,0,14.7436279296875
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16384,64,1,128,1,float16,float16,0,7.449031829833984
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16384,64,1,128,1,float16,fp8,0,5.627763366699218
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16384,64,1,128,1,fp8,fp8,0,5.7273918151855465
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16384,64,2,128,1,float16,fp8,0,5.682905578613282
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16384,64,2,128,1,float16,float16,0,7.4008033752441404
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16384,64,2,128,1,fp8,fp8,0,5.697204971313477
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16384,64,4,128,1,float16,float16,0,7.369739532470703
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16384,64,4,128,1,float16,fp8,0,5.602414321899414
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16384,64,4,128,1,fp8,fp8,0,5.69629135131836
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16384,64,8,128,1,float16,fp8,0,5.762971115112305
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16384,64,8,128,1,fp8,fp8,0,5.683979034423828
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16384,64,8,128,1,float16,float16,0,7.306630706787109
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16384,64,64,128,1,float16,fp8,0,3.0315296173095705
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16384,64,64,128,1,fp8,fp8,0,3.0096559524536133
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16384,64,1,128,1,float16,fp8,0,2.8853103637695314
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16384,64,1,128,1,float16,float16,0,3.6096126556396486
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16384,64,1,128,1,fp8,fp8,0,3.0530223846435547
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16384,64,2,128,1,float16,fp8,0,2.8202816009521485
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16384,64,2,128,1,float16,float16,0,3.4627761840820312
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16384,64,2,128,1,fp8,fp8,0,2.8609439849853517
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16384,64,4,128,1,float16,fp8,0,2.8392255783081053
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16384,64,4,128,1,fp8,fp8,0,2.863545608520508
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16384,64,4,128,1,float16,float16,0,3.6353328704833983
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16384,64,8,128,1,float16,float16,0,3.6845729827880858
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16384,64,8,128,1,float16,fp8,0,2.8847984313964843
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16384,64,8,128,1,fp8,fp8,0,2.8914751052856444
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,12288,64,1,128,1,float16,fp8,0,12.956675720214843
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,12288,64,1,128,1,fp8,fp8,0,12.921888732910157
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,12288,64,2,128,1,float16,fp8,0,13.02379150390625
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,12288,64,1,128,1,float16,float16,0,16.7666748046875
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,12288,64,2,128,1,float16,float16,0,16.869827270507812
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,12288,64,2,128,1,fp8,fp8,0,13.15555419921875
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,12288,64,4,128,1,float16,fp8,0,13.075650024414063
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,12288,64,4,128,1,fp8,fp8,0,12.969918823242187
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,12288,64,4,128,1,float16,float16,0,16.752461242675782
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,12288,64,8,128,1,float16,float16,0,16.815357971191407
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,12288,64,64,128,1,float16,fp8,0,6.896110534667969
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,12288,64,8,128,1,float16,fp8,0,13.234725952148438
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,12288,64,8,128,1,fp8,fp8,0,13.119093322753907
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,12288,64,64,128,1,fp8,fp8,0,6.913887786865234
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,12288,64,1,128,1,float16,float16,0,8.353910064697265
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,12288,64,1,128,1,float16,fp8,0,6.453169250488282
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,12288,64,1,128,1,fp8,fp8,0,6.490962982177734
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,12288,64,2,128,1,float16,fp8,0,6.603498840332032
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,12288,64,2,128,1,float16,float16,0,8.179742431640625
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,12288,64,2,128,1,fp8,fp8,0,6.5014190673828125
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,12288,64,4,128,1,float16,float16,0,8.514119720458984
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,12288,64,4,128,1,float16,fp8,0,6.455097961425781
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,12288,64,4,128,1,fp8,fp8,0,6.474657440185547
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,12288,64,8,128,1,float16,float16,0,8.399606323242187
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,12288,64,8,128,1,float16,fp8,0,6.652947235107422
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,12288,64,8,128,1,fp8,fp8,0,6.548774719238281
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,12288,64,64,128,1,float16,fp8,0,3.6975345611572266
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,12288,64,64,128,1,fp8,fp8,0,3.378209686279297
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,12288,64,1,128,1,float16,float16,0,4.212515258789063
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,12288,64,1,128,1,float16,fp8,0,3.2214622497558594
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,12288,64,1,128,1,fp8,fp8,0,3.3228992462158202
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,12288,64,2,128,1,float16,fp8,0,3.2264591217041017
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,12288,64,2,128,1,float16,float16,0,4.19805908203125
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,12288,64,2,128,1,fp8,fp8,0,3.2526737213134767
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,12288,64,4,128,1,float16,float16,0,4.142627334594726
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,12288,64,4,128,1,float16,fp8,0,3.4667999267578127
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,12288,64,4,128,1,fp8,fp8,0,3.2987438201904298
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,12288,64,8,128,1,float16,fp8,0,3.2687057495117187
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,12288,64,8,128,1,fp8,fp8,0,3.3177936553955076
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,12288,64,8,128,1,float16,float16,0,4.170126342773438
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,12288,64,64,128,1,float16,fp8,0,1.8084751129150392
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,12288,64,64,128,1,fp8,fp8,0,2.1322383880615234
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,12288,64,1,128,1,float16,float16,0,1.9823648452758789
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,12288,64,1,128,1,float16,fp8,0,1.6624576568603515
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,12288,64,1,128,1,fp8,fp8,0,1.9061935424804688
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,12288,64,2,128,1,float16,float16,0,2.00307674407959
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,12288,64,2,128,1,float16,fp8,0,1.7145296096801759
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,12288,64,2,128,1,fp8,fp8,0,1.8352991104125977
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,12288,64,4,128,1,float16,float16,0,1.9079023361206056
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,12288,64,4,128,1,float16,fp8,0,1.6694047927856446
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,12288,64,4,128,1,fp8,fp8,0,1.897313690185547
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,12288,64,8,128,1,float16,float16,0,1.9299024581909179
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,12288,64,8,128,1,float16,fp8,0,1.6862272262573241
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,12288,64,8,128,1,fp8,fp8,0,1.8878288269042969
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,10240,64,1,128,1,float16,fp8,0,9.092533111572266
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,10240,64,1,128,1,float16,float16,0,11.799816131591797
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,10240,64,1,128,1,fp8,fp8,0,9.224807739257812
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,10240,64,2,128,1,float16,float16,0,11.815961456298828
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,12288,64,64,128,1,float16,float16,0,2.0154176712036134
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,10240,64,2,128,1,float16,fp8,0,9.233465576171875
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,12288,64,64,128,1,float16,float16,0,4.229496002197266
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16384,64,64,128,1,float16,float16,0,3.6188495635986326
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,10240,64,2,128,1,fp8,fp8,0,9.207430267333985
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16384,64,64,128,1,float16,float16,0,7.497551727294922
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,12288,64,64,128,1,float16,float16,0,8.633331298828125
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,10240,64,4,128,1,float16,fp8,0,9.270809936523438
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,10240,64,4,128,1,float16,float16,0,11.470178985595703
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16384,64,64,128,1,float16,float16,0,14.789431762695312
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,10240,64,4,128,1,fp8,fp8,0,8.965137481689453
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,10240,64,64,128,1,float16,float16,0,6.1627552032470705
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,10240,64,8,128,1,float16,fp8,0,9.294964599609376
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,10240,64,64,128,1,float16,fp8,0,4.894968032836914
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,10240,64,8,128,1,fp8,fp8,0,9.184081268310546
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,10240,64,64,128,1,fp8,fp8,0,5.065895843505859
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,10240,64,1,128,1,float16,fp8,0,4.552751922607422
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,10240,64,1,128,1,float16,float16,0,5.791231918334961
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,10240,64,1,128,1,fp8,fp8,0,4.641227340698242
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,10240,64,8,128,1,float16,float16,0,11.567673492431641
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,10240,64,2,128,1,float16,fp8,0,4.62573127746582
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,10240,64,2,128,1,fp8,fp8,0,4.593560028076172
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,10240,64,2,128,1,float16,float16,0,5.971803283691406
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,10240,64,4,128,1,float16,fp8,0,4.5892799377441404
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,10240,64,4,128,1,float16,float16,0,5.889688110351562
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,10240,64,4,128,1,fp8,fp8,0,4.7068016052246096
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,10240,64,8,128,1,float16,float16,0,5.895822525024414
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,10240,64,64,128,1,float16,float16,0,3.085755157470703
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,10240,64,8,128,1,float16,fp8,0,4.451582336425782
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,10240,64,64,128,1,float16,fp8,0,2.787436866760254
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,10240,64,64,128,1,fp8,fp8,0,2.5085840225219727
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,10240,64,1,128,1,float16,fp8,0,2.307486343383789
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,10240,64,1,128,1,float16,float16,0,2.858195114135742
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,10240,64,8,128,1,fp8,fp8,0,4.604403305053711
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,10240,64,2,128,1,float16,fp8,0,2.2644575119018553
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,10240,64,1,128,1,fp8,fp8,0,2.6447776794433593
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,10240,64,2,128,1,fp8,fp8,0,2.284836769104004
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,10240,64,2,128,1,float16,float16,0,3.050787162780762
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,10240,64,4,128,1,float16,fp8,0,2.290902328491211
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,10240,64,4,128,1,float16,float16,0,2.8871952056884767
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,10240,64,4,128,1,fp8,fp8,0,2.50797119140625
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,10240,64,8,128,1,float16,float16,0,2.7771087646484376
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,10240,64,64,128,1,fp8,fp8,0,1.2655247688293456
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,10240,64,8,128,1,float16,fp8,0,2.2586288452148438
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,10240,64,64,128,1,float16,float16,0,1.4677583694458007
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,10240,64,64,128,1,float16,fp8,0,1.5190815925598145
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,10240,64,1,128,1,float16,fp8,0,1.2051263809204102
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,10240,64,1,128,1,float16,float16,0,1.544542407989502
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,10240,64,1,128,1,fp8,fp8,0,1.4856255531311036
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,10240,64,2,128,1,float16,fp8,0,1.1760208129882812
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,10240,64,2,128,1,float16,float16,0,1.3382368087768555
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,10240,64,4,128,1,float16,fp8,0,1.1812031745910645
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,10240,64,2,128,1,fp8,fp8,0,1.3432111740112305
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,10240,64,4,128,1,float16,float16,0,1.3683839797973634
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,10240,64,4,128,1,fp8,fp8,0,1.3567503929138183
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,10240,64,8,128,1,float16,float16,0,1.430408000946045
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,10240,64,8,128,1,float16,fp8,0,1.1684831619262694
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,10240,64,8,128,1,fp8,fp8,0,1.3189536094665528
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,10240,64,8,128,1,fp8,fp8,0,2.295086479187012
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,8192,64,1,128,1,float16,fp8,0,12.063243103027343
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,8192,64,1,128,1,fp8,fp8,0,12.096654510498047
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,8192,64,2,128,1,float16,fp8,0,12.266059112548827
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,8192,64,2,128,1,fp8,fp8,0,12.17559814453125
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,8192,64,1,128,1,float16,float16,0,15.351596069335937
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,8192,64,2,128,1,float16,float16,0,15.297042846679688
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,8192,64,4,128,1,float16,fp8,0,11.945665740966797
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,8192,64,4,128,1,float16,float16,0,15.0481201171875
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,8192,64,64,128,1,float16,fp8,0,6.513211059570312
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,8192,64,64,128,1,float16,float16,0,8.375833892822266
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,8192,64,64,128,1,fp8,fp8,0,6.680193328857422
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,8192,64,4,128,1,fp8,fp8,0,12.218367767333984
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,8192,64,8,128,1,float16,fp8,0,12.08277587890625
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,8192,64,8,128,1,fp8,fp8,0,12.170563507080079
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,8192,64,8,128,1,float16,float16,0,15.592231750488281
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,8192,64,1,128,1,float16,float16,0,7.458710479736328
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,8192,64,1,128,1,float16,fp8,0,5.973361587524414
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,8192,64,1,128,1,fp8,fp8,0,5.977953720092773
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,8192,64,2,128,1,float16,fp8,0,6.03535041809082
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,8192,64,2,128,1,fp8,fp8,0,6.043328094482422
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,8192,64,2,128,1,float16,float16,0,7.790643310546875
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,8192,64,4,128,1,float16,float16,0,7.706364440917969
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,8192,64,4,128,1,float16,fp8,0,6.005136108398437
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,8192,64,4,128,1,fp8,fp8,0,5.975787353515625
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,8192,64,64,128,1,float16,fp8,0,3.4863441467285154
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,8192,64,64,128,1,float16,float16,0,4.0565185546875
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,8192,64,8,128,1,float16,fp8,0,5.96862564086914
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,8192,64,8,128,1,fp8,fp8,0,6.090793609619141
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,8192,64,8,128,1,float16,float16,0,7.8134910583496096
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,8192,64,64,128,1,fp8,fp8,0,3.431787109375
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,8192,64,1,128,1,float16,fp8,0,3.0330432891845702
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,8192,64,1,128,1,float16,float16,0,3.720502471923828
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,8192,64,1,128,1,fp8,fp8,0,3.007382392883301
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,8192,64,2,128,1,float16,fp8,0,2.992731285095215
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,8192,64,2,128,1,float16,float16,0,3.811511993408203
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,8192,64,2,128,1,fp8,fp8,0,3.0262592315673826
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,8192,64,4,128,1,float16,fp8,0,3.0061216354370117
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,8192,64,4,128,1,float16,float16,0,3.771169662475586
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,8192,64,4,128,1,fp8,fp8,0,2.991049575805664
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,8192,64,64,128,1,float16,fp8,0,1.6898223876953125
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,8192,64,64,128,1,fp8,fp8,0,1.6492624282836914
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,8192,64,8,128,1,float16,fp8,0,2.989491271972656
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,8192,64,64,128,1,float16,float16,0,2.1018272399902345
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,8192,64,8,128,1,float16,float16,0,3.75948486328125
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,8192,64,8,128,1,fp8,fp8,0,3.0212528228759767
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,8192,64,1,128,1,float16,float16,0,1.9400272369384766
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,8192,64,1,128,1,float16,fp8,0,1.522548770904541
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,8192,64,1,128,1,fp8,fp8,0,1.5118368148803711
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,8192,64,2,128,1,float16,fp8,0,1.5227248191833496
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,8192,64,2,128,1,float16,float16,0,1.7265535354614259
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,8192,64,2,128,1,fp8,fp8,0,1.6771200180053711
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,8192,64,4,128,1,float16,fp8,0,1.5698736190795899
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,8192,64,4,128,1,float16,float16,0,1.9146703720092773
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,8192,64,4,128,1,fp8,fp8,0,1.8020063400268556
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,8192,64,64,128,1,float16,fp8,0,0.8683679580688477
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,8192,64,64,128,1,float16,float16,0,1.0833776473999024
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,8192,64,8,128,1,float16,fp8,0,1.5018783569335938
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,8192,64,64,128,1,fp8,fp8,0,0.8774496078491211
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,8192,64,8,128,1,fp8,fp8,0,1.5209360122680664
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,8192,64,1,128,1,float16,float16,0,0.9875295639038086
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,8192,64,1,128,1,float16,fp8,0,0.8419008255004883
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,8192,64,1,128,1,fp8,fp8,0,0.7880191802978516
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,8192,64,2,128,1,float16,fp8,0,0.791212797164917
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,8192,64,2,128,1,float16,float16,0,0.9653840065002441
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,8192,64,2,128,1,fp8,fp8,0,0.8082400321960449
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,8192,64,4,128,1,float16,float16,0,0.9107855796813965
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,8192,64,4,128,1,float16,fp8,0,0.919809627532959
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,8192,64,4,128,1,fp8,fp8,0,0.7929599761962891
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,8192,64,8,128,1,float16,float16,0,0.8998031616210938
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,8192,64,8,128,1,float16,fp8,0,0.8052384376525878
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,8192,64,8,128,1,fp8,fp8,0,0.9296527862548828
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,8192,64,8,128,1,float16,float16,0,1.8069328308105468
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,6144,64,1,128,1,fp8,fp8,0,6.926487731933594
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,6144,64,1,128,1,float16,fp8,0,7.105899047851563
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,6144,64,2,128,1,float16,fp8,0,6.912207794189453
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,6144,64,2,128,1,fp8,fp8,0,7.030429077148438
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,6144,64,1,128,1,float16,float16,0,8.99678726196289
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,6144,64,2,128,1,float16,float16,0,8.787334442138672
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,6144,64,4,128,1,float16,fp8,0,6.8472129821777346
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,6144,64,4,128,1,float16,float16,0,8.932857513427734
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,6144,64,64,128,1,float16,fp8,0,3.936654281616211
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,6144,64,64,128,1,fp8,fp8,0,3.817108917236328
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,6144,64,64,128,1,float16,float16,0,4.9595489501953125
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,6144,64,1,128,1,float16,float16,0,4.438358306884766
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,6144,64,4,128,1,fp8,fp8,0,6.992810821533203
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,6144,64,8,128,1,float16,fp8,0,7.042610931396484
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,6144,64,8,128,1,fp8,fp8,0,7.106657409667969
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,6144,64,8,128,1,float16,float16,0,9.18919677734375
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,6144,64,1,128,1,float16,fp8,0,3.444083023071289
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,6144,64,1,128,1,fp8,fp8,0,3.4260177612304688
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,6144,64,2,128,1,float16,fp8,0,3.619801712036133
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,6144,64,2,128,1,fp8,fp8,0,3.50488166809082
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,6144,64,2,128,1,float16,float16,0,4.395547103881836
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,6144,64,4,128,1,float16,fp8,0,3.4895233154296874
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,6144,64,4,128,1,float16,float16,0,4.438857650756836
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,6144,64,4,128,1,fp8,fp8,0,3.6739360809326174
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,6144,64,64,128,1,float16,float16,0,2.3946815490722657
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,6144,64,64,128,1,fp8,fp8,0,1.9824928283691405
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,6144,64,8,128,1,float16,fp8,0,3.52269287109375
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,6144,64,64,128,1,float16,fp8,0,2.3832143783569335
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,6144,64,8,128,1,float16,float16,0,4.420016098022461
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,6144,64,8,128,1,fp8,fp8,0,3.4615726470947266
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,6144,64,1,128,1,fp8,fp8,0,1.770631980895996
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,6144,64,1,128,1,float16,float16,0,2.1428607940673827
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,6144,64,1,128,1,float16,fp8,0,1.9884464263916015
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,6144,64,2,128,1,float16,float16,0,1.9881631851196289
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,6144,64,2,128,1,fp8,fp8,0,2.003212738037109
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,6144,64,4,128,1,float16,float16,0,2.1129344940185546
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,6144,64,4,128,1,float16,fp8,0,1.7707168579101562
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,6144,64,8,128,1,float16,float16,0,2.123865509033203
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,6144,64,8,128,1,float16,fp8,0,1.9879440307617187
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,6144,64,8,128,1,fp8,fp8,0,1.7653295516967773
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,6144,64,64,128,1,float16,float16,0,1.267251205444336
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,6144,64,64,128,1,float16,fp8,0,1.0240544319152831
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,6144,64,1,128,1,float16,fp8,0,0.9079216003417969
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,6144,64,1,128,1,float16,float16,0,1.0142704010009767
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,6144,64,64,128,1,fp8,fp8,0,1.1583135604858399
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,6144,64,1,128,1,fp8,fp8,0,1.0640735626220703
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,6144,64,2,128,1,float16,fp8,0,1.7442592620849608
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,6144,64,2,128,1,float16,float16,0,1.0147711753845214
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,6144,64,2,128,1,float16,fp8,0,0.8917247772216796
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,6144,64,2,128,1,fp8,fp8,0,1.004361629486084
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,6144,64,4,128,1,float16,float16,0,1.0247936248779297
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,6144,64,4,128,1,float16,fp8,0,0.9598752021789551
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,6144,64,4,128,1,fp8,fp8,0,0.8878720283508301
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,6144,64,8,128,1,float16,float16,0,1.0244000434875489
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,6144,64,8,128,1,float16,fp8,0,1.001155185699463
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,6144,64,64,128,1,float16,float16,0,0.6040239810943604
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,6144,64,64,128,1,float16,fp8,0,0.5693200111389161
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,6144,64,64,128,1,fp8,fp8,0,0.5284815788269043
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,6144,64,1,128,1,float16,float16,0,0.518393611907959
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,6144,64,1,128,1,float16,fp8,0,0.47099838256835935
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,6144,64,1,128,1,fp8,fp8,0,0.5173056125640869
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,6144,64,4,128,1,fp8,fp8,0,2.1648544311523437
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,6144,64,2,128,1,float16,fp8,0,0.5112207889556885
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,6144,64,4,128,1,float16,float16,0,0.5211056232452392
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,6144,64,4,128,1,float16,fp8,0,0.47057437896728516
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,6144,64,2,128,1,float16,float16,0,0.5309455871582032
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,6144,64,4,128,1,fp8,fp8,0,0.4924335956573486
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,6144,64,8,128,1,float16,fp8,0,0.4885727882385254
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,6144,64,8,128,1,float16,float16,0,0.53471999168396
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,6144,64,8,128,1,fp8,fp8,0,0.8977968215942382
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,6144,64,2,128,1,fp8,fp8,0,0.47845921516418455
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,6144,64,8,128,1,fp8,fp8,0,0.47216000556945803
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,4096,64,1,128,1,fp8,fp8,0,6.663764953613281
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,4096,64,2,128,1,float16,fp8,0,6.5581199645996096
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,4096,64,1,128,1,float16,fp8,0,6.46165771484375
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,4096,64,1,128,1,float16,float16,0,8.176624298095703
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,4096,64,2,128,1,float16,float16,0,8.265872192382812
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,4096,64,2,128,1,fp8,fp8,0,6.485472106933594
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,4096,64,4,128,1,float16,fp8,0,6.506161499023437
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,4096,64,4,128,1,float16,float16,0,8.098040008544922
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,4096,64,64,128,1,float16,fp8,0,3.7560558319091797
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,4096,64,64,128,1,float16,float16,0,4.830625534057617
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,4096,64,4,128,1,fp8,fp8,0,6.638288116455078
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,4096,64,8,128,1,float16,fp8,0,6.6031135559082035
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,4096,64,64,128,1,fp8,fp8,0,3.7533920288085936
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,4096,64,8,128,1,fp8,fp8,0,6.6328895568847654
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,4096,64,8,128,1,float16,float16,0,8.471002960205078
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,4096,64,1,128,1,float16,float16,0,3.9401935577392577
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,4096,64,1,128,1,float16,fp8,0,3.2526641845703126
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,4096,64,2,128,1,float16,fp8,0,3.344281768798828
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,4096,64,1,128,1,fp8,fp8,0,3.4830272674560545
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,4096,64,2,128,1,float16,float16,0,4.159040069580078
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,4096,64,2,128,1,fp8,fp8,0,3.2295391082763674
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,4096,64,4,128,1,float16,float16,0,4.071155166625976
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,4096,64,4,128,1,float16,fp8,0,3.7387569427490233
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,4096,64,4,128,1,fp8,fp8,0,3.4220447540283203
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,4096,64,64,128,1,float16,float16,0,2.4926479339599608
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,4096,64,8,128,1,fp8,fp8,0,3.268920135498047
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,4096,64,8,128,1,float16,fp8,0,3.3450271606445314
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,4096,64,8,128,1,float16,float16,0,4.198868942260742
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,4096,64,1,128,1,float16,float16,0,1.9661231994628907
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,4096,64,64,128,1,fp8,fp8,0,2.141891288757324
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,4096,64,1,128,1,float16,fp8,0,1.6672800064086915
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,4096,64,1,128,1,fp8,fp8,0,2.0659807205200194
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,4096,64,2,128,1,float16,float16,0,1.8511007308959961
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,4096,64,2,128,1,fp8,fp8,0,2.02138729095459
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,4096,64,4,128,1,float16,fp8,0,1.6762575149536132
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,4096,64,4,128,1,float16,float16,0,2.067500877380371
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,4096,64,4,128,1,fp8,fp8,0,1.6621728897094727
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,4096,64,64,128,1,float16,fp8,0,1.9031295776367188
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,4096,64,64,128,1,float16,float16,0,1.12805118560791
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,4096,64,8,128,1,float16,float16,0,1.9787551879882812
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,4096,64,64,128,1,fp8,fp8,0,0.9742544174194336
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,4096,64,8,128,1,float16,fp8,0,1.886195182800293
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,4096,64,1,128,1,float16,float16,0,0.9272128105163574
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,4096,64,8,128,1,fp8,fp8,0,1.6652528762817382
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,4096,64,1,128,1,float16,fp8,0,0.9710991859436036
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,4096,64,2,128,1,float16,float16,0,0.9397983551025391
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,4096,64,1,128,1,fp8,fp8,0,1.0030672073364257
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,4096,64,2,128,1,fp8,fp8,0,0.9178704261779785
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,4096,64,4,128,1,float16,float16,0,0.9472944259643554
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,4096,64,2,128,1,float16,fp8,0,1.6586799621582031
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,4096,64,4,128,1,float16,fp8,0,0.8727503776550293
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,4096,64,4,128,1,fp8,fp8,0,0.8602047920227051
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,4096,64,8,128,1,float16,fp8,0,0.8774191856384277
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,4096,64,8,128,1,fp8,fp8,0,0.8607295989990235
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,4096,64,8,128,1,float16,float16,0,0.980735969543457
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,4096,64,64,128,1,float16,float16,0,0.5706543922424316
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,4096,64,64,128,1,float16,fp8,0,0.5319471836090088
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,4096,64,1,128,1,float16,float16,0,0.492907190322876
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,4096,64,64,128,1,fp8,fp8,0,0.5120560169219971
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,4096,64,1,128,1,float16,fp8,0,0.4996352195739746
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,4096,64,2,128,1,float16,float16,0,0.48996639251708984
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,4096,64,2,128,1,float16,fp8,0,0.4574687957763672
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,4096,64,4,128,1,float16,float16,0,0.5242239952087402
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,4096,64,64,128,1,float16,fp8,0,1.162057590484619
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,4096,64,4,128,1,float16,fp8,0,0.44838881492614746
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,4096,64,4,128,1,fp8,fp8,0,0.45619840621948243
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,4096,64,8,128,1,float16,float16,0,0.5071231842041015
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,4096,64,8,128,1,float16,fp8,0,0.43698720932006835
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,4096,64,64,128,1,float16,float16,0,0.3027440071105957
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,4096,64,64,128,1,fp8,fp8,0,0.2717983961105347
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,4096,64,2,128,1,float16,fp8,0,0.9014656066894531
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,4096,64,1,128,1,float16,fp8,0,0.2383903980255127
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,4096,64,1,128,1,fp8,fp8,0,0.2369823932647705
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,4096,64,2,128,1,float16,float16,0,0.25810880661010743
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,4096,64,2,128,1,float16,fp8,0,0.239136004447937
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,4096,64,2,128,1,fp8,fp8,0,0.23656959533691407
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,4096,64,4,128,1,float16,float16,0,0.25996639728546145
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,4096,64,4,128,1,float16,fp8,0,0.2390575885772705
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,4096,64,4,128,1,fp8,fp8,0,0.23879520893096923
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,4096,64,8,128,1,float16,float16,0,0.2625135898590088
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,4096,64,8,128,1,fp8,fp8,0,0.23900160789489747
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,4096,64,1,128,1,fp8,fp8,0,0.4379615783691406
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,4096,64,2,128,1,fp8,fp8,0,0.44200959205627444
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,4096,64,8,128,1,fp8,fp8,0,0.44686079025268555
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,4096,64,64,128,1,float16,fp8,0,0.2941103935241699
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,4096,64,1,128,1,float16,float16,0,0.2595184087753296
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,4096,64,8,128,1,float16,fp8,0,0.2395551919937134
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,3072,64,1,128,1,float16,fp8,0,3.846457672119141
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,3072,64,1,128,1,fp8,fp8,0,3.834550476074219
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,3072,64,1,128,1,float16,float16,0,4.84410400390625
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,3072,64,2,128,1,float16,float16,0,4.776481628417969
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,3072,64,2,128,1,float16,fp8,0,3.8501022338867186
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,3072,64,2,128,1,fp8,fp8,0,3.8919967651367187
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,3072,64,4,128,1,float16,float16,0,4.699609756469727
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,3072,64,4,128,1,float16,fp8,0,3.8613025665283205
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,3072,64,4,128,1,fp8,fp8,0,3.8638992309570312
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,3072,64,64,128,1,float16,fp8,0,2.3159343719482424
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,3072,64,64,128,1,float16,float16,0,2.786092758178711
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,3072,64,8,128,1,float16,float16,0,4.891025543212891
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,3072,64,8,128,1,float16,fp8,0,4.042516708374023
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,3072,64,64,128,1,fp8,fp8,0,2.474190330505371
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,3072,64,8,128,1,fp8,fp8,0,3.9958751678466795
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,3072,64,1,128,1,float16,float16,0,2.277974319458008
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,3072,64,1,128,1,float16,fp8,0,1.9580207824707032
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,3072,64,1,128,1,fp8,fp8,0,1.948017692565918
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,3072,64,2,128,1,fp8,fp8,0,1.9856687545776368
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,3072,64,2,128,1,float16,fp8,0,2.1136159896850586
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,3072,64,4,128,1,float16,fp8,0,1.9503664016723632
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,3072,64,4,128,1,float16,float16,0,2.2070240020751952
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,3072,64,4,128,1,fp8,fp8,0,2.20032958984375
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,3072,64,8,128,1,float16,float16,0,2.3194559097290037
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,3072,64,8,128,1,float16,fp8,0,1.9667999267578125
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,3072,64,64,128,1,float16,float16,0,1.3841391563415528
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,3072,64,64,128,1,float16,fp8,0,1.1868271827697754
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,3072,64,8,128,1,fp8,fp8,0,1.9601648330688477
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,3072,64,64,128,1,fp8,fp8,0,1.3275407791137694
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,3072,64,1,128,1,float16,float16,0,1.1016832351684571
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,3072,64,1,128,1,fp8,fp8,0,1.0017919540405273
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,3072,64,1,128,1,float16,fp8,0,1.2204912185668946
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,3072,64,2,128,1,float16,float16,0,1.1458751678466796
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,3072,64,2,128,1,float16,fp8,0,0.9841823577880859
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,3072,64,2,128,1,fp8,fp8,0,1.0243696212768554
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,3072,64,4,128,1,float16,float16,0,1.115772819519043
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,3072,64,4,128,1,float16,fp8,0,1.1001551628112793
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,3072,64,4,128,1,fp8,fp8,0,0.9838239669799804
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,3072,64,2,128,1,float16,float16,0,2.3523183822631837
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,3072,64,8,128,1,float16,float16,0,1.1534943580627441
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,3072,64,64,128,1,float16,float16,0,0.7025824069976807
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,3072,64,64,128,1,float16,fp8,0,0.6293248176574707
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,3072,64,64,128,1,fp8,fp8,0,0.6473120212554931
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,3072,64,8,128,1,fp8,fp8,0,0.9847935676574707
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,3072,64,1,128,1,float16,float16,0,0.5621263980865479
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,3072,64,8,128,1,float16,fp8,0,1.145580768585205
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,3072,64,1,128,1,float16,fp8,0,0.5112688064575195
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,3072,64,1,128,1,fp8,fp8,0,0.5828112125396728
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,3072,64,2,128,1,float16,float16,0,0.5712687969207764
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,3072,64,2,128,1,float16,fp8,0,0.5397264003753662
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,3072,64,2,128,1,fp8,fp8,0,0.5254208087921143
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,3072,64,4,128,1,float16,fp8,0,0.5104527950286866
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,3072,64,4,128,1,float16,float16,0,0.5602543830871582
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,3072,64,4,128,1,fp8,fp8,0,0.5490303993225097
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,3072,64,8,128,1,float16,float16,0,0.5738383769989014
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,3072,64,8,128,1,float16,fp8,0,0.5535920143127442
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,3072,64,64,128,1,float16,fp8,0,0.3222431898117065
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,3072,64,1,128,1,float16,float16,0,0.2908639907836914
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,3072,64,8,128,1,fp8,fp8,0,0.5780399799346924
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,3072,64,2,128,1,float16,fp8,0,0.27403199672698975
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,3072,64,2,128,1,fp8,fp8,0,0.27490720748901365
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,3072,64,4,128,1,float16,float16,0,0.3152127981185913
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,3072,64,1,128,1,fp8,fp8,0,0.2699872016906738
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,3072,64,4,128,1,float16,fp8,0,0.2719167947769165
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,3072,64,4,128,1,fp8,fp8,0,0.27458078861236573
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,3072,64,8,128,1,float16,fp8,0,0.27178881168365476
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,3072,64,8,128,1,fp8,fp8,0,0.2870975971221924
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,3072,64,64,128,1,float16,float16,0,0.19228960275650026
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,3072,64,64,128,1,float16,fp8,0,0.18125280141830444
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,3072,64,1,128,1,float16,float16,0,0.16431679725646972
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,3072,64,1,128,1,float16,fp8,0,0.15544320344924928
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,3072,64,1,128,1,fp8,fp8,0,0.1553455948829651
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,3072,64,2,128,1,float16,float16,0,0.16414560079574586
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,3072,64,2,128,1,float16,fp8,0,0.15313279628753662
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,3072,64,2,128,1,fp8,fp8,0,0.15227999687194824
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,3072,64,4,128,1,float16,float16,0,0.162391996383667
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,3072,64,4,128,1,float16,fp8,0,0.1522320032119751
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,3072,64,64,128,1,float16,float16,0,0.36593921184539796
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,3072,64,64,128,1,fp8,fp8,0,0.3181312084197998
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,3072,64,4,128,1,fp8,fp8,0,0.15251359939575196
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,3072,64,8,128,1,float16,float16,0,0.1661520004272461
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,3072,64,8,128,1,float16,fp8,0,0.15018399953842163
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,3072,64,8,128,1,fp8,fp8,0,0.151638400554657
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,3072,64,2,128,1,float16,float16,0,0.29943199157714845
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,3072,64,8,128,1,float16,float16,0,0.3155087947845459
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,3072,64,64,128,1,fp8,fp8,0,0.17661279439926147
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,3072,64,1,128,1,float16,fp8,0,0.2912688016891479
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,2048,64,1,128,1,float16,fp8,0,3.8383007049560547
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,2048,64,1,128,1,float16,float16,0,4.602313613891601
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,2048,64,2,128,1,float16,fp8,0,3.856972885131836
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,2048,64,2,128,1,float16,float16,0,4.642291259765625
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,2048,64,2,128,1,fp8,fp8,0,3.8181568145751954
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,2048,64,1,128,1,fp8,fp8,0,3.8172286987304687
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,2048,64,4,128,1,float16,fp8,0,3.8357215881347657
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,2048,64,4,128,1,fp8,fp8,0,3.869121551513672
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,2048,64,4,128,1,float16,float16,0,4.66060791015625
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,2048,64,64,128,1,float16,float16,0,2.919887924194336
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,2048,64,8,128,1,float16,fp8,0,3.862548828125
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,2048,64,8,128,1,fp8,fp8,0,3.9341758728027343
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,2048,64,8,128,1,float16,float16,0,4.80040168762207
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,2048,64,64,128,1,float16,fp8,0,2.418524742126465
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,2048,64,1,128,1,float16,fp8,0,2.1026031494140627
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,2048,64,1,128,1,fp8,fp8,0,2.071774482727051
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,2048,64,64,128,1,fp8,fp8,0,2.4306608200073243
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,2048,64,2,128,1,float16,float16,0,2.2090959548950195
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,2048,64,2,128,1,float16,fp8,0,1.9327423095703125
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,2048,64,2,128,1,fp8,fp8,0,2.2075103759765624
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,2048,64,4,128,1,float16,fp8,0,1.954457664489746
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,2048,64,4,128,1,float16,float16,0,2.199007987976074
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,2048,64,4,128,1,fp8,fp8,0,1.9447423934936523
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,2048,64,8,128,1,float16,float16,0,2.221780776977539
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,2048,64,8,128,1,fp8,fp8,0,1.9168687820434571
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,2048,64,8,128,1,float16,fp8,0,2.147238349914551
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,2048,64,64,128,1,float16,float16,0,1.4256303787231446
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,2048,64,1,128,1,float16,float16,0,1.0739359855651855
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,2048,64,1,128,1,float16,float16,0,2.2172927856445312
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,2048,64,1,128,1,fp8,fp8,0,0.9711456298828125
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,2048,64,1,128,1,float16,fp8,0,1.222696018218994
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,2048,64,2,128,1,float16,float16,0,1.0905376434326173
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,2048,64,2,128,1,float16,fp8,0,1.065231990814209
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,2048,64,2,128,1,fp8,fp8,0,1.0801712036132813
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,2048,64,4,128,1,float16,float16,0,1.0863903999328612
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,2048,64,4,128,1,float16,fp8,0,0.9700240135192871
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,2048,64,4,128,1,fp8,fp8,0,1.2165184020996094
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,2048,64,8,128,1,float16,fp8,0,1.0082511901855469
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,2048,64,8,128,1,float16,float16,0,1.2519295692443848
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,2048,64,64,128,1,float16,float16,0,0.7417679786682129
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,2048,64,64,128,1,float16,fp8,0,0.6218416213989257
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,2048,64,8,128,1,fp8,fp8,0,1.0236047744750976
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,2048,64,1,128,1,float16,float16,0,0.5445407867431641
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,2048,64,64,128,1,fp8,fp8,0,0.6297391891479492
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,2048,64,1,128,1,float16,fp8,0,0.5961679935455322
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,2048,64,2,128,1,float16,float16,0,0.5481872081756591
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,2048,64,1,128,1,fp8,fp8,0,0.5493279933929444
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,2048,64,2,128,1,float16,fp8,0,0.5085840225219727
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,2048,64,2,128,1,fp8,fp8,0,0.5218992233276367
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,2048,64,4,128,1,float16,float16,0,0.5471248149871826
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,2048,64,4,128,1,float16,fp8,0,0.597273588180542
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,2048,64,4,128,1,fp8,fp8,0,0.49800801277160645
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,2048,64,8,128,1,float16,float16,0,0.5627888202667236
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,2048,64,64,128,1,float16,fp8,0,1.2350607872009278
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,2048,64,8,128,1,fp8,fp8,0,0.511737585067749
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,2048,64,64,128,1,float16,float16,0,0.3656896114349365
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,2048,64,64,128,1,float16,fp8,0,0.3425823926925659
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,2048,64,64,128,1,fp8,fp8,0,0.32767519950866697
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,2048,64,1,128,1,float16,float16,0,0.2856175899505615
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,2048,64,64,128,1,fp8,fp8,0,1.4346608161926269
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,2048,64,1,128,1,fp8,fp8,0,0.26895999908447266
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,2048,64,1,128,1,float16,fp8,0,0.2614799976348877
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,2048,64,2,128,1,float16,float16,0,0.2780623912811279
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,2048,64,2,128,1,float16,fp8,0,0.27224481105804443
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,2048,64,2,128,1,fp8,fp8,0,0.2627871990203857
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,2048,64,4,128,1,float16,float16,0,0.2884752035140991
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,2048,64,4,128,1,float16,fp8,0,0.2689728021621704
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,2048,64,4,128,1,fp8,fp8,0,0.2622895956039429
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,2048,64,64,128,1,float16,float16,0,0.19434080123901368
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,2048,64,64,128,1,fp8,fp8,0,0.18017760515213013
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,2048,64,1,128,1,float16,float16,0,0.15235840082168578
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,2048,64,1,128,1,float16,fp8,0,0.14400479793548585
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,2048,64,1,128,1,fp8,fp8,0,0.14466880559921264
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,2048,64,2,128,1,float16,fp8,0,0.1443120002746582
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,2048,64,2,128,1,fp8,fp8,0,0.14377440214157106
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,2048,64,4,128,1,float16,float16,0,0.15383199453353882
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,2048,64,4,128,1,float16,fp8,0,0.1431040048599243
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,2048,64,4,128,1,fp8,fp8,0,0.14436320066452027
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,2048,64,8,128,1,float16,float16,0,0.15581120252609254
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,2048,64,8,128,1,float16,fp8,0,0.14444320201873778
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,2048,64,8,128,1,fp8,fp8,0,0.14426079988479615
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,2048,64,64,128,1,float16,float16,0,0.11139199733734131
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,2048,64,64,128,1,float16,fp8,0,0.10099040269851685
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,2048,64,64,128,1,fp8,fp8,0,0.10120160579681396
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,2048,64,1,128,1,float16,float16,0,0.08936160206794738
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,2048,64,1,128,1,float16,fp8,0,0.08420640230178833
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,2048,64,8,128,1,float16,fp8,0,0.5036352157592774
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,2048,64,1,128,1,fp8,fp8,0,0.08408160209655761
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,2048,64,2,128,1,float16,float16,0,0.08946560025215149
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,2048,64,2,128,1,float16,fp8,0,0.08399199843406677
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,2048,64,2,128,1,fp8,fp8,0,0.0838927984237671
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,2048,64,4,128,1,float16,float16,0,0.08960639834403991
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,2048,64,4,128,1,float16,fp8,0,0.08419679999351501
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,2048,64,4,128,1,fp8,fp8,0,0.08428639769554139
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,2048,64,8,128,1,float16,fp8,0,0.08409600257873535
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,2048,64,8,128,1,fp8,fp8,0,0.08410239815711976
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,2048,64,8,128,1,float16,float16,0,0.2873087882995605
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,2048,64,64,128,1,float16,fp8,0,0.18040319681167602
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,2048,64,8,128,1,float16,float16,0,0.09137759804725647
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,2048,64,2,128,1,float16,float16,0,0.15321279764175416
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1536,64,1,128,1,float16,fp8,0,2.3755712509155273
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1536,64,1,128,1,fp8,fp8,0,2.376688003540039
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1536,64,2,128,1,float16,float16,0,2.610103988647461
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1536,64,2,128,1,float16,fp8,0,2.380031967163086
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,2048,64,8,128,1,float16,fp8,0,0.26547040939331057
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,2048,64,8,128,1,fp8,fp8,0,0.2734015941619873
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1536,64,2,128,1,fp8,fp8,0,2.375017547607422
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1536,64,4,128,1,float16,float16,0,2.802225685119629
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1536,64,4,128,1,fp8,fp8,0,2.379974365234375
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1536,64,8,128,1,float16,fp8,0,2.3890304565429688
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1536,64,1,128,1,float16,float16,0,2.742220878601074
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1536,64,8,128,1,float16,float16,0,2.8407936096191406
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1536,64,8,128,1,fp8,fp8,0,2.3741695404052736
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1536,64,64,128,1,float16,float16,0,1.8659536361694335
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1536,64,64,128,1,fp8,fp8,0,1.5794575691223145
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1536,64,64,128,1,float16,fp8,0,1.8886528015136719
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1536,64,1,128,1,float16,float16,0,1.3330991744995118
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1536,64,1,128,1,float16,fp8,0,1.2042304039001466
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1536,64,1,128,1,fp8,fp8,0,1.3729920387268066
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1536,64,2,128,1,float16,float16,0,1.3152799606323242
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1536,64,2,128,1,float16,fp8,0,1.2974512100219726
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1536,64,2,128,1,fp8,fp8,0,1.1995152473449706
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1536,64,4,128,1,float16,float16,0,1.3257951736450195
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1536,64,4,128,1,fp8,fp8,0,1.197766399383545
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1536,64,4,128,1,float16,fp8,0,2.5585311889648437
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1536,64,8,128,1,float16,float16,0,1.3561984062194825
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1536,64,8,128,1,float16,fp8,0,1.3115551948547364
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1536,64,64,128,1,float16,float16,0,0.918886375427246
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1536,64,8,128,1,fp8,fp8,0,1.2226271629333496
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1536,64,64,128,1,float16,fp8,0,0.8839232444763183
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1536,64,64,128,1,fp8,fp8,0,0.795142412185669
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1536,64,1,128,1,float16,fp8,0,0.6900271892547607
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1536,64,1,128,1,fp8,fp8,0,0.6365583896636963
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1536,64,2,128,1,float16,float16,0,0.6765056133270264
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1536,64,2,128,1,float16,fp8,0,0.6127535820007324
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1536,64,2,128,1,fp8,fp8,0,0.6612224102020263
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1536,64,4,128,1,float16,float16,0,0.6730224132537842
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1536,64,4,128,1,fp8,fp8,0,0.6220431804656983
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1536,64,4,128,1,float16,fp8,0,0.6649424076080322
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1536,64,8,128,1,float16,float16,0,0.6837808132171631
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1536,64,8,128,1,float16,fp8,0,0.6123392105102539
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1536,64,64,128,1,float16,float16,0,0.4714303970336914
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1536,64,8,128,1,fp8,fp8,0,0.6288191795349121
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1536,64,64,128,1,float16,fp8,0,0.4140160083770752
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1536,64,1,128,1,float16,float16,0,0.34357120990753176
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1536,64,1,128,1,float16,fp8,0,0.3156192064285278
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1536,64,1,128,1,fp8,fp8,0,0.3402256011962891
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1536,64,2,128,1,float16,float16,0,0.33667840957641604
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1536,64,2,128,1,float16,fp8,0,0.31885600090026855
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1536,64,2,128,1,fp8,fp8,0,0.33339359760284426
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1536,64,4,128,1,float16,float16,0,0.34180479049682616
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1536,64,4,128,1,float16,fp8,0,0.32940480709075926
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1536,64,4,128,1,fp8,fp8,0,0.31999199390411376
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1536,64,8,128,1,float16,float16,0,0.3490560054779053
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1536,64,8,128,1,float16,fp8,0,0.33072800636291505
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1536,64,64,128,1,float16,float16,0,0.24558720588684083
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1536,64,8,128,1,fp8,fp8,0,0.31659998893737795
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1536,64,64,128,1,float16,fp8,0,0.21740961074829102
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1536,64,4,128,1,float16,fp8,0,1.4433296203613282
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1536,64,1,128,1,float16,float16,0,0.18153280019760132
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1536,64,1,128,1,float16,fp8,0,0.17607840299606323
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1536,64,2,128,1,float16,float16,0,0.1784623980522156
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1536,64,2,128,1,float16,fp8,0,0.16904640197753906
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1536,64,2,128,1,fp8,fp8,0,0.17293119430541992
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1536,64,4,128,1,float16,fp8,0,0.16968159675598143
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1536,64,1,128,1,float16,float16,0,0.6599567890167236
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1536,64,4,128,1,fp8,fp8,0,0.16963839530944824
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1536,64,8,128,1,float16,float16,0,0.18196480274200438
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1536,64,8,128,1,fp8,fp8,0,0.16896480321884155
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1536,64,64,128,1,float16,fp8,0,0.11979360580444336
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1536,64,64,128,1,fp8,fp8,0,0.1194640040397644
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1536,64,1,128,1,float16,float16,0,0.09973440170288086
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1536,64,1,128,1,float16,fp8,0,0.09422240257263184
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1536,64,1,128,1,fp8,fp8,0,0.09444800019264221
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1536,64,2,128,1,float16,float16,0,0.09994879961013795
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1536,64,2,128,1,float16,fp8,0,0.09452959895133972
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1536,64,2,128,1,fp8,fp8,0,0.09474719762802124
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1536,64,4,128,1,float16,float16,0,0.10164799690246581
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1536,64,4,128,1,float16,fp8,0,0.09494720101356506
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1536,64,4,128,1,fp8,fp8,0,0.0953167974948883
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1536,64,8,128,1,float16,float16,0,0.10408960580825806
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1536,64,64,128,1,fp8,fp8,0,0.4244175910949707
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1536,64,8,128,1,float16,fp8,0,0.0953104019165039
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1536,64,8,128,1,fp8,fp8,0,0.09556000232696533
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1536,64,64,128,1,float16,float16,0,0.07630879878997802
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1536,64,64,128,1,float16,fp8,0,0.06890079975128174
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1536,64,64,128,1,fp8,fp8,0,0.06901280283927917
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1536,64,1,128,1,float16,float16,0,0.0607151985168457
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1536,64,1,128,1,float16,fp8,0,0.05755360126495361
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1536,64,2,128,1,float16,float16,0,0.060782402753829956
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1536,64,2,128,1,float16,fp8,0,0.057447999715805054
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1536,64,2,128,1,fp8,fp8,0,0.05715519785881042
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1536,64,4,128,1,float16,float16,0,0.061027199029922485
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1536,64,4,128,1,float16,fp8,0,0.05740640163421631
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1536,64,4,128,1,fp8,fp8,0,0.057257598638534545
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1536,64,1,128,1,fp8,fp8,0,0.16912959814071654
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1536,64,8,128,1,float16,float16,0,0.061964797973632815
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1536,64,4,128,1,float16,float16,0,0.18568960428237916
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1536,64,8,128,1,float16,fp8,0,0.057601600885391235
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1536,64,8,128,1,fp8,fp8,0,0.05745120048522949
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1536,64,8,128,1,float16,fp8,0,0.17301440238952637
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1536,64,64,128,1,float16,float16,0,0.13079359531402587
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1536,64,1,128,1,fp8,fp8,0,0.05765600204467773
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,1024,64,1,128,1,float16,fp8,0,2.512068748474121
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,1024,64,1,128,1,fp8,fp8,0,2.513545608520508
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,1024,64,1,128,1,float16,float16,0,2.76629753112793
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,1024,64,2,128,1,float16,float16,0,2.664246368408203
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1536,64,64,128,1,fp8,fp8,0,0.21561920642852783
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,1024,64,2,128,1,fp8,fp8,0,2.5133520126342774
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,1024,64,4,128,1,float16,fp8,0,2.5153520584106444
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,1024,64,4,128,1,float16,float16,0,2.842889595031738
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,1024,64,4,128,1,fp8,fp8,0,2.608878326416016
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,1024,64,8,128,1,float16,fp8,0,2.507771110534668
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,1024,64,8,128,1,float16,float16,0,3.0621807098388674
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,1024,64,2,128,1,float16,fp8,0,2.511180877685547
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,1024,64,8,128,1,fp8,fp8,0,2.50972957611084
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1024,64,1,128,1,float16,float16,0,1.3523103713989257
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1024,64,1,128,1,float16,fp8,0,1.2629568099975585
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1024,64,64,128,1,float16,float16,0,2.084199905395508
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1024,64,64,128,1,float16,fp8,0,1.7606847763061524
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1024,64,1,128,1,fp8,fp8,0,1.3862015724182128
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1024,64,64,128,1,fp8,fp8,0,1.9329776763916016
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1024,64,2,128,1,float16,float16,0,1.3717311859130858
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1024,64,2,128,1,float16,fp8,0,1.2627488136291505
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1024,64,2,128,1,fp8,fp8,0,1.3591856002807616
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1024,64,4,128,1,float16,float16,0,1.375584030151367
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1024,64,4,128,1,fp8,fp8,0,1.2645584106445313
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1024,64,4,128,1,float16,fp8,0,1.3714655876159667
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1024,64,64,128,1,float16,float16,0,1.025590419769287
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1024,64,8,128,1,float16,float16,0,1.400931167602539
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1024,64,8,128,1,fp8,fp8,0,1.3506447792053222
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1024,64,1,128,1,float16,float16,0,0.6753664016723633
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1024,64,1,128,1,float16,fp8,0,0.6527760028839111
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1024,64,64,128,1,float16,fp8,0,0.8991184234619141
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1024,64,64,128,1,fp8,fp8,0,0.9995087623596192
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1024,64,1,128,1,fp8,fp8,0,0.6757232189178467
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1024,64,2,128,1,float16,float16,0,0.6762447834014893
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1024,64,2,128,1,float16,fp8,0,0.6587423801422119
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1024,64,2,128,1,fp8,fp8,0,0.6501791954040528
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1024,64,4,128,1,float16,float16,0,0.683238410949707
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1024,64,4,128,1,float16,fp8,0,0.649454402923584
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1024,64,4,128,1,fp8,fp8,0,0.685811185836792
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1024,64,8,128,1,float16,float16,0,0.7091631889343262
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1024,64,8,128,1,float16,fp8,0,0.7350351810455322
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1024,64,8,128,1,fp8,fp8,0,0.6522223949432373
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1024,64,64,128,1,float16,float16,0,0.5192704200744629
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1024,64,64,128,1,float16,fp8,0,0.45923361778259275
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1024,64,1,128,1,float16,fp8,0,0.33291680812835694
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1024,64,1,128,1,fp8,fp8,0,0.3730207920074463
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1024,64,2,128,1,float16,float16,0,0.3484496116638184
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1024,64,2,128,1,float16,fp8,0,0.3375103950500488
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1024,64,2,128,1,fp8,fp8,0,0.33046081066131594
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1024,64,4,128,1,float16,float16,0,0.34868319034576417
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1024,64,4,128,1,float16,fp8,0,0.3445728063583374
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1024,64,4,128,1,fp8,fp8,0,0.33018879890441893
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1024,64,8,128,1,float16,float16,0,0.35867359638214114
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1024,64,8,128,1,float16,fp8,0,0.3300447940826416
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1024,64,64,128,1,float16,float16,0,0.26910560131072997
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1024,64,1,128,1,float16,float16,0,0.17975679636001587
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1024,64,64,128,1,fp8,fp8,0,0.2365936040878296
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1024,64,1,128,1,float16,fp8,0,0.1786944031715393
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1024,64,1,128,1,fp8,fp8,0,0.1741984009742737
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1024,64,2,128,1,float16,float16,0,0.17986559867858887
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1024,64,2,128,1,float16,fp8,0,0.17653599977493287
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1024,64,2,128,1,fp8,fp8,0,0.17398079633712768
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1024,64,4,128,1,float16,float16,0,0.1822543978691101
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1024,64,4,128,1,float16,fp8,0,0.17657920122146606
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1024,64,4,128,1,fp8,fp8,0,0.17419999837875366
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1024,64,8,128,1,float16,float16,0,0.18731839656829835
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1024,64,8,128,1,float16,fp8,0,0.17676960229873656
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1024,64,64,128,1,float16,float16,0,0.1417232036590576
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1024,64,64,128,1,float16,fp8,0,0.12838560342788696
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1024,64,64,128,1,fp8,fp8,0,0.12778400182723998
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1024,64,1,128,1,float16,float16,0,0.10047680139541626
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1024,64,1,128,1,float16,fp8,0,0.09542080163955688
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1024,64,1,128,1,fp8,fp8,0,0.09524639844894409
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1024,64,2,128,1,float16,float16,0,0.10050400495529174
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1024,64,2,128,1,float16,fp8,0,0.09506239891052246
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1024,64,2,128,1,fp8,fp8,0,0.09507359862327576
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1024,64,4,128,1,float16,float16,0,0.10194720029830932
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1024,64,1,128,1,float16,float16,0,0.35752320289611816
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1024,64,64,128,1,fp8,fp8,0,0.453115177154541
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1024,64,4,128,1,float16,fp8,0,0.09547039866447449
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1024,64,4,128,1,fp8,fp8,0,0.09482240080833435
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1024,64,8,128,1,float16,float16,0,0.1041375994682312
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1024,64,8,128,1,float16,fp8,0,0.09578719735145569
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1024,64,8,128,1,fp8,fp8,0,0.09519680142402649
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1024,64,64,128,1,float16,float16,0,0.08101119995117187
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1024,64,64,128,1,float16,fp8,0,0.07242720127105713
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1024,64,64,128,1,fp8,fp8,0,0.0717519998550415
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1024,64,1,128,1,float16,float16,0,0.05866559743881226
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1024,64,1,128,1,float16,fp8,0,0.05576000213623047
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1024,64,1,128,1,fp8,fp8,0,0.05543680191040039
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1024,64,2,128,1,float16,float16,0,0.058462399244308474
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1024,64,2,128,1,float16,fp8,0,0.05580000281333923
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1024,64,2,128,1,fp8,fp8,0,0.05556960105895996
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1024,64,4,128,1,float16,float16,0,0.05884479880332947
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1024,64,4,128,1,float16,fp8,0,0.05575680136680603
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1024,64,4,128,1,fp8,fp8,0,0.05552800297737122
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1024,64,8,128,1,float16,float16,0,0.060063999891281125
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1024,64,8,128,1,float16,fp8,0,0.05574880242347717
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1024,64,8,128,1,fp8,fp8,0,0.05568159818649292
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1024,64,8,128,1,fp8,fp8,0,0.33276479244232177
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1024,64,64,128,1,fp8,fp8,0,0.043137601017951964
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1024,64,64,128,1,float16,fp8,0,0.23616960048675537
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1024,64,1,128,1,float16,float16,0,0.03738720118999481
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1024,64,1,128,1,float16,fp8,0,0.03513599932193756
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1024,64,1,128,1,fp8,fp8,0,0.0353408008813858
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1024,64,2,128,1,float16,float16,0,0.03731200098991394
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1024,64,2,128,1,float16,fp8,0,0.03519839942455292
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1024,64,2,128,1,fp8,fp8,0,0.03523840010166168
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1024,64,4,128,1,float16,float16,0,0.03781920075416565
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1024,64,4,128,1,float16,fp8,0,0.03513120114803314
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1024,64,4,128,1,fp8,fp8,0,0.03563840091228485
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1024,64,8,128,1,float16,float16,0,0.03919360041618347
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1024,64,8,128,1,float16,fp8,0,0.03544960021972656
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1024,64,8,128,1,fp8,fp8,0,0.035559999942779544
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1024,64,8,128,1,float16,fp8,0,1.2723679542541504
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1024,64,8,128,1,fp8,fp8,0,0.17417440414428711
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,512,64,1,128,1,float16,fp8,0,1.8571216583251953
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,512,64,1,128,1,float16,float16,0,1.882302474975586
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,512,64,2,128,1,float16,float16,0,1.933278465270996
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,512,64,2,128,1,float16,fp8,0,1.8589088439941406
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1024,64,64,128,1,float16,float16,0,0.046291199326515195
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,512,64,2,128,1,fp8,fp8,0,1.8542112350463866
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,512,64,4,128,1,float16,fp8,0,1.9326400756835938
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,512,64,4,128,1,float16,float16,0,2.081551933288574
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,512,64,4,128,1,fp8,fp8,0,1.8579967498779297
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,512,64,8,128,1,float16,fp8,0,1.854515266418457
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,512,64,8,128,1,float16,float16,0,2.04616641998291
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,512,64,1,128,1,fp8,fp8,0,1.8616479873657226
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,512,64,64,128,1,float16,fp8,0,1.4945903778076173
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,512,64,8,128,1,fp8,fp8,0,1.852881622314453
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,512,64,1,128,1,float16,float16,0,0.9519871711730957
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,512,64,64,128,1,float16,float16,0,1.6554079055786133
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,512,64,1,128,1,float16,fp8,0,1.0360799789428712
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,512,64,64,128,1,fp8,fp8,0,1.4179391860961914
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1024,64,64,128,1,float16,fp8,0,0.043244799971580504
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,512,64,1,128,1,fp8,fp8,0,0.937492847442627
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,512,64,2,128,1,float16,float16,0,0.9540911674499511
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,512,64,2,128,1,fp8,fp8,0,0.9695296287536621
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,512,64,4,128,1,float16,fp8,0,0.9593680381774903
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,512,64,4,128,1,float16,float16,0,1.0257760047912599
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,512,64,4,128,1,fp8,fp8,0,0.9356255531311035
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,512,64,8,128,1,float16,fp8,0,0.9348015785217285
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,512,64,8,128,1,float16,float16,0,1.0050368309020996
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,512,64,1,128,1,float16,float16,0,0.47642078399658205
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,512,64,1,128,1,float16,fp8,0,0.47542080879211424
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,512,64,64,128,1,float16,fp8,0,0.7342175960540771
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,512,64,8,128,1,fp8,fp8,0,0.9374719619750976
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,512,64,64,128,1,float16,float16,0,0.8257328033447265
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,512,64,64,128,1,fp8,fp8,0,0.7816639900207519
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,512,64,1,128,1,fp8,fp8,0,0.4757215976715088
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,512,64,2,128,1,float16,float16,0,0.47842721939086913
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,512,64,2,128,1,float16,fp8,0,0.4745344161987305
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,512,64,2,128,1,fp8,fp8,0,0.47484002113342283
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,512,64,4,128,1,float16,float16,0,0.49027361869812014
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,512,64,4,128,1,float16,fp8,0,0.4843696117401123
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,512,64,4,128,1,fp8,fp8,0,0.4997407913208008
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,512,64,8,128,1,float16,float16,0,0.5172768115997315
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,512,64,1,128,1,float16,float16,0,0.2524768114089966
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,512,64,64,128,1,float16,fp8,0,0.3681152105331421
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,512,64,8,128,1,float16,fp8,0,0.47460160255432127
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,512,64,8,128,1,fp8,fp8,0,0.47445120811462405
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,512,64,64,128,1,fp8,fp8,0,0.36878719329833987
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,512,64,1,128,1,float16,fp8,0,0.25010559558868406
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,512,64,1,128,1,fp8,fp8,0,0.2510240077972412
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,512,64,2,128,1,float16,float16,0,0.25669920444488525
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,512,64,2,128,1,float16,fp8,0,0.24487199783325195
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,512,64,2,128,1,fp8,fp8,0,0.24498400688171387
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,512,64,4,128,1,float16,float16,0,0.25419518947601316
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,512,64,4,128,1,float16,fp8,0,0.24686079025268554
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,512,64,4,128,1,fp8,fp8,0,0.24649600982666015
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,512,64,8,128,1,float16,float16,0,0.2655872106552124
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,512,64,64,128,1,float16,float16,0,0.21486399173736573
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,512,64,8,128,1,float16,fp8,0,0.24522719383239747
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,512,64,64,128,1,float16,fp8,0,0.19113119840621948
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,512,64,1,128,1,float16,float16,0,0.13465440273284912
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,512,64,64,128,1,fp8,fp8,0,0.191046404838562
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,512,64,1,128,1,float16,fp8,0,0.1312608003616333
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,512,64,1,128,1,fp8,fp8,0,0.13006240129470825
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,512,64,2,128,1,float16,float16,0,0.1339184045791626
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,512,64,2,128,1,float16,fp8,0,0.1300480008125305
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,512,64,2,128,1,fp8,fp8,0,0.130348801612854
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,512,64,4,128,1,float16,float16,0,0.13485599756240846
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,512,64,4,128,1,float16,fp8,0,0.13033440113067626
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,512,64,2,128,1,float16,fp8,0,0.9341279983520507
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,512,64,4,128,1,fp8,fp8,0,0.12983039617538453
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,512,64,8,128,1,float16,float16,0,0.1392176032066345
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,512,64,8,128,1,float16,fp8,0,0.1295151948928833
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,512,64,8,128,1,fp8,fp8,0,0.13035039901733397
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,512,64,64,128,1,float16,fp8,0,0.10334880352020263
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,512,64,64,128,1,float16,float16,0,0.11535680294036865
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,512,64,1,128,1,float16,float16,0,0.07466560006141662
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,512,64,1,128,1,float16,fp8,0,0.07141600251197815
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,512,64,1,128,1,fp8,fp8,0,0.07149119973182679
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,512,64,2,128,1,float16,fp8,0,0.07153919935226441
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,512,64,2,128,1,fp8,fp8,0,0.0709775984287262
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,512,64,4,128,1,float16,float16,0,0.07620159983634948
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,512,64,4,128,1,float16,fp8,0,0.07147520184516906
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,512,64,4,128,1,fp8,fp8,0,0.07209920287132263
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,512,64,8,128,1,float16,float16,0,0.07845600247383118
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,512,64,8,128,1,float16,fp8,0,0.07221599817276
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,512,64,8,128,1,fp8,fp8,0,0.07210080027580261
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,512,64,64,128,1,float16,float16,0,0.06459519863128663
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,512,64,64,128,1,float16,fp8,0,0.05754719972610474
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,512,64,64,128,1,fp8,fp8,0,0.05768479704856873
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,512,64,1,128,1,float16,float16,0,0.043105599284172055
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,512,64,1,128,1,float16,fp8,0,0.041201600432395936
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,512,64,1,128,1,fp8,fp8,0,0.04118880033493042
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,512,64,2,128,1,float16,float16,0,0.043296000361442565
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,512,64,2,128,1,float16,fp8,0,0.04118559956550598
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,512,64,2,128,1,fp8,fp8,0,0.0411871999502182
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,512,64,4,128,1,float16,fp8,0,0.041177600622177124
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,512,64,4,128,1,fp8,fp8,0,0.04118399918079376
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,512,64,8,128,1,float16,float16,0,0.04507679939270019
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,512,64,8,128,1,float16,fp8,0,0.04121440052986145
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,512,64,8,128,1,fp8,fp8,0,0.04120799899101257
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,512,64,64,128,1,float16,fp8,0,0.03351039886474609
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,512,64,64,128,1,fp8,fp8,0,0.03307519853115082
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,512,64,1,128,1,float16,float16,0,0.027852800488471986
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,512,64,1,128,1,float16,fp8,0,0.026844799518585205
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,512,64,64,128,1,float16,float16,0,0.41528801918029784
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,512,64,1,128,1,fp8,fp8,0,0.026876801252365114
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,512,64,2,128,1,float16,float16,0,0.028352001309394838
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,512,64,2,128,1,fp8,fp8,0,0.026825600862503053
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,512,64,4,128,1,float16,float16,0,0.028896000981330872
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,512,64,4,128,1,float16,fp8,0,0.026787200570106508
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,512,64,4,128,1,fp8,fp8,0,0.02686559855937958
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,512,64,8,128,1,fp8,fp8,0,0.2448496103286743
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,512,64,8,128,1,float16,fp8,0,0.02683199942111969
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,512,64,8,128,1,float16,float16,0,0.02895039916038513
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,512,64,8,128,1,fp8,fp8,0,0.0268528014421463
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,512,64,64,128,1,float16,float16,0,0.020747199654579163
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,512,64,64,128,1,float16,fp8,0,0.020712000131607056
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,512,64,64,128,1,fp8,fp8,0,0.02080159932374954
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,512,64,1,128,1,float16,float16,0,0.018134400248527527
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,512,64,1,128,1,float16,fp8,0,0.01701280027627945
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,512,64,1,128,1,fp8,fp8,0,0.016575999557971954
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,512,64,2,128,1,float16,float16,0,0.018644799292087556
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,512,64,2,128,1,float16,fp8,0,0.016676799952983858
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,512,64,2,128,1,fp8,fp8,0,0.01722240000963211
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,512,64,4,128,1,float16,float16,0,0.018619200587272643
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,512,64,4,128,1,float16,fp8,0,0.016569599509239197
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,512,64,4,128,1,fp8,fp8,0,0.017129600048065186
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,512,64,64,128,1,fp8,fp8,0,0.10274560451507568
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,512,64,8,128,1,float16,float16,0,0.018649600446224213
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,512,64,8,128,1,float16,fp8,0,0.017004799842834473
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,512,64,8,128,1,fp8,fp8,0,0.0165583997964859
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,512,64,2,128,1,float16,float16,0,0.07464640140533448
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,256,64,1,128,1,float16,fp8,0,0.7611311912536621
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,256,64,1,128,1,fp8,fp8,0,0.7592639923095703
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,512,64,64,128,1,float16,float16,0,0.03710080087184906
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,256,64,2,128,1,float16,fp8,0,0.7607088088989258
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,256,64,2,128,1,fp8,fp8,0,0.7592735767364502
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,512,64,2,128,1,float16,fp8,0,0.02686559855937958
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,256,64,4,128,1,float16,float16,0,0.7725279808044434
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,256,64,4,128,1,float16,fp8,0,0.7600080013275147
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,256,64,4,128,1,fp8,fp8,0,0.7590384006500244
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,256,64,8,128,1,float16,float16,0,0.8141360282897949
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,256,64,8,128,1,float16,fp8,0,0.7591455936431885
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,256,64,8,128,1,fp8,fp8,0,0.7597040176391602
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,256,64,1,128,1,float16,float16,0,0.38207359313964845
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,256,64,64,128,1,float16,float16,0,0.7202735900878906
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,256,64,64,128,1,float16,fp8,0,0.639406394958496
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,256,64,64,128,1,fp8,fp8,0,0.624132776260376
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,256,64,1,128,1,float16,fp8,0,0.3869136095046997
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,256,64,1,128,1,fp8,fp8,0,0.3868767976760864
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,256,64,2,128,1,float16,float16,0,0.38367199897766113
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,256,64,2,128,1,float16,fp8,0,0.38970720767974854
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,256,64,2,128,1,fp8,fp8,0,0.3871648073196411
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,512,64,4,128,1,float16,float16,0,0.04354879856109619
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,256,64,2,128,1,float16,float16,0,0.7513311862945556
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,256,64,4,128,1,float16,float16,0,0.3939647912979126
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,256,64,4,128,1,float16,fp8,0,0.38655519485473633
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,256,64,4,128,1,fp8,fp8,0,0.3875312089920044
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,256,64,8,128,1,float16,float16,0,0.41794562339782715
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,256,64,8,128,1,float16,fp8,0,0.3871648073196411
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,256,64,64,128,1,float16,float16,0,0.36367359161376955
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,256,64,64,128,1,float16,fp8,0,0.3176608085632324
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,256,64,1,128,1,float16,float16,0,0.1988271951675415
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,256,64,64,128,1,fp8,fp8,0,0.31735520362854003
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,256,64,1,128,1,float16,fp8,0,0.19919999837875366
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,256,64,2,128,1,float16,float16,0,0.19914560317993163
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,256,64,2,128,1,float16,fp8,0,0.19914560317993163
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,256,64,4,128,1,float16,float16,0,0.20449440479278563
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,256,64,4,128,1,float16,fp8,0,0.19924160242080688
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,256,64,4,128,1,fp8,fp8,0,0.19898079633712767
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,256,64,8,128,1,float16,float16,0,0.21421120166778565
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,256,64,8,128,1,fp8,fp8,0,0.19946880340576173
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,256,64,64,128,1,float16,fp8,0,0.16444640159606932
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,256,64,64,128,1,fp8,fp8,0,0.16431200504302979
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,256,64,1,128,1,float16,float16,0,0.10565600395202637
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,256,64,1,128,1,float16,fp8,0,0.10416799783706665
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,256,64,1,128,1,fp8,fp8,0,0.1040719985961914
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,256,64,2,128,1,float16,float16,0,0.10627679824829102
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,256,64,2,128,1,float16,fp8,0,0.10455520153045654
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,256,64,2,128,1,fp8,fp8,0,0.10437439680099488
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,256,64,4,128,1,float16,float16,0,0.10839840173721313
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,256,64,1,128,1,float16,float16,0,0.749457597732544
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,256,64,4,128,1,float16,fp8,0,0.1047327995300293
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,256,64,4,128,1,fp8,fp8,0,0.10388959646224975
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,256,64,8,128,1,float16,float16,0,0.1130687952041626
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,256,64,8,128,1,float16,fp8,0,0.10518560409545899
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,256,64,8,128,1,fp8,fp8,0,0.3867343902587891
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,256,64,8,128,1,fp8,fp8,0,0.10496000051498414
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,256,64,64,128,1,float16,float16,0,0.1008687973022461
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,256,64,64,128,1,float16,fp8,0,0.08808159828186035
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,256,64,1,128,1,float16,float16,0,0.05894240140914917
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,256,64,1,128,1,float16,fp8,0,0.05581120252609253
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,256,64,1,128,1,fp8,fp8,0,0.19775840044021606
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,256,64,2,128,1,fp8,fp8,0,0.19825279712677002
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,256,64,1,128,1,fp8,fp8,0,0.05569919943809509
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,256,64,2,128,1,float16,float16,0,0.05867360234260559
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,256,64,2,128,1,float16,fp8,0,0.05572320222854614
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,256,64,2,128,1,fp8,fp8,0,0.0559328019618988
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,256,64,4,128,1,float16,float16,0,0.06010559797286987
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,256,64,4,128,1,float16,fp8,0,0.056385600566864015
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,256,64,4,128,1,fp8,fp8,0,0.055460798740386966
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,256,64,8,128,1,float16,fp8,0,0.05637120008468628
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,256,64,8,128,1,float16,float16,0,0.0624783992767334
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,256,64,8,128,1,fp8,fp8,0,0.05737760066986084
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,256,64,64,128,1,float16,float16,0,0.05782719850540161
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,256,64,64,128,1,fp8,fp8,0,0.05053439736366272
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,256,64,1,128,1,float16,fp8,0,0.03347040116786957
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,256,64,8,128,1,float16,fp8,0,0.19967520236968994
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,256,64,64,128,1,float16,fp8,0,0.05121440291404724
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,256,64,1,128,1,fp8,fp8,0,0.03356800079345703
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,256,64,2,128,1,float16,float16,0,0.03504000008106232
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,256,64,2,128,1,fp8,fp8,0,0.033899199962615964
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,256,64,4,128,1,float16,float16,0,0.035180801153182985
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,256,64,4,128,1,float16,fp8,0,0.03381919860839844
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,256,64,4,128,1,fp8,fp8,0,0.03395360112190247
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,256,64,8,128,1,float16,float16,0,0.03707680106163025
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,256,64,64,128,1,float16,float16,0,0.1879215955734253
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,256,64,8,128,1,float16,fp8,0,0.03485920131206512
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,256,64,8,128,1,fp8,fp8,0,0.033764800429344176
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,256,64,64,128,1,float16,float16,0,0.029732799530029295
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,256,64,64,128,1,fp8,fp8,0,0.028782400488853454
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,256,64,1,128,1,float16,float16,0,0.020747199654579163
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,256,64,1,128,1,float16,fp8,0,0.020744000375270844
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,256,64,1,128,1,fp8,fp8,0,0.02062080055475235
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,256,64,2,128,1,float16,float16,0,0.02066880017518997
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,256,64,2,128,1,float16,fp8,0,0.020718400180339814
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,256,64,2,128,1,fp8,fp8,0,0.020654399693012238
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,256,64,4,128,1,float16,float16,0,0.022308799624443054
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,256,64,4,128,1,float16,fp8,0,0.020678399503231047
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,256,64,4,128,1,fp8,fp8,0,0.020707200467586517
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,256,64,8,128,1,float16,float16,0,0.022785599529743194
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,256,64,8,128,1,float16,fp8,0,0.020759999752044678
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,256,64,8,128,1,fp8,fp8,0,0.020716799795627593
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,256,64,64,128,1,float16,float16,0,0.018555200099945067
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,256,64,64,128,1,float16,fp8,0,0.018580800294876097
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,256,64,64,128,1,fp8,fp8,0,0.018588800728321076
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,256,64,1,128,1,float16,float16,0,0.014505599439144135
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,256,64,1,128,1,float16,fp8,0,0.014569599926471711
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,256,64,1,128,1,fp8,fp8,0,0.014560000598430633
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,256,64,2,128,1,float16,float16,0,0.014505599439144135
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,256,64,2,128,1,float16,fp8,0,0.014537599682807923
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,256,64,2,128,1,fp8,fp8,0,0.014483200013637542
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,256,64,4,128,1,float16,fp8,0,0.014550399780273438
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,256,64,8,128,1,float16,float16,0,0.014534400403499603
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,256,64,8,128,1,float16,fp8,0,0.014553600549697876
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,256,64,8,128,1,fp8,fp8,0,0.014584000408649444
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,256,64,64,128,1,float16,float16,0,0.014947199821472168
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,256,64,64,128,1,fp8,fp8,0,0.014612799882888794
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,256,64,1,128,1,float16,float16,0,0.013363200426101684
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,256,64,1,128,1,float16,fp8,0,0.012612800300121307
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,256,64,1,128,1,fp8,fp8,0,0.01292639970779419
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,256,64,2,128,1,float16,float16,0,0.014511999487876893
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,256,64,2,128,1,float16,fp8,0,0.013201600313186646
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,256,64,2,128,1,fp8,fp8,0,0.012508800625801087
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,256,64,4,128,1,float16,float16,0,0.013521599769592284
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,256,64,4,128,1,float16,fp8,0,0.012548799812793731
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,256,64,4,128,1,fp8,fp8,0,0.012542399764060973
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,256,64,64,128,1,fp8,fp8,0,0.0872048020362854
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,256,64,8,128,1,float16,fp8,0,0.012608000636100769
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,256,64,1,128,1,float16,float16,0,0.035006400942802426
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,256,64,8,128,1,fp8,fp8,0,0.012491200119256973
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,256,64,2,128,1,float16,fp8,0,0.03503200113773346
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,256,64,64,128,1,float16,fp8,0,0.028910401463508605
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,128,64,1,128,1,float16,float16,0,0.47112321853637695
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,128,64,1,128,1,float16,fp8,0,0.4811408042907715
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,128,64,1,128,1,fp8,fp8,0,0.48021440505981444
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,256,64,4,128,1,float16,float16,0,0.01454399973154068
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,128,64,2,128,1,float16,float16,0,0.4714047908782959
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,256,64,4,128,1,fp8,fp8,0,0.01446399986743927
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,128,64,2,128,1,fp8,fp8,0,0.4799056053161621
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,128,64,4,128,1,float16,float16,0,0.4812191963195801
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,256,64,64,128,1,float16,fp8,0,0.015574400126934052
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,128,64,4,128,1,float16,fp8,0,0.4797488212585449
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,128,64,4,128,1,fp8,fp8,0,0.4785583972930908
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,128,64,64,128,1,float16,float16,0,0.3998624086380005
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,128,64,64,128,1,float16,fp8,0,0.3626543998718262
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,128,64,8,128,1,float16,fp8,0,0.47931361198425293
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,128,64,8,128,1,fp8,fp8,0,0.47971677780151367
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,128,64,1,128,1,float16,float16,0,0.24284000396728517
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,128,64,1,128,1,float16,fp8,0,0.24611361026763917
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,128,64,64,128,1,fp8,fp8,0,0.3637151956558228
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,128,64,1,128,1,fp8,fp8,0,0.2466495990753174
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,128,64,2,128,1,float16,float16,0,0.2438512086868286
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,128,64,2,128,1,float16,fp8,0,0.2461535930633545
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,128,64,2,128,1,fp8,fp8,0,0.24628000259399413
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,128,64,4,128,1,float16,float16,0,0.24795999526977539
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,128,64,4,128,1,float16,fp8,0,0.24598560333251954
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,128,64,4,128,1,fp8,fp8,0,0.24611520767211914
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,128,64,8,128,1,float16,float16,0,0.2567759990692139
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,128,64,8,128,1,float16,fp8,0,0.24624319076538087
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,128,64,8,128,1,fp8,fp8,0,0.24598400592803954
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,128,64,64,128,1,float16,fp8,0,0.18716319799423217
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,128,64,64,128,1,fp8,fp8,0,0.18749279975891114
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,128,64,1,128,1,float16,float16,0,0.1277616024017334
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,128,64,1,128,1,float16,fp8,0,0.12789759635925294
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,128,64,1,128,1,fp8,fp8,0,0.1282240033149719
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,128,64,2,128,1,float16,float16,0,0.12807680368423463
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,128,64,2,128,1,float16,fp8,0,0.12788159847259523
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,128,64,2,128,1,fp8,fp8,0,0.12787359952926636
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,128,64,4,128,1,float16,fp8,0,0.12778079509735107
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,128,64,4,128,1,fp8,fp8,0,0.1281424045562744
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,128,64,8,128,1,float16,float16,0,0.13481760025024414
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,128,64,2,128,1,float16,fp8,0,0.48043041229248046
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,128,64,8,128,1,fp8,fp8,0,0.1280303955078125
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,128,64,64,128,1,float16,float16,0,0.10733439922332763
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,128,64,64,128,1,float16,fp8,0,0.09841279983520508
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,128,64,64,128,1,fp8,fp8,0,0.09843040108680726
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,128,64,1,128,1,float16,float16,0,0.06909760236740112
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,128,64,1,128,1,float16,fp8,0,0.06780160069465638
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,128,64,1,128,1,fp8,fp8,0,0.06803039908409118
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,128,64,2,128,1,float16,float16,0,0.06952319741249084
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,128,64,2,128,1,float16,fp8,0,0.06776639819145203
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,128,64,2,128,1,fp8,fp8,0,0.06810560226440429
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,128,64,4,128,1,float16,fp8,0,0.06798400282859803
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,128,64,4,128,1,fp8,fp8,0,0.06802080273628235
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,128,64,8,128,1,float16,float16,0,0.07319039702415467
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,128,64,8,128,1,float16,fp8,0,0.06851680278778076
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,128,64,8,128,1,float16,float16,0,0.5015056133270264
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,128,64,64,128,1,float16,float16,0,0.058982402086257935
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,128,64,8,128,1,fp8,fp8,0,0.06889119744300842
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,128,64,64,128,1,fp8,fp8,0,0.053452801704406736
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,128,64,64,128,1,float16,fp8,0,0.05367519855499268
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,128,64,1,128,1,float16,float16,0,0.0384768009185791
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,128,64,1,128,1,float16,fp8,0,0.03710399866104126
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,128,64,1,128,1,fp8,fp8,0,0.037108799815177916
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,128,64,2,128,1,float16,float16,0,0.03819040060043335
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,256,64,8,128,1,float16,float16,0,0.014552000164985656
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,128,64,2,128,1,float16,fp8,0,0.03701600134372711
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,128,64,2,128,1,fp8,fp8,0,0.03711200058460236
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,128,64,4,128,1,fp8,fp8,0,0.03707680106163025
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,128,64,4,128,1,float16,float16,0,0.038889598846435544
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,128,64,8,128,1,float16,float16,0,0.04088160097599029
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,128,64,8,128,1,float16,fp8,0,0.03706879913806915
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,128,64,8,128,1,fp8,fp8,0,0.037118399143218996
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,128,64,64,128,1,float16,float16,0,0.033025598526000975
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,128,64,64,128,1,fp8,fp8,0,0.030955201387405394
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,128,64,1,128,1,float16,float16,0,0.02481440007686615
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,128,64,1,128,1,fp8,fp8,0,0.023004800081253052
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,128,64,1,128,1,float16,fp8,0,0.022728000581264497
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,128,64,2,128,1,float16,float16,0,0.02476159930229187
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,128,64,2,128,1,float16,fp8,0,0.022937600314617158
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,128,64,2,128,1,fp8,fp8,0,0.022776000201702118
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,128,64,4,128,1,float16,float16,0,0.02475679963827133
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,128,64,64,128,1,float16,float16,0,0.2039247989654541
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,128,64,4,128,1,fp8,fp8,0,0.023286400735378264
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,128,64,8,128,1,float16,float16,0,0.024798400700092316
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,128,64,8,128,1,float16,fp8,0,0.023206399381160737
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,128,64,8,128,1,fp8,fp8,0,0.0232464000582695
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,128,64,64,128,1,float16,float16,0,0.01865279972553253
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,128,64,64,128,1,float16,fp8,0,0.01870400011539459
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,128,64,64,128,1,fp8,fp8,0,0.01860959976911545
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,128,64,1,128,1,float16,float16,0,0.016465599834918975
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,128,64,1,128,1,float16,fp8,0,0.014871999621391296
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,128,64,1,128,1,fp8,fp8,0,0.014560000598430633
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,128,64,4,128,1,float16,float16,0,0.13022719621658324
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,128,64,2,128,1,float16,float16,0,0.01648160070180893
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,128,64,2,128,1,float16,fp8,0,0.014627200365066529
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,128,64,2,128,1,fp8,fp8,0,0.014454400539398194
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,128,64,4,128,1,float16,float16,0,0.015166400372982025
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,128,64,4,128,1,float16,fp8,0,0.014529600739479065
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,128,64,4,128,1,fp8,fp8,0,0.014529600739479065
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,128,64,8,128,1,float16,float16,0,0.016551999747753142
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,128,64,8,128,1,float16,fp8,0,0.01456640064716339
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,128,64,64,128,1,float16,float16,0,0.012529599666595458
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,128,64,8,128,1,float16,fp8,0,0.12832800149917603
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,128,64,64,128,1,float16,fp8,0,0.01247519999742508
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,128,64,64,128,1,fp8,fp8,0,0.012520000338554382
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,128,64,1,128,1,float16,float16,0,0.011046399921178817
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,128,64,1,128,1,fp8,fp8,0,0.010436800122261048
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,128,64,1,128,1,float16,fp8,0,0.010463999956846238
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,128,64,2,128,1,float16,float16,0,0.010763200372457505
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,128,64,2,128,1,float16,fp8,0,0.011059200018644333
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,128,64,2,128,1,fp8,fp8,0,0.010524799674749374
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,128,64,4,128,1,float16,float16,0,0.01067039966583252
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,128,64,4,128,1,float16,fp8,0,0.010496000200510025
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,128,64,4,128,1,fp8,fp8,0,0.010702399909496308
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,128,64,4,128,1,float16,float16,0,0.07108479738235474
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,128,64,8,128,1,float16,fp8,0,0.010449600219726563
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,128,64,64,128,1,float16,float16,0,0.01250240057706833
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,128,64,64,128,1,float16,fp8,0,0.011876799911260606
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,128,64,64,128,1,fp8,fp8,0,0.0123648002743721
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,128,64,1,128,1,float16,float16,0,0.010451199859380722
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,128,64,1,128,1,float16,fp8,0,0.010441599786281586
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,128,64,1,128,1,fp8,fp8,0,0.010480000078678131
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,128,64,2,128,1,float16,float16,0,0.010443200170993806
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,128,64,2,128,1,float16,fp8,0,0.010476800054311753
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,128,64,4,128,1,float16,fp8,0,0.03711999952793121
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,128,64,2,128,1,fp8,fp8,0,0.010449600219726563
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,128,64,4,128,1,float16,float16,0,0.010480000078678131
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,128,64,4,128,1,float16,fp8,0,0.010468800365924836
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,128,64,64,128,1,float16,fp8,0,0.030849599838256837
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,128,64,8,128,1,float16,float16,0,0.010566399991512298
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,128,64,8,128,1,float16,fp8,0,0.010478399693965912
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,128,64,8,128,1,fp8,fp8,0,0.010476800054311753
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,128,64,4,128,1,float16,fp8,0,0.022742399573326112
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,64,64,1,128,1,float16,fp8,0,0.3947632074356079
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,64,64,1,128,1,fp8,fp8,0,0.3942447900772095
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,64,64,2,128,1,float16,float16,0,0.39600160121917727
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,128,64,8,128,1,fp8,fp8,0,0.014918400347232819
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,64,64,2,128,1,float16,fp8,0,0.39436318874359133
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,64,64,2,128,1,fp8,fp8,0,0.39445760250091555
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,64,64,4,128,1,float16,float16,0,0.4000703811645508
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,64,64,4,128,1,float16,fp8,0,0.39361441135406494
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,64,64,4,128,1,fp8,fp8,0,0.39404799938201907
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,64,64,8,128,1,float16,float16,0,0.41017441749572753
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,128,64,8,128,1,float16,float16,0,0.010500799864530563
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,64,64,8,128,1,float16,fp8,0,0.39402079582214355
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,128,64,8,128,1,fp8,fp8,0,0.010454399883747101
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,64,64,64,128,1,float16,float16,0,0.28010239601135256
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,64,64,8,128,1,fp8,fp8,0,0.39367520809173584
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,64,64,64,128,1,float16,fp8,0,0.2624847888946533
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,64,64,64,128,1,fp8,fp8,0,0.2602880001068115
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,64,64,1,128,1,float16,fp8,0,0.20214719772338868
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,64,64,1,128,1,fp8,fp8,0,0.20355839729309083
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,64,64,2,128,1,float16,float16,0,0.2045520067214966
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,64,64,2,128,1,fp8,fp8,0,0.20240480899810792
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,64,64,4,128,1,float16,float16,0,0.2071455955505371
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,64,64,4,128,1,float16,fp8,0,0.20290079116821289
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,64,64,8,128,1,float16,float16,0,0.211296010017395
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,64,64,8,128,1,float16,fp8,0,0.20303680896759033
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,64,64,64,128,1,float16,float16,0,0.14527839422225952
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,64,64,8,128,1,fp8,fp8,0,0.20332798957824708
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,128,64,4,128,1,fp8,fp8,0,0.010443200170993806
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,64,64,64,128,1,float16,fp8,0,0.13617440462112426
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,64,64,1,128,1,float16,float16,0,0.10876959562301636
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,64,64,1,128,1,float16,fp8,0,0.10560640096664428
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,64,64,1,128,1,fp8,fp8,0,0.10583679676055908
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,64,64,2,128,1,float16,float16,0,0.10947519540786743
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,64,64,2,128,1,float16,fp8,0,0.10608960390090942
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,64,64,2,128,1,fp8,fp8,0,0.1057695984840393
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,64,64,4,128,1,float16,float16,0,0.1105072021484375
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,64,64,1,128,1,float16,float16,0,0.39544799327850344
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,64,64,4,128,1,fp8,fp8,0,0.10618079900741577
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,64,64,8,128,1,float16,float16,0,0.11303999423980712
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,64,64,8,128,1,fp8,fp8,0,0.10644960403442383
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,64,64,64,128,1,float16,float16,0,0.0783456027507782
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,64,64,64,128,1,float16,fp8,0,0.072707200050354
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,64,64,64,128,1,fp8,fp8,0,0.07314879894256592
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,64,64,1,128,1,float16,float16,0,0.05878239870071411
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,64,64,1,128,1,fp8,fp8,0,0.05677919983863831
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,64,64,2,128,1,float16,float16,0,0.05902559757232666
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,64,64,2,128,1,float16,fp8,0,0.056443202495574954
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,64,64,2,128,1,fp8,fp8,0,0.056380802392959596
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,64,64,4,128,1,float16,float16,0,0.05923839807510376
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,64,64,4,128,1,float16,fp8,0,0.05673760175704956
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,64,64,4,128,1,fp8,fp8,0,0.05677279829978943
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,64,64,1,128,1,float16,float16,0,0.20378561019897462
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,64,64,8,128,1,float16,float16,0,0.06149759888648987
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,64,64,8,128,1,float16,fp8,0,0.05664160251617432
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,64,64,8,128,1,fp8,fp8,0,0.056959998607635495
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,64,64,64,128,1,float16,float16,0,0.042393600940704344
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,64,64,2,128,1,float16,fp8,0,0.20339360237121581
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,64,64,64,128,1,float16,fp8,0,0.0391072005033493
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,64,64,64,128,1,fp8,fp8,0,0.039105600118637084
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,64,64,1,128,1,float16,float16,0,0.033206400275230405
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,64,64,1,128,1,float16,fp8,0,0.03274720013141632
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,64,64,4,128,1,fp8,fp8,0,0.20279040336608886
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,64,64,1,128,1,fp8,fp8,0,0.031998398900032046
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,64,64,2,128,1,float16,float16,0,0.0333407998085022
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,64,64,2,128,1,float16,fp8,0,0.032425600290298465
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,64,64,2,128,1,fp8,fp8,0,0.031302401423454286
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,64,64,4,128,1,float16,float16,0,0.03328000009059906
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,64,64,4,128,1,float16,fp8,0,0.03237760066986084
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,64,64,4,128,1,fp8,fp8,0,0.03118560016155243
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,64,64,8,128,1,float16,float16,0,0.0346560001373291
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,64,64,8,128,1,float16,fp8,0,0.03165439963340759
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,64,64,8,128,1,fp8,fp8,0,0.031140801310539246
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,64,64,64,128,1,float16,float16,0,0.024718399345874786
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,64,64,64,128,1,fp8,fp8,0,0.02478879988193512
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,64,64,64,128,1,fp8,fp8,0,0.1359536051750183
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,64,64,1,128,1,float16,float16,0,0.020691199600696562
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,64,64,1,128,1,float16,fp8,0,0.020641599595546723
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,64,64,1,128,1,fp8,fp8,0,0.0207056000828743
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,64,64,2,128,1,float16,fp8,0,0.020686399936676026
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,64,64,2,128,1,float16,float16,0,0.020769600570201874
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,64,64,4,128,1,float16,float16,0,0.02075839936733246
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,64,64,4,128,1,float16,fp8,0,0.020664000511169435
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,64,64,4,128,1,fp8,fp8,0,0.020662400126457214
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,64,64,4,128,1,float16,fp8,0,0.10629919767379761
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,64,64,8,128,1,float16,float16,0,0.02117920070886612
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,64,64,8,128,1,float16,fp8,0,0.020681600272655486
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,64,64,8,128,1,fp8,fp8,0,0.020708799362182617
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,64,64,64,128,1,float16,float16,0,0.016606399416923524
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,64,64,64,128,1,float16,fp8,0,0.016574400663375854
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,64,64,8,128,1,float16,fp8,0,0.10567840337753295
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,64,64,64,128,1,fp8,fp8,0,0.016220800578594208
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,64,64,1,128,1,float16,float16,0,0.014539200067520141
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,64,64,1,128,1,fp8,fp8,0,0.0144896000623703
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,64,64,2,128,1,float16,fp8,0,0.01456640064716339
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,64,64,2,128,1,fp8,fp8,0,0.01446399986743927
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,64,64,4,128,1,float16,float16,0,0.01451359987258911
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,64,64,4,128,1,fp8,fp8,0,0.014545600116252898
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,64,64,1,128,1,float16,fp8,0,0.05697119832038879
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,64,64,8,128,1,float16,float16,0,0.014590400457382201
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,64,64,8,128,1,float16,fp8,0,0.01459839940071106
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,64,64,8,128,1,fp8,fp8,0,0.014531199634075165
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,64,64,64,128,1,float16,float16,0,0.012534399330615998
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,64,64,64,128,1,float16,fp8,0,0.012443199753761292
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,64,64,64,128,1,fp8,fp8,0,0.010836800187826156
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,64,64,1,128,1,float16,float16,0,0.010451199859380722
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,64,64,1,128,1,float16,fp8,0,0.010526400059461594
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,64,64,1,128,1,fp8,fp8,0,0.010427200049161912
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,64,64,2,128,1,float16,float16,0,0.010470400005578995
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,64,64,2,128,1,float16,fp8,0,0.0104592002928257
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,64,64,2,128,1,fp8,fp8,0,0.010497599840164185
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,64,64,4,128,1,float16,float16,0,0.010438399761915207
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,64,64,4,128,1,float16,fp8,0,0.010486400127410889
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,64,64,4,128,1,fp8,fp8,0,0.0104592002928257
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,64,64,8,128,1,float16,float16,0,0.010471999645233154
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,64,64,8,128,1,fp8,fp8,0,0.010468800365924836
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,64,64,8,128,1,float16,fp8,0,0.010460799932479859
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,64,64,64,128,1,float16,float16,0,0.012507200241088867
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,64,64,64,128,1,float16,fp8,0,0.010547199845314026
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,64,64,64,128,1,fp8,fp8,0,0.010500799864530563
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,64,64,1,128,1,float16,float16,0,0.010427200049161912
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,64,64,1,128,1,float16,fp8,0,0.010460799932479859
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,64,64,1,128,1,fp8,fp8,0,0.01043360009789467
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,64,64,2,128,1,float16,float16,0,0.010462400317192078
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,64,64,2,128,1,float16,fp8,0,0.010468800365924836
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,64,64,2,128,1,fp8,fp8,0,0.010476800054311753
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,64,64,4,128,1,float16,float16,0,0.010451199859380722
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,64,64,4,128,1,float16,fp8,0,0.010428799688816071
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,64,64,4,128,1,fp8,fp8,0,0.010441599786281586
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,64,64,8,128,1,float16,float16,0,0.010420800000429154
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,64,64,8,128,1,float16,fp8,0,0.010436800122261048
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,64,64,8,128,1,fp8,fp8,0,0.010491199791431427
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,32,64,1,128,1,float16,float16,0,0.35059680938720705
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,32,64,1,128,1,float16,fp8,0,0.3488607883453369
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,32,64,1,128,1,fp8,fp8,0,0.34910399913787843
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,64,64,2,128,1,fp8,fp8,0,0.020644800364971162
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,32,64,2,128,1,float16,float16,0,0.3508960008621216
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,32,64,2,128,1,float16,fp8,0,0.34919679164886475
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,32,64,2,128,1,fp8,fp8,0,0.3491856098175049
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,32,64,4,128,1,float16,float16,0,0.35325601100921633
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,64,64,1,128,1,float16,fp8,0,0.014524799585342408
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,64,64,2,128,1,float16,float16,0,0.014582400023937226
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,32,64,4,128,1,float16,fp8,0,0.3490783929824829
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,32,64,4,128,1,fp8,fp8,0,0.34934399127960203
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,32,64,8,128,1,float16,float16,0,0.35744318962097166
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,64,64,4,128,1,float16,fp8,0,0.014531199634075165
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,32,64,64,128,1,float16,float16,0,0.2170095920562744
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,32,64,64,128,1,float16,fp8,0,0.20960640907287598
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,32,64,8,128,1,fp8,fp8,0,0.3485392093658447
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,32,64,1,128,1,float16,float16,0,0.1833840012550354
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,32,64,1,128,1,float16,fp8,0,0.17908639907836915
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,32,64,64,128,1,fp8,fp8,0,0.20993919372558595
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,32,64,1,128,1,fp8,fp8,0,0.17909120321273803
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,32,64,2,128,1,float16,float16,0,0.18237760066986083
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,32,64,2,128,1,float16,fp8,0,0.17976640462875365
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,32,64,2,128,1,fp8,fp8,0,0.1800528049468994
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,32,64,4,128,1,float16,float16,0,0.1837183952331543
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,32,64,4,128,1,float16,fp8,0,0.17987200021743774
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,32,64,4,128,1,fp8,fp8,0,0.17969599962234498
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,32,64,8,128,1,float16,float16,0,0.18601759672164916
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,32,64,64,128,1,float16,float16,0,0.11636639833450317
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,32,64,8,128,1,float16,fp8,0,0.18020800352096558
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,32,64,64,128,1,float16,fp8,0,0.11053279638290406
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,32,64,64,128,1,fp8,fp8,0,0.11016000509262085
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,32,64,8,128,1,fp8,fp8,0,0.17981599569320678
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,32,64,1,128,1,float16,fp8,0,0.09402400255203247
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,32,64,1,128,1,fp8,fp8,0,0.09412000179290772
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,32,64,2,128,1,float16,fp8,0,0.09418560266494751
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,32,64,2,128,1,fp8,fp8,0,0.09383839964866639
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,32,64,4,128,1,float16,float16,0,0.09688479900360107
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,32,64,4,128,1,float16,fp8,0,0.09412320256233216
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,32,64,4,128,1,fp8,fp8,0,0.0939296007156372
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,32,64,8,128,1,float16,float16,0,0.09840160012245178
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,32,64,8,128,1,float16,fp8,0,0.09367520213127137
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,32,64,8,128,1,fp8,fp8,0,0.0934224009513855
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,32,64,64,128,1,float16,float16,0,0.0622111976146698
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,32,64,64,128,1,float16,fp8,0,0.05780320167541504
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,32,64,64,128,1,fp8,fp8,0,0.05776960253715515
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,32,64,1,128,1,float16,float16,0,0.05370240211486817
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,32,64,1,128,1,float16,fp8,0,0.0512287974357605
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,32,64,1,128,1,fp8,fp8,0,0.05072479844093323
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,32,64,2,128,1,float16,float16,0,0.053615999221801755
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,32,64,2,128,1,float16,fp8,0,0.05117440223693848
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,32,64,2,128,1,fp8,fp8,0,0.05108479857444763
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,32,64,4,128,1,float16,float16,0,0.05371040105819702
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,32,64,4,128,1,float16,fp8,0,0.05083839893341065
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,32,64,4,128,1,fp8,fp8,0,0.05098559856414795
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,32,64,8,128,1,fp8,fp8,0,0.05124800205230713
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,32,64,64,128,1,float16,float16,0,0.03453760147094727
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,64,64,64,128,1,float16,fp8,0,0.024823999404907225
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,32,64,64,128,1,float16,fp8,0,0.03298079967498779
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,32,64,64,128,1,fp8,fp8,0,0.032971200346946714
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,32,64,1,128,1,float16,float16,0,0.031159999966621398
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,32,64,1,128,1,float16,fp8,0,0.028910401463508605
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,32,64,1,128,1,fp8,fp8,0,0.028884801268577575
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,32,64,2,128,1,float16,float16,0,0.031014400720596313
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,32,64,2,128,1,float16,fp8,0,0.028886398673057555
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,32,64,2,128,1,fp8,fp8,0,0.0289247989654541
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,32,64,4,128,1,float16,float16,0,0.030953601002693176
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,32,64,4,128,1,float16,fp8,0,0.02889760136604309
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,32,64,4,128,1,fp8,fp8,0,0.028892800211906433
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,32,64,8,128,1,float16,float16,0,0.03152480125427246
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,32,64,8,128,1,float16,fp8,0,0.028910401463508605
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,32,64,8,128,1,fp8,fp8,0,0.028963199257850646
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,32,64,64,128,1,float16,float16,0,0.022652800381183624
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,32,64,64,128,1,fp8,fp8,0,0.02072640061378479
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,32,64,1,128,1,float16,float16,0,0.020606400072574617
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,32,64,8,128,1,float16,fp8,0,0.34904320240020753
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,32,64,1,128,1,float16,fp8,0,0.019014400243759156
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,32,64,1,128,1,fp8,fp8,0,0.018695999681949616
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,32,64,64,128,1,float16,fp8,0,0.02091040015220642
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,32,64,2,128,1,float16,float16,0,0.020707200467586517
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,32,64,2,128,1,float16,fp8,0,0.01865600049495697
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,32,64,2,128,1,fp8,fp8,0,0.018854400515556334
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,32,64,4,128,1,float16,fp8,0,0.018667200207710268
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,32,64,4,128,1,fp8,fp8,0,0.018884800374507904
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,32,64,8,128,1,float16,float16,0,0.020449599623680113
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,32,64,8,128,1,float16,fp8,0,0.019016000628471374
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,32,64,8,128,1,fp8,fp8,0,0.018646399676799773
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,32,64,64,128,1,float16,float16,0,0.014662399888038635
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,32,64,64,128,1,float16,fp8,0,0.014524799585342408
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,32,64,64,128,1,fp8,fp8,0,0.014572800695896148
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,32,64,1,128,1,float16,float16,0,0.01451520025730133
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,32,64,1,128,1,float16,fp8,0,0.012967999279499053
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,32,64,1,128,1,fp8,fp8,0,0.012830400466918945
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,32,64,1,128,1,float16,float16,0,0.09723520278930664
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,32,64,2,128,1,float16,float16,0,0.013075199723243714
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,32,64,2,128,1,float16,fp8,0,0.013009600341320038
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,32,64,4,128,1,float16,float16,0,0.013199999928474426
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,32,64,2,128,1,fp8,fp8,0,0.013016000390052795
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,32,64,4,128,1,float16,fp8,0,0.012441600114107132
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,32,64,4,128,1,fp8,fp8,0,0.01250080019235611
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,32,64,8,128,1,float16,fp8,0,0.012476799637079239
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,32,64,8,128,1,fp8,fp8,0,0.01252480000257492
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,32,64,8,128,1,float16,float16,0,0.014507199823856353
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,32,64,64,128,1,float16,float16,0,0.012415999919176102
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,32,64,64,128,1,float16,fp8,0,0.010425599664449692
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,32,64,1,128,1,float16,float16,0,0.010382399708032609
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,32,64,64,128,1,fp8,fp8,0,0.010447999835014344
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,32,64,1,128,1,float16,fp8,0,0.010371199995279311
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,32,64,1,128,1,fp8,fp8,0,0.010422399640083313
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,32,64,2,128,1,float16,float16,0,0.010476800054311753
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,32,64,2,128,1,float16,fp8,0,0.010443200170993806
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,32,64,2,128,1,fp8,fp8,0,0.0103472001850605
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,32,64,4,128,1,float16,float16,0,0.010435199737548828
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,32,64,4,128,1,float16,fp8,0,0.010441599786281586
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,32,64,8,128,1,float16,float16,0,0.010443200170993806
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,32,64,8,128,1,float16,fp8,0,0.01037919968366623
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,32,64,8,128,1,fp8,fp8,0,0.010496000200510025
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,32,64,64,128,1,float16,float16,0,0.01239520013332367
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,32,64,64,128,1,float16,fp8,0,0.010449600219726563
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,32,64,64,128,1,fp8,fp8,0,0.010446400195360184
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,32,64,1,128,1,float16,float16,0,0.010463999956846238
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,32,64,8,128,1,float16,float16,0,0.05428479909896851
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,32,64,1,128,1,float16,fp8,0,0.010478399693965912
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,32,64,2,128,1,float16,fp8,0,0.009536000341176987
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,32,64,2,128,1,fp8,fp8,0,0.010463999956846238
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,32,64,4,128,1,float16,float16,0,0.01043199971318245
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,32,64,4,128,1,float16,fp8,0,0.010468800365924836
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,32,64,4,128,1,fp8,fp8,0,0.010380800068378448
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,32,64,8,128,1,float16,float16,0,0.010475199669599533
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,32,64,8,128,1,fp8,fp8,0,0.010425599664449692
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,16,64,1,128,1,float16,float16,0,0.3410048007965088
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,16,64,1,128,1,float16,fp8,0,0.3292367935180664
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,32,64,4,128,1,float16,float16,0,0.020609599351882935
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,16,64,1,128,1,fp8,fp8,0,0.32892160415649413
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,16,64,2,128,1,float16,float16,0,0.3408031940460205
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,32,64,2,128,1,float16,float16,0,0.09793440103530884
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,16,64,2,128,1,float16,fp8,0,0.32905280590057373
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,16,64,2,128,1,fp8,fp8,0,0.3291568040847778
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,32,64,4,128,1,fp8,fp8,0,0.010487999767065048
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,16,64,4,128,1,float16,float16,0,0.3418175935745239
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,16,64,4,128,1,float16,fp8,0,0.32932319641113283
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,32,64,8,128,1,float16,fp8,0,0.05098080039024353
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,32,64,1,128,1,fp8,fp8,0,0.0095040000975132
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,16,64,4,128,1,fp8,fp8,0,0.3288336038589478
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,16,64,8,128,1,float16,float16,0,0.3446000099182129
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,16,64,8,128,1,float16,fp8,0,0.32914559841156005
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,16,64,64,128,1,float16,float16,0,0.19436800479888916
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,16,64,64,128,1,float16,fp8,0,0.18519840240478516
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,16,64,8,128,1,fp8,fp8,0,0.3282655954360962
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,16,64,64,128,1,fp8,fp8,0,0.18529599905014038
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,16,64,1,128,1,float16,float16,0,0.17698559761047364
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,16,64,1,128,1,float16,fp8,0,0.16822079420089722
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,16,64,2,128,1,float16,float16,0,0.1763327956199646
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,16,64,2,128,1,float16,fp8,0,0.1683359980583191
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,16,64,2,128,1,fp8,fp8,0,0.16864320039749145
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,16,64,4,128,1,float16,float16,0,0.1770416021347046
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,16,64,4,128,1,float16,fp8,0,0.16832319498062134
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,16,64,4,128,1,fp8,fp8,0,0.16872639656066896
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,16,64,8,128,1,float16,float16,0,0.17911360263824463
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,16,64,8,128,1,float16,fp8,0,0.16874560117721557
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,16,64,8,128,1,fp8,fp8,0,0.16813440322875978
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,16,64,64,128,1,float16,float16,0,0.10384639501571655
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,16,64,64,128,1,float16,fp8,0,0.09656000137329102
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,16,64,1,128,1,float16,float16,0,0.09410880208015442
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,16,64,64,128,1,fp8,fp8,0,0.09661120176315308
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,16,64,1,128,1,float16,fp8,0,0.08877919912338257
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,16,64,1,128,1,fp8,fp8,0,0.0887167990207672
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,16,64,2,128,1,float16,float16,0,0.09422079920768738
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,16,64,2,128,1,float16,fp8,0,0.08903200030326844
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,16,64,2,128,1,fp8,fp8,0,0.08889279961585998
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,16,64,4,128,1,float16,fp8,0,0.08857920169830322
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,16,64,4,128,1,float16,float16,0,0.09460960030555725
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,16,64,4,128,1,fp8,fp8,0,0.08898720145225525
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,16,64,8,128,1,float16,float16,0,0.0946672022342682
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,16,64,8,128,1,float16,fp8,0,0.08856480121612549
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,16,64,64,128,1,float16,float16,0,0.055795198678970336
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,16,64,8,128,1,fp8,fp8,0,0.0887440025806427
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,16,64,64,128,1,float16,fp8,0,0.05252640247344971
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,16,64,64,128,1,fp8,fp8,0,0.051948797702789304
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,16,64,1,128,1,float16,fp8,0,0.04916000068187713
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,16,64,1,128,1,float16,float16,0,0.05356799960136414
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,16,64,1,128,1,fp8,fp8,0,0.049112001061439516
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,16,64,2,128,1,float16,fp8,0,0.04888159930706024
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,16,64,2,128,1,fp8,fp8,0,0.0491344004869461
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,16,64,4,128,1,float16,float16,0,0.05268480181694031
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,16,64,4,128,1,fp8,fp8,0,0.04917599856853485
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,16,64,8,128,1,float16,float16,0,0.05275999903678894
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,16,64,8,128,1,float16,fp8,0,0.049369600415229795
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,16,64,8,128,1,fp8,fp8,0,0.04905439913272858
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,16,64,64,128,1,float16,float16,0,0.032974401116371156
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,16,64,64,128,1,float16,fp8,0,0.030744001269340515
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,16,64,64,128,1,fp8,fp8,0,0.030616000294685364
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,16,64,1,128,1,float16,float16,0,0.03094240128993988
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,16,64,1,128,1,float16,fp8,0,0.02847520112991333
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,16,64,1,128,1,fp8,fp8,0,0.02882240116596222
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,16,64,2,128,1,float16,float16,0,0.030913600325584413
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,16,64,2,128,1,float16,fp8,0,0.02892000079154968
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,16,64,2,128,1,fp8,fp8,0,0.028830400109291075
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,16,64,4,128,1,float16,float16,0,0.03094080090522766
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,16,64,4,128,1,float16,fp8,0,0.02886880040168762
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,16,64,4,128,1,fp8,fp8,0,0.02889919877052307
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,16,64,8,128,1,float16,float16,0,0.030902400612831116
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,16,64,8,128,1,float16,fp8,0,0.028911998867988585
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,16,64,8,128,1,fp8,fp8,0,0.02887200117111206
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,16,64,64,128,1,float16,float16,0,0.02085919976234436
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,16,64,64,128,1,float16,fp8,0,0.019351999461650848
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,16,64,64,128,1,fp8,fp8,0,0.019232000410556793
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,16,64,1,128,1,float16,float16,0,0.01935359984636307
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,32,64,8,128,1,float16,fp8,0,0.010385599732398988
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,16,64,1,128,1,float16,fp8,0,0.018694399297237395
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,16,64,1,128,1,fp8,fp8,0,0.018651199340820313
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,16,64,2,128,1,float16,float16,0,0.020467199385166168
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,16,64,2,128,1,float16,fp8,0,0.018692800402641298
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,16,64,2,128,1,fp8,fp8,0,0.018622399866580965
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,16,64,1,128,1,fp8,fp8,0,0.16839359998703002
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,16,64,4,128,1,float16,float16,0,0.019526399672031403
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,16,64,4,128,1,float16,fp8,0,0.018644799292087556
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,16,64,8,128,1,float16,float16,0,0.019985599815845488
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,16,64,8,128,1,float16,fp8,0,0.018632000684738158
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,16,64,4,128,1,fp8,fp8,0,0.01865759938955307
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16,64,64,128,1,float16,float16,0,0.014604799449443817
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16,64,64,128,1,float16,fp8,0,0.013007999956607818
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,16,64,8,128,1,fp8,fp8,0,0.01871040016412735
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16,64,64,128,1,fp8,fp8,0,0.012830400466918945
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16,64,1,128,1,float16,float16,0,0.012479999661445617
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16,64,2,128,1,float16,float16,0,0.012510399520397186
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16,64,1,128,1,fp8,fp8,0,0.012555199861526489
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16,64,2,128,1,fp8,fp8,0,0.012494400143623352
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16,64,4,128,1,float16,float16,0,0.012540799379348756
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16,64,4,128,1,float16,fp8,0,0.012904000282287598
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16,64,4,128,1,fp8,fp8,0,0.012516799569129943
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16,64,8,128,1,float16,float16,0,0.014406399428844452
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16,64,8,128,1,float16,fp8,0,0.012656000256538392
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16,64,8,128,1,fp8,fp8,0,0.012887999415397644
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16,64,64,128,1,float16,float16,0,0.012444800138473511
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16,64,64,128,1,float16,fp8,0,0.010427200049161912
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16,64,64,128,1,fp8,fp8,0,0.010438399761915207
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16,64,1,128,1,float16,float16,0,0.010443200170993806
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16,64,1,128,1,float16,fp8,0,0.010435199737548828
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16,64,1,128,1,fp8,fp8,0,0.010451199859380722
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,16,64,2,128,1,float16,float16,0,0.05267040133476257
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16,64,2,128,1,float16,float16,0,0.010427200049161912
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16,64,2,128,1,float16,fp8,0,0.01037919968366623
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16,64,2,128,1,fp8,fp8,0,0.010494399815797806
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16,64,4,128,1,float16,float16,0,0.010441599786281586
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16,64,4,128,1,float16,fp8,0,0.010384000092744827
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16,64,4,128,1,fp8,fp8,0,0.010449600219726563
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16,64,8,128,1,float16,float16,0,0.01043040007352829
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16,64,8,128,1,float16,fp8,0,0.010393600165843963
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16,64,64,128,1,float16,float16,0,0.010475199669599533
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,16,64,4,128,1,float16,fp8,0,0.04938240051269531
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16,64,64,128,1,float16,fp8,0,0.010361599922180175
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16,64,1,128,1,float16,float16,0,0.01043360009789467
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16,64,1,128,1,float16,fp8,0,0.010075200349092484
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16,64,64,128,1,fp8,fp8,0,0.010305599868297577
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16,64,1,128,1,fp8,fp8,0,0.010396800190210342
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16,64,2,128,1,float16,fp8,0,0.009110400080680847
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16,64,4,128,1,float16,float16,0,0.010158400237560272
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16,64,2,128,1,fp8,fp8,0,0.010366400331258773
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16,64,4,128,1,float16,fp8,0,0.009374400228261947
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16,64,4,128,1,fp8,fp8,0,0.010371199995279311
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16,64,8,128,1,float16,float16,0,0.010446400195360184
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16,64,8,128,1,float16,fp8,0,0.01045759990811348
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16,64,8,128,1,fp8,fp8,0,0.010436800122261048
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,32,64,2,128,1,float16,float16,0,0.01043040007352829
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,1,64,1,128,1,float16,fp8,0,0.3163808107376099
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,1,64,1,128,1,fp8,fp8,0,0.3168191909790039
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,1,64,2,128,1,float16,float16,0,0.33809919357299806
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,1,64,2,128,1,float16,fp8,0,0.31640479564666746
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,1,64,2,128,1,fp8,fp8,0,0.31710081100463866
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16,64,1,128,1,float16,fp8,0,0.012455999851226807
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16,64,2,128,1,float16,fp8,0,0.012595200538635254
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,1,64,4,128,1,float16,float16,0,0.3364880084991455
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,1,64,4,128,1,float16,fp8,0,0.3167040109634399
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,1,64,4,128,1,fp8,fp8,0,0.31699039936065676
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,1,64,8,128,1,float16,float16,0,0.33849918842315674
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,1,64,8,128,1,float16,fp8,0,0.3170128107070923
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,1,64,64,128,1,float16,float16,0,0.17721439599990846
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,1,64,64,128,1,float16,fp8,0,0.16535199880599977
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,1,64,64,128,1,fp8,fp8,0,0.16484479904174804
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,1,64,1,128,1,float16,float16,0,0.17577600479125977
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,1,64,1,128,1,float16,fp8,0,0.1645311951637268
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,1,64,1,128,1,fp8,fp8,0,0.16301599740982056
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,1,64,2,128,1,float16,float16,0,0.17558079957962036
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,1,64,2,128,1,float16,fp8,0,0.16439520120620726
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,1,64,2,128,1,fp8,fp8,0,0.1633792042732239
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,1,64,4,128,1,float16,float16,0,0.17557599544525146
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16,64,8,128,1,fp8,fp8,0,0.01048159971833229
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,1,64,4,128,1,float16,fp8,0,0.16463199853897095
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,1,64,8,128,1,float16,float16,0,0.1737712025642395
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,1,64,8,128,1,float16,fp8,0,0.16417280435562134
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1,64,64,128,1,float16,float16,0,0.09565920233726502
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1,64,64,128,1,float16,fp8,0,0.08734719753265381
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,1,64,8,128,1,fp8,fp8,0,0.16316800117492675
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16,64,2,128,1,float16,float16,0,0.010372799634933472
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1,64,64,128,1,fp8,fp8,0,0.08767679929733277
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1,64,1,128,1,float16,float16,0,0.09394400119781494
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1,64,1,128,1,float16,fp8,0,0.08652319908142089
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1,64,1,128,1,fp8,fp8,0,0.0871999979019165
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1,64,2,128,1,float16,fp8,0,0.0874239981174469
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1,64,2,128,1,float16,float16,0,0.09328320026397705
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1,64,2,128,1,fp8,fp8,0,0.08708639740943909
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1,64,4,128,1,float16,fp8,0,0.08708959817886353
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1,64,4,128,1,float16,float16,0,0.09286080002784729
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1,64,8,128,1,float16,float16,0,0.0934000015258789
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1,64,8,128,1,fp8,fp8,0,0.08708639740943909
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1,64,64,128,1,float16,float16,0,0.05334879755973816
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1,64,64,128,1,float16,fp8,0,0.047774401307106015
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1,64,1,128,1,float16,float16,0,0.05145760178565979
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1,64,1,128,1,float16,fp8,0,0.047758400440216064
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1,64,8,128,1,float16,fp8,0,0.08717920184135437
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1,64,1,128,1,fp8,fp8,0,0.04731679856777191
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,1,64,1,128,1,float16,float16,0,0.3376240015029907
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1,64,2,128,1,float16,fp8,0,0.048147198557853696
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1,64,4,128,1,float16,float16,0,0.05194240212440491
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1,64,2,128,1,float16,float16,0,0.052091199159622195
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1,64,4,128,1,float16,fp8,0,0.04806720018386841
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1,64,4,128,1,fp8,fp8,0,0.047419199347496034
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1,64,8,128,1,float16,float16,0,0.052086400985717776
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1,64,8,128,1,float16,fp8,0,0.048046401143074034
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1,64,8,128,1,fp8,fp8,0,0.047367998957633974
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1,64,64,128,1,float16,float16,0,0.03167999982833862
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1,64,64,128,1,float16,fp8,0,0.028908801078796387
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1,64,64,128,1,fp8,fp8,0,0.027318400144577027
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1,64,1,128,1,float16,float16,0,0.030799999833106995
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1,64,1,128,1,float16,fp8,0,0.02868640124797821
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1,64,1,128,1,fp8,fp8,0,0.02685439884662628
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1,64,2,128,1,float16,fp8,0,0.028672000765800475
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1,64,2,128,1,fp8,fp8,0,0.027100801467895508
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1,64,4,128,1,float16,float16,0,0.03070560097694397
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1,64,4,128,1,float16,fp8,0,0.02699199914932251
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1,64,4,128,1,fp8,fp8,0,0.028484800457954408
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1,64,8,128,1,float16,float16,0,0.029707199335098265
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1,64,8,128,1,float16,fp8,0,0.027318400144577027
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1,64,8,128,1,fp8,fp8,0,0.027622398734092713
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1,64,64,128,1,float16,float16,0,0.020694400370121
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1,64,64,128,1,float16,fp8,0,0.01863040030002594
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1,64,64,128,1,fp8,fp8,0,0.018697600066661834
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1,64,1,128,1,float16,float16,0,0.019145600497722626
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1,64,1,128,1,float16,fp8,0,0.01870879977941513
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,1,64,4,128,1,fp8,fp8,0,0.16312479972839355
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1,64,2,128,1,float16,float16,0,0.018932799994945525
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1,64,1,128,1,fp8,fp8,0,0.018619200587272643
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1,64,2,128,1,float16,fp8,0,0.018649600446224213
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1,64,4,128,1,float16,float16,0,0.018665599822998046
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1,64,4,128,1,float16,fp8,0,0.018615999817848207
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1,64,4,128,1,fp8,fp8,0,0.018675200641155243
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1,64,4,128,1,fp8,fp8,0,0.08702880144119263
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1,64,8,128,1,float16,float16,0,0.018671999871730804
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1,64,8,128,1,float16,fp8,0,0.018598400056362152
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1,64,8,128,1,fp8,fp8,0,0.018667200207710268
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1,64,64,128,1,float16,float16,0,0.014601600170135499
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1,64,64,128,1,float16,fp8,0,0.012532800436019897
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1,64,64,128,1,fp8,fp8,0,0.04773600101470947
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1,64,64,128,1,fp8,fp8,0,0.012505599856376648
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1,64,1,128,1,float16,float16,0,0.012529599666595458
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1,64,1,128,1,float16,fp8,0,0.012515200674533844
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1,64,1,128,1,fp8,fp8,0,0.012476799637079239
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1,64,2,128,1,float16,float16,0,0.01417279988527298
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1,64,2,128,1,float16,fp8,0,0.012484800070524216
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1,64,2,128,1,fp8,fp8,0,0.01249919980764389
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1,64,2,128,1,fp8,fp8,0,0.04765599966049194
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1,64,4,128,1,float16,fp8,0,0.012595200538635254
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1,64,4,128,1,fp8,fp8,0,0.012515200674533844
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1,64,8,128,1,float16,float16,0,0.014440000057220459
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1,64,4,128,1,float16,float16,0,0.01327040046453476
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1,64,8,128,1,float16,fp8,0,0.012470400333404541
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1,64,64,128,1,float16,float16,0,0.01252480000257492
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1,64,64,128,1,float16,fp8,0,0.010471999645233154
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1,64,64,128,1,fp8,fp8,0,0.010446400195360184
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1,64,1,128,1,float16,float16,0,0.010531199723482132
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1,64,1,128,1,float16,fp8,0,0.010467199981212616
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1,64,1,128,1,fp8,fp8,0,0.010422399640083313
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1,64,2,128,1,float16,float16,0,0.010491199791431427
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1,64,2,128,1,float16,fp8,0,0.010497599840164185
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1,64,2,128,1,fp8,fp8,0,0.010420800000429154
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1,64,4,128,1,float16,float16,0,0.010460799932479859
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1,64,4,128,1,float16,fp8,0,0.010753600299358368
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1,64,4,128,1,fp8,fp8,0,0.010401599854230881
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1,64,8,128,1,float16,float16,0,0.010505600273609162
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1,64,8,128,1,float16,fp8,0,0.010396800190210342
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1,64,8,128,1,fp8,fp8,0,0.010416000336408614
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1,64,64,128,1,float16,fp8,0,0.010393600165843963
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1,64,64,128,1,fp8,fp8,0,0.01043199971318245
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1,64,1,128,1,float16,float16,0,0.010444799810647965
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1,64,1,128,1,float16,fp8,0,0.009910400211811065
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1,64,1,128,1,fp8,fp8,0,0.01037919968366623
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1,64,2,128,1,fp8,fp8,0,0.010398399829864503
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1,64,4,128,1,float16,float16,0,0.010440000146627427
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1,64,4,128,1,float16,fp8,0,0.010480000078678131
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1,64,4,128,1,fp8,fp8,0,0.010452800244092942
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,1,64,8,128,1,fp8,fp8,0,0.31672799587249756
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1,64,8,128,1,float16,float16,0,0.010451199859380722
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1,64,8,128,1,float16,fp8,0,0.008723200112581254
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1,64,8,128,1,fp8,fp8,0,0.010462400317192078
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1,64,2,128,1,fp8,fp8,0,0.01863519996404648
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1,64,8,128,1,fp8,fp8,0,0.012534399330615998
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1,64,2,128,1,float16,float16,0,0.030731201171875
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1,64,64,128,1,float16,float16,0,0.010580799728631973
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1,64,2,128,1,float16,float16,0,0.010440000146627427
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1,64,2,128,1,float16,fp8,0,0.010382399708032609
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,16384,48,1,128,1,float16,fp8,0,16.823605346679688
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,16384,48,1,128,1,fp8,fp8,0,17.0395263671875
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,16384,48,2,128,1,float16,fp8,0,16.838742065429688
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,16384,48,2,128,1,fp8,fp8,0,16.838848876953126
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,16384,48,4,128,1,float16,fp8,0,16.89317169189453
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,16384,48,1,128,1,float16,float16,0,21.547244262695312
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,16384,48,2,128,1,float16,float16,0,21.559005737304688
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,16384,48,4,128,1,float16,float16,0,21.87733612060547
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16384,48,48,128,1,float16,fp8,0,8.728334045410156
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,16384,48,4,128,1,fp8,fp8,0,17.226608276367188
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16384,48,48,128,1,fp8,fp8,0,8.780897521972657
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16384,48,1,128,1,float16,float16,0,11.000748443603516
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,16384,48,8,128,1,float16,fp8,0,17.319520568847658
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,16384,48,8,128,1,fp8,fp8,0,17.3089599609375
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,16384,48,8,128,1,float16,float16,0,22.28675079345703
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16384,48,1,128,1,float16,fp8,0,8.503233337402344
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16384,48,1,128,1,fp8,fp8,0,8.614851379394532
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16384,48,2,128,1,float16,fp8,0,8.613543701171874
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16384,48,2,128,1,fp8,fp8,0,8.634102630615235
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16384,48,2,128,1,float16,float16,0,10.965586853027343
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16384,48,4,128,1,float16,float16,0,11.004232025146484
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16384,48,4,128,1,float16,fp8,0,8.656953430175781
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16384,48,4,128,1,fp8,fp8,0,8.654267120361329
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16384,48,48,128,1,float16,fp8,0,4.604020690917968
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16384,48,8,128,1,float16,fp8,0,8.645028686523437
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16384,48,8,128,1,float16,float16,0,11.176412963867188
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16384,48,48,128,1,fp8,fp8,0,4.54597282409668
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16384,48,8,128,1,fp8,fp8,0,8.629625701904297
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16384,48,1,128,1,float16,float16,0,5.464788818359375
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16384,48,1,128,1,float16,fp8,0,4.287684631347656
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16384,48,1,128,1,fp8,fp8,0,4.345880126953125
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16384,48,2,128,1,float16,fp8,0,4.328219223022461
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16384,48,2,128,1,fp8,fp8,0,4.309936141967773
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16384,48,2,128,1,float16,float16,0,5.479763031005859
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16384,48,4,128,1,float16,float16,0,5.535728073120117
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16384,48,4,128,1,float16,fp8,0,4.278020858764648
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16384,48,4,128,1,fp8,fp8,0,4.340288162231445
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16384,48,8,128,1,float16,float16,0,5.557532882690429
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16384,48,8,128,1,float16,fp8,0,4.450980758666992
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16384,48,48,128,1,float16,fp8,0,2.289846420288086
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16384,48,8,128,1,fp8,fp8,0,4.432283020019531
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16384,48,48,128,1,fp8,fp8,0,2.2439279556274414
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16384,48,1,128,1,float16,fp8,0,2.1831119537353514
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16384,48,1,128,1,fp8,fp8,0,2.1718608856201174
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16384,48,1,128,1,float16,float16,0,2.7611711502075194
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16384,48,2,128,1,float16,fp8,0,2.1557056427001955
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16384,48,2,128,1,float16,float16,0,2.680281639099121
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16384,48,2,128,1,fp8,fp8,0,2.1580928802490233
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16384,48,4,128,1,float16,float16,0,2.6592784881591798
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16384,48,4,128,1,float16,fp8,0,2.4154287338256837
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16384,48,4,128,1,fp8,fp8,0,2.166417694091797
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16384,48,8,128,1,float16,fp8,0,2.1727903366088865
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16384,48,8,128,1,float16,float16,0,2.7587360382080077
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16384,48,8,128,1,fp8,fp8,0,2.4728767395019533
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,12288,48,1,128,1,float16,fp8,0,9.87078857421875
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,12288,48,1,128,1,fp8,fp8,0,9.881240081787109
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,12288,48,1,128,1,float16,float16,0,12.563086700439452
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,12288,48,2,128,1,float16,fp8,0,9.847160339355469
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,12288,48,2,128,1,float16,float16,0,12.685424041748046
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,12288,48,2,128,1,fp8,fp8,0,10.152537536621093
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,12288,48,4,128,1,float16,fp8,0,9.886100769042969
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,12288,48,4,128,1,float16,float16,0,12.7053955078125
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,12288,48,4,128,1,fp8,fp8,0,10.033560180664063
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,12288,48,8,128,1,float16,float16,0,12.871844482421874
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,12288,48,8,128,1,float16,fp8,0,10.127390289306641
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,12288,48,48,128,1,float16,fp8,0,5.161819076538086
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,12288,48,8,128,1,fp8,fp8,0,9.991613006591797
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,12288,48,48,128,1,fp8,fp8,0,5.28430404663086
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,12288,48,1,128,1,float16,fp8,0,4.849382400512695
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,12288,48,1,128,1,fp8,fp8,0,4.947966384887695
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,12288,48,1,128,1,float16,float16,0,6.388276672363281
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,12288,48,2,128,1,float16,float16,0,6.350227355957031
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,12288,48,2,128,1,float16,fp8,0,4.925299072265625
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,12288,48,2,128,1,fp8,fp8,0,4.977691268920898
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,12288,48,4,128,1,float16,float16,0,6.41095199584961
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,12288,48,4,128,1,float16,fp8,0,5.036476898193359
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,12288,48,4,128,1,fp8,fp8,0,5.0415809631347654
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,12288,48,8,128,1,float16,float16,0,6.4372802734375
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,12288,48,8,128,1,fp8,fp8,0,4.988809585571289
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,12288,48,48,128,1,float16,fp8,0,2.810740852355957
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,12288,48,48,128,1,fp8,fp8,0,2.6424560546875
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,12288,48,8,128,1,float16,fp8,0,5.122347259521485
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,12288,48,1,128,1,float16,fp8,0,2.4971872329711915
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,12288,48,1,128,1,float16,float16,0,3.015924835205078
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,12288,48,1,128,1,fp8,fp8,0,2.472447967529297
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,12288,48,2,128,1,float16,float16,0,3.1001823425292967
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,12288,48,2,128,1,float16,fp8,0,2.8882911682128904
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,12288,48,2,128,1,fp8,fp8,0,2.5155055999755858
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,12288,48,4,128,1,float16,fp8,0,2.498766326904297
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,12288,48,4,128,1,fp8,fp8,0,2.5089088439941407
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,12288,48,4,128,1,float16,float16,0,3.2034751892089846
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,12288,48,8,128,1,float16,fp8,0,2.48023681640625
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,12288,48,8,128,1,float16,float16,0,3.0984256744384764
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,12288,48,8,128,1,fp8,fp8,0,2.882302474975586
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,12288,48,48,128,1,float16,fp8,0,1.5782575607299805
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,12288,48,48,128,1,fp8,fp8,0,1.391102409362793
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,12288,48,1,128,1,float16,float16,0,1.480452823638916
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,12288,48,1,128,1,float16,fp8,0,1.276318359375
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,12288,48,1,128,1,fp8,fp8,0,1.4112799644470215
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,12288,48,2,128,1,float16,float16,0,1.4867775917053223
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,12288,48,2,128,1,float16,fp8,0,1.3095456123352052
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,12288,48,2,128,1,fp8,fp8,0,1.494923210144043
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,12288,48,4,128,1,float16,float16,0,1.4991583824157715
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,12288,48,4,128,1,float16,fp8,0,1.3286144256591796
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,12288,48,4,128,1,fp8,fp8,0,1.4107631683349608
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,12288,48,8,128,1,float16,float16,0,1.5066351890563965
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,12288,48,8,128,1,float16,fp8,0,1.3100655555725098
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,12288,48,8,128,1,fp8,fp8,0,1.4513808250427247
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,10240,48,1,128,1,float16,fp8,0,6.954428863525391
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,10240,48,1,128,1,float16,float16,0,8.839115142822266
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,10240,48,1,128,1,fp8,fp8,0,7.023831939697265
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,10240,48,2,128,1,float16,float16,0,8.965916442871094
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,10240,48,2,128,1,float16,fp8,0,7.084442901611328
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,10240,48,2,128,1,fp8,fp8,0,7.021186828613281
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,10240,48,4,128,1,float16,float16,0,9.12930908203125
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,10240,48,4,128,1,float16,fp8,0,6.986601257324219
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,10240,48,4,128,1,fp8,fp8,0,7.024562835693359
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,10240,48,8,128,1,float16,float16,0,9.20782241821289
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,10240,48,8,128,1,float16,fp8,0,6.980265808105469
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,10240,48,8,128,1,fp8,fp8,0,7.0995933532714846
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,10240,48,48,128,1,float16,fp8,0,3.73803825378418
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,12288,48,48,128,1,float16,float16,0,3.126775932312012
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16384,48,48,128,1,float16,float16,0,2.700831985473633
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,10240,48,48,128,1,fp8,fp8,0,3.7322879791259767
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,10240,48,48,128,1,float16,float16,0,4.549631881713867
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,12288,48,48,128,1,float16,float16,0,6.462980651855469
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16384,48,48,128,1,float16,float16,0,5.439750289916992
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,12288,48,48,128,1,float16,float16,0,1.5668671607971192
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,10240,48,1,128,1,float16,float16,0,4.262369537353516
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,10240,48,1,128,1,float16,fp8,0,3.5999374389648438
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,10240,48,1,128,1,fp8,fp8,0,3.535055923461914
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16384,48,48,128,1,float16,float16,0,11.174441528320312
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,10240,48,2,128,1,float16,fp8,0,3.5970577239990233
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,10240,48,2,128,1,float16,float16,0,4.362400054931641
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,10240,48,2,128,1,fp8,fp8,0,3.613324737548828
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,10240,48,4,128,1,float16,fp8,0,3.6992752075195314
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,10240,48,4,128,1,float16,float16,0,4.392889785766601
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,10240,48,4,128,1,fp8,fp8,0,3.4216545104980467
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,10240,48,48,128,1,float16,float16,0,2.202462387084961
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,10240,48,8,128,1,float16,float16,0,4.58807373046875
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,10240,48,48,128,1,float16,fp8,0,1.8646656036376954
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,10240,48,8,128,1,float16,fp8,0,3.45731201171875
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,10240,48,1,128,1,float16,float16,0,2.038929557800293
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,10240,48,48,128,1,fp8,fp8,0,2.095756721496582
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,10240,48,8,128,1,fp8,fp8,0,3.690670394897461
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,10240,48,1,128,1,float16,fp8,0,1.9936288833618163
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,10240,48,1,128,1,fp8,fp8,0,2.0697759628295898
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,10240,48,2,128,1,float16,fp8,0,1.7882095336914063
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,10240,48,2,128,1,float16,float16,0,2.0885839462280273
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,10240,48,2,128,1,fp8,fp8,0,2.006942367553711
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,10240,48,4,128,1,float16,float16,0,2.108139228820801
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,10240,48,4,128,1,float16,fp8,0,1.8024560928344726
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,10240,48,4,128,1,fp8,fp8,0,1.780561637878418
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,10240,48,48,128,1,float16,float16,0,1.2427408218383789
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,10240,48,48,128,1,float16,fp8,0,0.9968095779418945
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,10240,48,8,128,1,float16,fp8,0,1.7537647247314454
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,10240,48,8,128,1,fp8,fp8,0,1.758500862121582
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,10240,48,48,128,1,fp8,fp8,0,1.1465920448303222
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,10240,48,8,128,1,float16,float16,0,2.117888069152832
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,10240,48,1,128,1,float16,float16,0,1.040884780883789
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,10240,48,1,128,1,float16,fp8,0,0.9292367935180664
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,10240,48,2,128,1,float16,float16,0,1.0339520454406739
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,10240,48,1,128,1,fp8,fp8,0,1.0740464210510254
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,10240,48,2,128,1,fp8,fp8,0,0.9312607765197753
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,10240,48,4,128,1,fp8,fp8,0,0.9196208000183106
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,10240,48,4,128,1,float16,float16,0,1.0579360008239747
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,10240,48,4,128,1,float16,fp8,0,1.096236801147461
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,10240,48,8,128,1,float16,float16,0,1.046126365661621
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,10240,48,8,128,1,float16,fp8,0,0.9920687675476074
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,10240,48,8,128,1,fp8,fp8,0,0.9796879768371582
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,10240,48,2,128,1,float16,fp8,0,0.9182160377502442
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,8192,48,1,128,1,float16,fp8,0,9.204637145996093
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,8192,48,1,128,1,fp8,fp8,0,9.209763336181641
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,8192,48,2,128,1,float16,fp8,0,9.214070129394532
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,8192,48,2,128,1,fp8,fp8,0,9.165821075439453
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,8192,48,1,128,1,float16,float16,0,11.653273773193359
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,8192,48,2,128,1,float16,float16,0,11.664988708496093
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,8192,48,4,128,1,float16,float16,0,11.790412902832031
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,8192,48,4,128,1,float16,fp8,0,8.910842895507812
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,8192,48,48,128,1,float16,fp8,0,5.056679916381836
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,8192,48,48,128,1,float16,float16,0,6.3522590637207035
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,8192,48,48,128,1,fp8,fp8,0,4.992284774780273
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,8192,48,1,128,1,float16,float16,0,5.668964767456055
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,8192,48,4,128,1,fp8,fp8,0,9.288014221191407
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,8192,48,8,128,1,float16,fp8,0,9.31719970703125
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,8192,48,8,128,1,fp8,fp8,0,9.216847991943359
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,8192,48,8,128,1,float16,float16,0,11.901934051513672
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,8192,48,1,128,1,float16,fp8,0,4.588507080078125
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,8192,48,1,128,1,fp8,fp8,0,4.6895599365234375
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,8192,48,2,128,1,float16,fp8,0,4.740566253662109
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,8192,48,2,128,1,fp8,fp8,0,4.610678482055664
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,8192,48,2,128,1,float16,float16,0,5.811227035522461
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,8192,48,4,128,1,float16,fp8,0,4.595318222045899
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,8192,48,4,128,1,float16,float16,0,5.831103897094726
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,8192,48,4,128,1,fp8,fp8,0,4.804905700683594
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,8192,48,48,128,1,float16,fp8,0,2.655790328979492
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,8192,48,48,128,1,float16,float16,0,3.0364112854003906
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,8192,48,48,128,1,fp8,fp8,0,2.503228759765625
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,8192,48,8,128,1,float16,fp8,0,4.5561775207519535
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,8192,48,8,128,1,float16,float16,0,5.911582565307617
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,8192,48,1,128,1,float16,float16,0,2.861240005493164
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,8192,48,8,128,1,fp8,fp8,0,4.611220932006836
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,8192,48,1,128,1,float16,fp8,0,2.3131711959838865
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,8192,48,1,128,1,fp8,fp8,0,2.282512092590332
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,8192,48,2,128,1,float16,fp8,0,2.296281623840332
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,8192,48,2,128,1,float16,float16,0,2.917844772338867
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,8192,48,2,128,1,fp8,fp8,0,2.335985565185547
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,8192,48,4,128,1,fp8,fp8,0,2.3185152053833007
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,8192,48,4,128,1,float16,fp8,0,2.619011116027832
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,8192,48,4,128,1,float16,float16,0,2.971673583984375
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,8192,48,8,128,1,float16,fp8,0,2.305886459350586
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,8192,48,48,128,1,float16,float16,0,1.6221359252929688
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,8192,48,48,128,1,float16,fp8,0,1.2766799926757812
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,8192,48,8,128,1,fp8,fp8,0,2.3069263458251954
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,8192,48,8,128,1,float16,float16,0,2.8147247314453123
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,8192,48,48,128,1,fp8,fp8,0,1.269495964050293
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,8192,48,1,128,1,float16,float16,0,1.3201567649841308
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,8192,48,1,128,1,float16,fp8,0,1.4004112243652345
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,8192,48,1,128,1,fp8,fp8,0,1.1848992347717284
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,8192,48,2,128,1,float16,fp8,0,1.177246379852295
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,8192,48,2,128,1,fp8,fp8,0,1.2254480361938476
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,8192,48,2,128,1,float16,float16,0,1.502742385864258
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,8192,48,4,128,1,float16,fp8,0,1.1741375923156738
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,8192,48,4,128,1,fp8,fp8,0,1.3136272430419922
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,8192,48,48,128,1,float16,float16,0,0.7578495979309082
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,8192,48,8,128,1,float16,float16,0,1.3528528213500977
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,8192,48,8,128,1,float16,fp8,0,1.1749648094177245
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,8192,48,48,128,1,fp8,fp8,0,0.6669072151184082
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,8192,48,8,128,1,fp8,fp8,0,1.1719712257385253
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,8192,48,1,128,1,float16,float16,0,0.6939663887023926
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,8192,48,1,128,1,float16,fp8,0,0.7080224037170411
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,8192,48,1,128,1,fp8,fp8,0,0.7252111911773682
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,8192,48,2,128,1,float16,fp8,0,0.6207168102264404
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,8192,48,2,128,1,float16,float16,0,0.6876495838165283
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,8192,48,4,128,1,float16,float16,0,0.6999423980712891
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,8192,48,4,128,1,float16,fp8,0,0.6826416015625
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,8192,48,4,128,1,fp8,fp8,0,0.6212031841278076
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,8192,48,8,128,1,float16,float16,0,0.6987775802612305
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,8192,48,8,128,1,float16,fp8,0,0.6698416233062744
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,8192,48,8,128,1,fp8,fp8,0,0.637179183959961
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,8192,48,4,128,1,float16,float16,0,1.4888511657714845
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,8192,48,48,128,1,float16,fp8,0,0.8062671661376953
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,8192,48,2,128,1,fp8,fp8,0,0.6210527896881104
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,6144,48,1,128,1,float16,fp8,0,5.3452495574951175
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,6144,48,1,128,1,fp8,fp8,0,5.31128158569336
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,6144,48,1,128,1,float16,float16,0,6.745120239257813
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,6144,48,2,128,1,float16,fp8,0,5.314225769042968
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,6144,48,2,128,1,float16,float16,0,6.690956878662109
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,6144,48,2,128,1,fp8,fp8,0,5.294289779663086
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,6144,48,4,128,1,float16,fp8,0,5.294440078735351
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,6144,48,4,128,1,float16,float16,0,6.530534362792968
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,6144,48,48,128,1,float16,fp8,0,2.9364784240722654
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,6144,48,48,128,1,float16,float16,0,3.6102928161621093
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,6144,48,4,128,1,fp8,fp8,0,5.398759841918945
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,6144,48,8,128,1,float16,fp8,0,5.458099365234375
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,6144,48,48,128,1,fp8,fp8,0,2.9808160781860353
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,6144,48,8,128,1,float16,float16,0,6.86419677734375
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,6144,48,1,128,1,float16,float16,0,3.2807327270507813
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,6144,48,1,128,1,float16,fp8,0,2.643044853210449
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,6144,48,1,128,1,fp8,fp8,0,2.927432060241699
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,6144,48,2,128,1,float16,float16,0,3.293062210083008
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,6144,48,2,128,1,float16,fp8,0,2.7037631988525392
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,6144,48,2,128,1,fp8,fp8,0,2.650619125366211
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,6144,48,4,128,1,float16,fp8,0,2.829151916503906
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,6144,48,4,128,1,float16,float16,0,3.3686878204345705
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,6144,48,8,128,1,fp8,fp8,0,5.336468887329102
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,6144,48,48,128,1,float16,float16,0,1.7535808563232422
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,6144,48,8,128,1,float16,float16,0,3.3237743377685547
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,6144,48,48,128,1,float16,fp8,0,1.494495964050293
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,6144,48,8,128,1,float16,fp8,0,3.056286430358887
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,6144,48,1,128,1,float16,float16,0,1.5376928329467774
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,6144,48,48,128,1,fp8,fp8,0,1.7448223114013672
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,6144,48,8,128,1,fp8,fp8,0,2.698958396911621
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,6144,48,1,128,1,float16,fp8,0,1.562065601348877
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,6144,48,2,128,1,float16,float16,0,1.5694432258605957
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,6144,48,1,128,1,fp8,fp8,0,1.5439840316772462
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,6144,48,2,128,1,float16,fp8,0,1.3771807670593261
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,6144,48,2,128,1,fp8,fp8,0,1.5486543655395508
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,6144,48,4,128,1,float16,fp8,0,1.373363208770752
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,6144,48,4,128,1,float16,float16,0,1.5294367790222168
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,6144,48,4,128,1,fp8,fp8,0,2.665126419067383
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,6144,48,48,128,1,float16,fp8,0,0.7772096157073974
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,6144,48,48,128,1,float16,float16,0,0.8937952041625976
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,6144,48,48,128,1,fp8,fp8,0,0.8775983810424804
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,6144,48,4,128,1,fp8,fp8,0,1.4107487678527832
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,6144,48,8,128,1,float16,fp8,0,1.385862445831299
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,6144,48,8,128,1,fp8,fp8,0,1.3571711540222169
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,6144,48,1,128,1,float16,float16,0,0.7820032119750977
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,6144,48,8,128,1,float16,float16,0,1.7528367996215821
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,6144,48,1,128,1,float16,fp8,0,0.7089983940124511
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,6144,48,1,128,1,fp8,fp8,0,0.7629792213439941
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,6144,48,2,128,1,float16,fp8,0,0.7221712112426758
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,6144,48,2,128,1,float16,float16,0,0.8081168174743653
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,6144,48,2,128,1,fp8,fp8,0,0.7077263832092285
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,6144,48,4,128,1,float16,fp8,0,0.704534387588501
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,6144,48,4,128,1,float16,float16,0,0.7869760036468506
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,6144,48,4,128,1,fp8,fp8,0,0.8661135673522949
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,6144,48,48,128,1,float16,fp8,0,0.4170383930206299
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,6144,48,48,128,1,float16,float16,0,0.5110911846160888
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,6144,48,48,128,1,fp8,fp8,0,0.4145343780517578
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,6144,48,8,128,1,float16,float16,0,0.7864496231079101
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,6144,48,8,128,1,float16,fp8,0,0.7286272048950195
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,6144,48,8,128,1,fp8,fp8,0,0.7076352119445801
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,6144,48,1,128,1,fp8,fp8,0,0.3803168058395386
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,6144,48,1,128,1,float16,fp8,0,0.4640751838684082
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,6144,48,2,128,1,float16,fp8,0,0.37950398921966555
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,6144,48,2,128,1,fp8,fp8,0,0.37999041080474855
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,6144,48,4,128,1,float16,float16,0,0.4228464126586914
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,6144,48,4,128,1,fp8,fp8,0,0.37956159114837645
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,6144,48,8,128,1,float16,float16,0,0.42560639381408694
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,6144,48,8,128,1,float16,fp8,0,0.3799999952316284
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,6144,48,8,128,1,fp8,fp8,0,0.381825590133667
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,6144,48,1,128,1,float16,float16,0,0.45977439880371096
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,6144,48,2,128,1,float16,float16,0,0.4456960201263428
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,6144,48,4,128,1,float16,fp8,0,0.38335039615631106
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,4096,48,1,128,1,float16,fp8,0,5.051567840576172
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,4096,48,1,128,1,fp8,fp8,0,5.056884765625
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,4096,48,2,128,1,float16,fp8,0,5.135732650756836
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,4096,48,1,128,1,float16,float16,0,6.356208038330078
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,4096,48,2,128,1,float16,float16,0,6.251436614990235
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,4096,48,2,128,1,fp8,fp8,0,5.085699081420898
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,4096,48,4,128,1,float16,fp8,0,5.070040130615235
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,4096,48,4,128,1,float16,float16,0,6.215488052368164
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,4096,48,48,128,1,float16,float16,0,3.599095916748047
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,4096,48,4,128,1,fp8,fp8,0,5.1128192901611325
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,4096,48,8,128,1,float16,fp8,0,5.195684814453125
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,4096,48,48,128,1,float16,fp8,0,2.913737678527832
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,4096,48,8,128,1,fp8,fp8,0,5.280964660644531
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,4096,48,8,128,1,float16,float16,0,6.40808334350586
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,4096,48,48,128,1,fp8,fp8,0,2.935009574890137
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,4096,48,1,128,1,float16,float16,0,3.05600643157959
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,4096,48,1,128,1,fp8,fp8,0,2.5871999740600584
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,4096,48,1,128,1,float16,fp8,0,2.74682559967041
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,4096,48,2,128,1,float16,float16,0,3.0656255722045898
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,4096,48,2,128,1,float16,fp8,0,2.5474784851074217
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,4096,48,2,128,1,fp8,fp8,0,2.6112319946289064
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,4096,48,4,128,1,float16,fp8,0,2.5702816009521485
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,4096,48,4,128,1,float16,float16,0,3.1513919830322266
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,4096,48,4,128,1,fp8,fp8,0,2.7267871856689454
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,4096,48,48,128,1,float16,float16,0,1.7337839126586914
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,4096,48,48,128,1,float16,fp8,0,1.4768671989440918
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,4096,48,8,128,1,float16,float16,0,3.1496912002563477
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,4096,48,8,128,1,fp8,fp8,0,2.5601072311401367
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,4096,48,48,128,1,fp8,fp8,0,1.6997888565063477
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,4096,48,8,128,1,float16,fp8,0,2.9091615676879883
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,4096,48,1,128,1,float16,float16,0,1.425915241241455
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,4096,48,1,128,1,fp8,fp8,0,1.296622371673584
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,4096,48,1,128,1,float16,fp8,0,1.5242671966552734
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,4096,48,2,128,1,float16,fp8,0,1.3039600372314453
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,4096,48,2,128,1,float16,float16,0,1.719428825378418
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,4096,48,2,128,1,fp8,fp8,0,1.3307200431823731
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,4096,48,4,128,1,float16,float16,0,1.4322799682617187
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,4096,48,4,128,1,float16,fp8,0,1.3587871551513673
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,4096,48,4,128,1,fp8,fp8,0,1.2941967964172363
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,4096,48,48,128,1,float16,float16,0,0.8536432266235352
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,4096,48,48,128,1,fp8,fp8,0,0.8006640434265136
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,4096,48,8,128,1,float16,float16,0,1.5008768081665038
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,4096,48,8,128,1,float16,fp8,0,1.29551362991333
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,4096,48,8,128,1,fp8,fp8,0,1.5308176040649415
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,4096,48,1,128,1,float16,float16,0,0.7347152233123779
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,4096,48,1,128,1,float16,fp8,0,0.7192704200744628
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,4096,48,1,128,1,fp8,fp8,0,0.67816162109375
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,4096,48,2,128,1,float16,float16,0,0.8381232261657715
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,4096,48,2,128,1,fp8,fp8,0,0.6657392024993897
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,4096,48,4,128,1,float16,float16,0,0.7406191825866699
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,4096,48,4,128,1,float16,fp8,0,0.7633696079254151
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,4096,48,4,128,1,fp8,fp8,0,0.6855152130126954
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,4096,48,8,128,1,float16,float16,0,0.7454495906829834
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,4096,48,48,128,1,float16,fp8,0,0.43053760528564455
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,4096,48,8,128,1,float16,fp8,0,0.8135439872741699
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,4096,48,48,128,1,fp8,fp8,0,0.4056079864501953
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,4096,48,1,128,1,float16,float16,0,0.3785792112350464
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,4096,48,1,128,1,float16,fp8,0,0.3797152042388916
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,4096,48,1,128,1,fp8,fp8,0,0.4212240219116211
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,4096,48,2,128,1,float16,float16,0,0.37707040309906004
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,4096,48,2,128,1,float16,fp8,0,0.36922080516815187
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,4096,48,2,128,1,fp8,fp8,0,0.3661056041717529
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,4096,48,4,128,1,float16,fp8,0,0.36066720485687254
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,4096,48,4,128,1,float16,float16,0,0.4120272159576416
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,4096,48,4,128,1,fp8,fp8,0,0.3601167917251587
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,4096,48,8,128,1,float16,float16,0,0.39002718925476076
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,4096,48,48,128,1,float16,float16,0,0.24209120273590087
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,4096,48,8,128,1,float16,fp8,0,0.35582718849182127
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,4096,48,8,128,1,fp8,fp8,0,0.3765536069869995
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,4096,48,48,128,1,float16,fp8,0,0.22426879405975342
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,4096,48,48,128,1,fp8,fp8,0,0.2263200044631958
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,4096,48,1,128,1,float16,fp8,0,0.21085760593414307
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,4096,48,1,128,1,fp8,fp8,0,0.19795039892196656
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,4096,48,2,128,1,float16,float16,0,0.21046559810638427
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,4096,48,2,128,1,float16,fp8,0,0.20522398948669435
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,4096,48,2,128,1,fp8,fp8,0,0.19763519763946533
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,4096,48,4,128,1,float16,float16,0,0.21284000873565673
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,4096,48,4,128,1,float16,fp8,0,0.20398399829864503
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,4096,48,4,128,1,fp8,fp8,0,0.19853919744491577
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,4096,48,8,128,1,float16,float16,0,0.21464319229125978
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,4096,48,8,128,1,float16,fp8,0,0.20455520153045653
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,4096,48,8,128,1,fp8,fp8,0,0.19823839664459228
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,4096,48,2,128,1,float16,fp8,0,0.6680912017822266
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,4096,48,48,128,1,float16,float16,0,0.44958720207214353
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,4096,48,8,128,1,fp8,fp8,0,0.6668464183807373
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,4096,48,48,128,1,float16,fp8,0,0.8681360244750976
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,3072,48,1,128,1,float16,fp8,0,3.0855087280273437
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,3072,48,1,128,1,fp8,fp8,0,3.074072074890137
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,3072,48,1,128,1,float16,float16,0,3.6232894897460937
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,3072,48,2,128,1,float16,float16,0,3.592350387573242
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,3072,48,2,128,1,float16,fp8,0,3.0675535202026367
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,3072,48,2,128,1,fp8,fp8,0,3.0660720825195313
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,3072,48,4,128,1,float16,float16,0,3.4773406982421875
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,3072,48,4,128,1,float16,fp8,0,3.0751775741577148
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,4096,48,1,128,1,float16,float16,0,0.20692799091339112
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,3072,48,8,128,1,fp8,fp8,0,3.0882240295410157
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,3072,48,8,128,1,float16,fp8,0,3.2473377227783202
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,3072,48,8,128,1,float16,float16,0,3.8091617584228517
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,3072,48,1,128,1,float16,float16,0,1.6945968627929688
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,3072,48,48,128,1,fp8,fp8,0,1.8294607162475587
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,3072,48,48,128,1,float16,fp8,0,1.9816576004028321
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,3072,48,1,128,1,float16,fp8,0,1.572488021850586
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,3072,48,1,128,1,fp8,fp8,0,1.5601584434509277
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,3072,48,2,128,1,float16,float16,0,1.9899791717529296
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,3072,48,2,128,1,fp8,fp8,0,1.556772804260254
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,3072,48,2,128,1,float16,fp8,0,1.7646400451660156
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,3072,48,4,128,1,float16,float16,0,1.6976816177368164
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,3072,48,4,128,1,float16,fp8,0,1.5537440299987793
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,3072,48,4,128,1,fp8,fp8,0,1.5560784339904785
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,3072,48,8,128,1,float16,float16,0,1.8674560546875
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,3072,48,48,128,1,float16,fp8,0,0.9242336273193359
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,3072,48,48,128,1,fp8,fp8,0,0.9255375862121582
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,3072,48,8,128,1,float16,fp8,0,1.5577360153198243
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,3072,48,8,128,1,fp8,fp8,0,1.5738544464111328
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,3072,48,1,128,1,float16,fp8,0,0.7915264129638672
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,3072,48,1,128,1,float16,float16,0,0.9636943817138672
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,3072,48,1,128,1,fp8,fp8,0,0.7933631896972656
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,3072,48,2,128,1,float16,float16,0,0.8698335647583008
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,3072,48,2,128,1,float16,fp8,0,0.7967408180236817
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,3072,48,2,128,1,fp8,fp8,0,0.7909071922302247
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,3072,48,4,128,1,float16,float16,0,0.937990379333496
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,3072,48,4,128,1,float16,fp8,0,0.8247872352600097
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,3072,48,48,128,1,float16,float16,0,2.132252883911133
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,3072,48,8,128,1,float16,float16,0,0.8844479560852051
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,3072,48,48,128,1,float16,float16,0,0.5323023796081543
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,3072,48,8,128,1,fp8,fp8,0,0.8115920066833496
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,3072,48,8,128,1,float16,fp8,0,0.8885616302490235
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,3072,48,48,128,1,float16,fp8,0,0.517139196395874
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,3072,48,48,128,1,fp8,fp8,0,0.47919201850891113
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,3072,48,1,128,1,float16,float16,0,0.4442336082458496
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,3072,48,1,128,1,float16,fp8,0,0.4472303867340088
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,3072,48,48,128,1,float16,float16,0,1.1346128463745118
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,3072,48,2,128,1,float16,float16,0,0.4357471942901611
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,3072,48,2,128,1,float16,fp8,0,0.424132776260376
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,3072,48,2,128,1,fp8,fp8,0,0.4269392013549805
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,3072,48,4,128,1,float16,float16,0,0.4388927936553955
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,3072,48,4,128,1,float16,fp8,0,0.41262078285217285
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,3072,48,4,128,1,fp8,fp8,0,0.42458720207214357
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,3072,48,8,128,1,float16,float16,0,0.4528384208679199
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,3072,48,8,128,1,float16,fp8,0,0.41433281898498536
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,3072,48,48,128,1,float16,float16,0,0.2822767972946167
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,3072,48,48,128,1,float16,fp8,0,0.2628864049911499
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,3072,48,8,128,1,fp8,fp8,0,0.41218881607055663
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,3072,48,48,128,1,fp8,fp8,0,0.25863521099090575
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,3072,48,1,128,1,float16,float16,0,0.23506240844726561
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,3072,48,1,128,1,float16,fp8,0,0.22692959308624266
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,3072,48,1,128,1,fp8,fp8,0,0.2228463888168335
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,3072,48,2,128,1,float16,fp8,0,0.22509438991546632
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,3072,48,2,128,1,fp8,fp8,0,0.227345609664917
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,3072,48,4,128,1,float16,float16,0,0.23462400436401368
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,3072,48,4,128,1,fp8,fp8,0,0.7940991878509521
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,3072,48,4,128,1,fp8,fp8,0,3.0780399322509764
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,3072,48,4,128,1,float16,fp8,0,0.2250351905822754
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,3072,48,8,128,1,float16,fp8,0,0.22390880584716796
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,3072,48,48,128,1,float16,float16,0,0.15604480504989623
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,3072,48,8,128,1,fp8,fp8,0,0.22410879135131836
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,3072,48,48,128,1,float16,fp8,0,0.1469375967979431
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,3072,48,48,128,1,fp8,fp8,0,0.14671839475631715
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,3072,48,1,128,1,float16,float16,0,0.13392800092697144
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,3072,48,1,128,1,float16,fp8,0,0.12860640287399291
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,3072,48,1,128,1,fp8,fp8,0,0.12801920175552367
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,3072,48,2,128,1,float16,float16,0,0.13406720161437988
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,3072,48,2,128,1,float16,fp8,0,0.1283471941947937
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,3072,48,2,128,1,fp8,fp8,0,0.12874399423599242
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,3072,48,4,128,1,float16,fp8,0,0.12893120050430298
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,3072,48,4,128,1,fp8,fp8,0,0.12800639867782593
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,3072,48,8,128,1,float16,fp8,0,0.12905600070953369
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,3072,48,8,128,1,fp8,fp8,0,0.1282528042793274
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,3072,48,1,128,1,fp8,fp8,0,0.4284671783447266
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,3072,48,2,128,1,float16,float16,0,0.23286399841308594
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,3072,48,4,128,1,fp8,fp8,0,0.22973599433898925
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,3072,48,8,128,1,float16,float16,0,0.23843040466308593
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,3072,48,4,128,1,float16,float16,0,0.13525760173797607
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,2048,48,1,128,1,float16,fp8,0,3.1112688064575194
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,2048,48,1,128,1,float16,float16,0,3.4591102600097656
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,3072,48,8,128,1,float16,float16,0,0.13765920400619508
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,2048,48,1,128,1,fp8,fp8,0,3.1089599609375
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,2048,48,2,128,1,float16,float16,0,3.5279472351074217
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,2048,48,2,128,1,float16,fp8,0,3.1040143966674805
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,2048,48,2,128,1,fp8,fp8,0,3.1062847137451173
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,2048,48,4,128,1,float16,fp8,0,3.1122415542602537
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,2048,48,4,128,1,float16,float16,0,3.515750503540039
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,2048,48,4,128,1,fp8,fp8,0,3.237705612182617
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,2048,48,8,128,1,float16,float16,0,3.725940704345703
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,2048,48,48,128,1,float16,fp8,0,1.9157520294189454
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,2048,48,48,128,1,float16,float16,0,2.1833295822143555
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,2048,48,1,128,1,float16,fp8,0,1.5704431533813477
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,2048,48,48,128,1,fp8,fp8,0,1.9215599060058595
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,2048,48,1,128,1,float16,float16,0,1.825663948059082
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,2048,48,8,128,1,fp8,fp8,0,3.26940803527832
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,2048,48,1,128,1,fp8,fp8,0,1.8145151138305664
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,2048,48,2,128,1,float16,float16,0,1.6922416687011719
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,2048,48,2,128,1,float16,fp8,0,1.6707311630249024
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,2048,48,2,128,1,fp8,fp8,0,1.5698464393615723
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,2048,48,4,128,1,float16,float16,0,1.6813583374023438
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,2048,48,8,128,1,float16,fp8,0,3.107758331298828
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,2048,48,4,128,1,fp8,fp8,0,1.5695695877075195
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,2048,48,4,128,1,float16,fp8,0,1.7036447525024414
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,2048,48,8,128,1,float16,float16,0,1.7921344757080078
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,2048,48,8,128,1,float16,fp8,0,1.7046144485473633
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,2048,48,8,128,1,fp8,fp8,0,1.5921711921691895
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,2048,48,1,128,1,float16,float16,0,0.8265968322753906
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,2048,48,48,128,1,float16,float16,0,1.0937040328979493
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,2048,48,1,128,1,float16,fp8,0,0.8129520416259766
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,2048,48,48,128,1,fp8,fp8,0,0.971673583984375
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,2048,48,48,128,1,float16,fp8,0,1.046388816833496
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,2048,48,1,128,1,fp8,fp8,0,0.9266511917114257
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,2048,48,2,128,1,float16,float16,0,0.8388640403747558
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,2048,48,2,128,1,float16,fp8,0,0.8004912376403809
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,2048,48,2,128,1,fp8,fp8,0,0.7968768119812012
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,2048,48,4,128,1,float16,fp8,0,0.8004672050476074
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,2048,48,4,128,1,float16,float16,0,0.8789792060852051
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,2048,48,4,128,1,fp8,fp8,0,0.8191679954528809
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,2048,48,8,128,1,float16,float16,0,0.8717472076416015
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,2048,48,48,128,1,float16,float16,0,0.5530208110809326
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,2048,48,1,128,1,float16,float16,0,0.4380047798156738
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,2048,48,8,128,1,float16,fp8,0,0.8555423736572265
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,2048,48,1,128,1,float16,fp8,0,0.4168208122253418
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,2048,48,8,128,1,fp8,fp8,0,0.8116239547729492
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,2048,48,1,128,1,fp8,fp8,0,0.4602975845336914
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,2048,48,2,128,1,float16,float16,0,0.4269567966461182
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,2048,48,2,128,1,float16,fp8,0,0.4336048126220703
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,2048,48,2,128,1,fp8,fp8,0,0.41301760673522947
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,2048,48,4,128,1,float16,float16,0,0.446884822845459
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,2048,48,4,128,1,float16,fp8,0,0.4594880104064941
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,2048,48,4,128,1,fp8,fp8,0,0.4108895778656006
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,2048,48,8,128,1,float16,float16,0,0.4442448139190674
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,2048,48,8,128,1,float16,fp8,0,0.413647985458374
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,2048,48,48,128,1,float16,float16,0,0.30392959117889407
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,2048,48,8,128,1,fp8,fp8,0,0.4284656047821045
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,2048,48,48,128,1,float16,fp8,0,0.26321280002593994
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,2048,48,1,128,1,float16,float16,0,0.22539839744567872
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,2048,48,48,128,1,fp8,fp8,0,0.2752432107925415
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,2048,48,1,128,1,fp8,fp8,0,0.22809278964996338
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,2048,48,2,128,1,float16,float16,0,0.23290879726409913
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,2048,48,2,128,1,float16,fp8,0,0.2207103967666626
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,2048,48,2,128,1,fp8,fp8,0,0.22838239669799804
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,2048,48,4,128,1,float16,float16,0,0.22998878955841065
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,2048,48,4,128,1,float16,fp8,0,0.23043520450592042
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,2048,48,4,128,1,fp8,fp8,0,0.22019679546356202
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,2048,48,8,128,1,float16,float16,0,0.24026401042938234
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,2048,48,8,128,1,float16,fp8,0,0.22444000244140624
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,2048,48,8,128,1,fp8,fp8,0,0.23044319152832032
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,2048,48,48,128,1,float16,float16,0,0.15665119886398315
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,2048,48,48,128,1,float16,fp8,0,0.14939520359039307
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,2048,48,1,128,1,float16,float16,0,0.12850240468978882
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,2048,48,1,128,1,float16,fp8,0,0.12220640182495117
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,2048,48,1,128,1,fp8,fp8,0,0.12477760314941407
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,2048,48,2,128,1,float16,float16,0,0.12902400493621827
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,2048,48,2,128,1,float16,fp8,0,0.12187999486923218
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,2048,48,2,128,1,fp8,fp8,0,0.12255680561065674
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,2048,48,4,128,1,float16,float16,0,0.13095999956130983
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,2048,48,4,128,1,float16,fp8,0,0.12263840436935425
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,2048,48,4,128,1,fp8,fp8,0,0.12277760505676269
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,2048,48,8,128,1,float16,float16,0,0.13346879482269286
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,2048,48,8,128,1,float16,fp8,0,0.12300000190734864
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,2048,48,8,128,1,fp8,fp8,0,0.12279200553894043
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,2048,48,48,128,1,float16,fp8,0,0.085343998670578
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,2048,48,48,128,1,fp8,fp8,0,0.08539199829101562
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,2048,48,1,128,1,float16,float16,0,0.07709599733352661
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,2048,48,48,128,1,float16,fp8,0,0.5001008033752441
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,2048,48,1,128,1,float16,fp8,0,0.07500159740447998
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,2048,48,1,128,1,fp8,fp8,0,0.07466560006141662
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,2048,48,48,128,1,fp8,fp8,0,0.499729585647583
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,2048,48,2,128,1,float16,float16,0,0.07702879905700684
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,2048,48,2,128,1,float16,fp8,0,0.074372798204422
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,2048,48,2,128,1,fp8,fp8,0,0.07440639734268188
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,2048,48,4,128,1,float16,float16,0,0.07711840271949769
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,2048,48,4,128,1,float16,fp8,0,0.07403519749641418
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,2048,48,4,128,1,fp8,fp8,0,0.07438719868659974
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,2048,48,8,128,1,float16,float16,0,0.07912960052490234
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,2048,48,8,128,1,float16,fp8,0,0.07418559789657593
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,2048,48,8,128,1,fp8,fp8,0,0.07402240037918091
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,2048,48,1,128,1,float16,fp8,0,0.22384800910949706
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1536,48,1,128,1,float16,float16,0,2.0029855728149415
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1536,48,1,128,1,float16,fp8,0,1.973575973510742
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,2048,48,48,128,1,fp8,fp8,0,0.1496832013130188
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1536,48,1,128,1,fp8,fp8,0,1.972292709350586
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1536,48,2,128,1,float16,float16,0,2.0182720184326173
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,2048,48,48,128,1,float16,float16,0,0.0943664014339447
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1536,48,2,128,1,float16,fp8,0,1.9707183837890625
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1536,48,2,128,1,fp8,fp8,0,1.9721551895141602
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1536,48,4,128,1,float16,fp8,0,1.9733680725097655
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1536,48,4,128,1,float16,float16,0,2.1994016647338865
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1536,48,8,128,1,float16,fp8,0,2.0984111785888673
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1536,48,48,128,1,float16,float16,0,1.429582405090332
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1536,48,8,128,1,fp8,fp8,0,1.9693424224853515
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1536,48,48,128,1,float16,fp8,0,1.2600831985473633
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1536,48,1,128,1,float16,fp8,0,1.010311985015869
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1536,48,48,128,1,fp8,fp8,0,1.2621999740600587
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1536,48,1,128,1,fp8,fp8,0,1.0678624153137206
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1536,48,2,128,1,float16,fp8,0,0.996332836151123
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1536,48,2,128,1,fp8,fp8,0,0.9999695777893066
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1536,48,4,128,1,float16,float16,0,1.0397199630737304
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1536,48,4,128,1,float16,fp8,0,1.0173935890197754
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1536,48,4,128,1,fp8,fp8,0,0.9968784332275391
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1536,48,4,128,1,fp8,fp8,0,1.9688783645629884
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1536,48,8,128,1,float16,float16,0,1.0682607650756837
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1536,48,8,128,1,float16,fp8,0,1.0058544158935547
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1536,48,8,128,1,float16,float16,0,2.153411293029785
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1536,48,48,128,1,float16,float16,0,0.7183792114257812
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1536,48,8,128,1,fp8,fp8,0,1.007583999633789
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1536,48,1,128,1,float16,float16,0,0.5336944103240967
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1536,48,48,128,1,float16,fp8,0,0.6391759872436523
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1536,48,48,128,1,fp8,fp8,0,0.7286320209503174
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1536,48,1,128,1,float16,fp8,0,0.5106592178344727
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1536,48,1,128,1,fp8,fp8,0,0.5092976093292236
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1536,48,2,128,1,float16,float16,0,0.5199295997619628
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1536,48,1,128,1,float16,float16,0,1.1666208267211915
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1536,48,2,128,1,float16,fp8,0,0.5237008094787597
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1536,48,2,128,1,fp8,fp8,0,0.5086239814758301
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1536,48,4,128,1,float16,fp8,0,0.5104288101196289
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1536,48,4,128,1,fp8,fp8,0,0.5090767860412597
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1536,48,8,128,1,float16,float16,0,0.5431215763092041
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1536,48,48,128,1,float16,float16,0,0.3632848024368286
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1536,48,1,128,1,float16,float16,0,0.2689631938934326
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1536,48,8,128,1,fp8,fp8,0,0.5231567859649658
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1536,48,48,128,1,fp8,fp8,0,0.33238720893859863
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1536,48,1,128,1,float16,fp8,0,0.2656991958618164
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1536,48,1,128,1,fp8,fp8,0,0.2663424015045166
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1536,48,2,128,1,float16,float16,0,0.2702640056610107
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1536,48,2,128,1,float16,fp8,0,0.2653424024581909
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1536,48,2,128,1,fp8,fp8,0,0.2718224048614502
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1536,48,4,128,1,float16,float16,0,0.2740911960601807
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1536,48,4,128,1,float16,fp8,0,0.26595358848571776
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1536,48,4,128,1,fp8,fp8,0,0.27751359939575193
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1536,48,8,128,1,float16,float16,0,0.28266880512237547
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1536,48,8,128,1,float16,fp8,0,0.26614880561828613
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1536,48,8,128,1,fp8,fp8,0,0.27782559394836426
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1536,48,48,128,1,float16,float16,0,0.192631995677948
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1536,48,48,128,1,float16,fp8,0,0.17782080173492432
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1536,48,48,128,1,fp8,fp8,0,0.18084800243377686
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1536,48,1,128,1,float16,fp8,0,0.14394400119781495
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1536,48,1,128,1,fp8,fp8,0,0.14457600116729735
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1536,48,2,128,1,float16,float16,0,0.14774240255355836
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1536,48,2,128,1,float16,fp8,0,0.14453599452972413
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1536,48,2,128,1,fp8,fp8,0,0.14449119567871094
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1536,48,4,128,1,float16,float16,0,0.1495903968811035
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1536,48,4,128,1,float16,fp8,0,0.1448032021522522
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1536,48,4,128,1,fp8,fp8,0,0.1456112027168274
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1536,48,8,128,1,float16,fp8,0,0.14510560035705566
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1536,48,4,128,1,float16,float16,0,0.549567985534668
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1536,48,8,128,1,fp8,fp8,0,0.14499520063400267
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1536,48,48,128,1,float16,float16,0,0.10775680541992187
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1536,48,48,128,1,float16,fp8,0,0.10108319520950318
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1536,48,48,128,1,fp8,fp8,0,0.1016495943069458
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1536,48,2,128,1,float16,float16,0,1.0250384330749511
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1536,48,1,128,1,float16,float16,0,0.08511199951171874
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1536,48,1,128,1,float16,fp8,0,0.08309760093688964
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1536,48,2,128,1,float16,float16,0,0.0856768012046814
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1536,48,2,128,1,float16,fp8,0,0.08297119736671447
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1536,48,4,128,1,float16,float16,0,0.08667680025100707
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1536,48,4,128,1,float16,fp8,0,0.08222879767417908
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1536,48,4,128,1,fp8,fp8,0,0.08285599946975708
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1536,48,8,128,1,float16,float16,0,0.0883840024471283
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1536,48,8,128,1,float16,fp8,0,0.083051198720932
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1536,48,8,128,1,fp8,fp8,0,0.08325759768486023
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1536,48,48,128,1,float16,float16,0,0.0646016001701355
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1536,48,48,128,1,float16,fp8,0,0.05991680026054382
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1536,48,48,128,1,fp8,fp8,0,0.059806400537490846
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1536,48,1,128,1,float16,float16,0,0.05401759743690491
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1536,48,1,128,1,float16,fp8,0,0.051867198944091794
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1536,48,1,128,1,fp8,fp8,0,0.05179679989814758
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1536,48,2,128,1,float16,float16,0,0.05440800189971924
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1536,48,2,128,1,float16,fp8,0,0.05170239806175232
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1536,48,2,128,1,fp8,fp8,0,0.05208479762077332
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1536,48,1,128,1,float16,float16,0,0.14788800477981567
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1536,48,4,128,1,float16,float16,0,0.05443840026855469
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1536,48,4,128,1,float16,fp8,0,0.05234079957008362
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1536,48,4,128,1,fp8,fp8,0,0.05193120241165161
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1536,48,8,128,1,float16,float16,0,0.05548800230026245
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1536,48,8,128,1,fp8,fp8,0,0.051811200380325315
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1536,48,8,128,1,float16,float16,0,0.1525920033454895
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1536,48,8,128,1,float16,fp8,0,0.510640001296997
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1536,48,1,128,1,fp8,fp8,0,0.08289920091629029
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1536,48,2,128,1,fp8,fp8,0,0.08263999819755555
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,1024,48,1,128,1,float16,float16,0,2.1012224197387694
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,1024,48,1,128,1,float16,fp8,0,2.133697509765625
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,1024,48,1,128,1,fp8,fp8,0,2.130174446105957
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1536,48,8,128,1,float16,fp8,0,0.05215039849281311
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,1024,48,2,128,1,float16,float16,0,2.094000053405762
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,1024,48,2,128,1,float16,fp8,0,2.130191993713379
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,1024,48,2,128,1,fp8,fp8,0,2.1312911987304686
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,1024,48,4,128,1,float16,fp8,0,2.132441520690918
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,1024,48,4,128,1,float16,float16,0,2.3126976013183596
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1536,48,48,128,1,float16,fp8,0,0.332588791847229
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,1024,48,4,128,1,fp8,fp8,0,2.1291776657104493
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,1024,48,8,128,1,float16,float16,0,2.3086879730224608
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1024,48,48,128,1,float16,float16,0,1.593380832672119
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,1024,48,8,128,1,float16,fp8,0,2.2291744232177733
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1024,48,48,128,1,float16,fp8,0,1.4238096237182618
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1024,48,1,128,1,float16,float16,0,1.0623663902282714
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,1024,48,8,128,1,fp8,fp8,0,2.1294672012329103
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1024,48,1,128,1,float16,fp8,0,1.073788833618164
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1024,48,2,128,1,float16,float16,0,1.0892560005187988
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1024,48,2,128,1,float16,fp8,0,1.0807567596435548
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1024,48,2,128,1,fp8,fp8,0,1.0753456115722657
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1024,48,4,128,1,float16,float16,0,1.0881664276123046
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1024,48,4,128,1,float16,fp8,0,1.0736543655395507
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1024,48,4,128,1,fp8,fp8,0,1.0734687805175782
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1024,48,48,128,1,float16,fp8,0,0.7202239990234375
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1024,48,8,128,1,float16,float16,0,1.1647024154663086
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1024,48,48,128,1,float16,float16,0,0.8411711692810059
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1024,48,8,128,1,float16,fp8,0,1.0761983871459961
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1024,48,8,128,1,fp8,fp8,0,1.075049591064453
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1024,48,1,128,1,float16,float16,0,0.5369999885559082
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1024,48,1,128,1,float16,fp8,0,0.5959631919860839
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1024,48,1,128,1,fp8,fp8,0,0.5728896141052247
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1024,48,2,128,1,float16,fp8,0,0.5467743873596191
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1024,48,2,128,1,fp8,fp8,0,0.5462672233581543
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1024,48,4,128,1,float16,float16,0,0.5698768138885498
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1024,48,48,128,1,fp8,fp8,0,1.5227248191833496
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1024,48,4,128,1,fp8,fp8,0,0.5469168186187744
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1024,48,8,128,1,float16,float16,0,0.5709072113037109
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1024,48,8,128,1,float16,fp8,0,0.5624656200408935
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1024,48,48,128,1,float16,float16,0,0.4065040111541748
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1024,48,8,128,1,fp8,fp8,0,0.546451187133789
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1024,48,1,128,1,fp8,fp8,0,1.1932815551757812
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1024,48,48,128,1,float16,fp8,0,0.3698767900466919
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1024,48,1,128,1,float16,float16,0,0.2798912048339844
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1024,48,48,128,1,fp8,fp8,0,0.3739903926849365
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1024,48,1,128,1,float16,fp8,0,0.2827744007110596
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1024,48,1,128,1,fp8,fp8,0,0.2825632095336914
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1024,48,2,128,1,float16,float16,0,0.2810447931289673
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1024,48,2,128,1,float16,fp8,0,0.28512799739837646
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1024,48,2,128,1,fp8,fp8,0,0.28293600082397463
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1024,48,4,128,1,float16,float16,0,0.283620810508728
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1024,48,4,128,1,float16,fp8,0,0.28305759429931643
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1024,48,4,128,1,fp8,fp8,0,0.28525118827819823
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1024,48,8,128,1,float16,float16,0,0.2941663980484009
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1024,48,8,128,1,float16,fp8,0,0.28315200805664065
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1024,48,48,128,1,fp8,fp8,0,0.7202415943145752
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1024,48,48,128,1,float16,float16,0,0.2119744062423706
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1024,48,48,128,1,float16,fp8,0,0.19579999446868895
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1024,48,48,128,1,fp8,fp8,0,0.1953168034553528
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1024,48,1,128,1,float16,float16,0,0.15203839540481567
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1024,48,1,128,1,fp8,fp8,0,0.1515727996826172
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1024,48,2,128,1,float16,float16,0,0.1514639973640442
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1024,48,2,128,1,float16,fp8,0,0.15102880001068114
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1024,48,2,128,1,fp8,fp8,0,0.15135519504547118
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1024,48,2,128,1,float16,float16,0,0.5369647979736328
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1024,48,4,128,1,float16,fp8,0,0.1511407971382141
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1024,48,4,128,1,fp8,fp8,0,0.15175039768218995
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1024,48,8,128,1,float16,fp8,0,0.15143519639968872
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1024,48,8,128,1,fp8,fp8,0,0.15177600383758544
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1024,48,48,128,1,float16,float16,0,0.1153216004371643
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1024,48,48,128,1,float16,fp8,0,0.10755360126495361
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1024,48,4,128,1,float16,fp8,0,0.5642864227294921
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1024,48,48,128,1,fp8,fp8,0,0.10706720352172852
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1024,48,1,128,1,float16,float16,0,0.08538879752159119
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1024,48,1,128,1,float16,fp8,0,0.08348320126533508
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1024,48,1,128,1,fp8,fp8,0,0.08327199816703797
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1024,48,2,128,1,float16,float16,0,0.08586239814758301
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1024,48,2,128,1,float16,fp8,0,0.0839904010295868
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1024,48,2,128,1,fp8,fp8,0,0.08366559743881226
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1024,48,4,128,1,float16,float16,0,0.08776159882545471
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1024,48,4,128,1,float16,fp8,0,0.08389120101928711
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1024,48,4,128,1,fp8,fp8,0,0.08336640000343323
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1024,48,8,128,1,float16,float16,0,0.09015679955482483
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1024,48,8,128,1,float16,fp8,0,0.08405439853668213
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1024,48,8,128,1,fp8,fp8,0,0.08439040184020996
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1024,48,48,128,1,float16,fp8,0,0.06152960062026978
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1024,48,1,128,1,float16,float16,0,0.05140479803085327
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1024,48,1,128,1,float16,fp8,0,0.05065760016441345
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1024,48,1,128,1,fp8,fp8,0,0.05053600072860718
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1024,48,2,128,1,float16,float16,0,0.05178239941596985
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1024,48,2,128,1,float16,fp8,0,0.050267201662063596
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1024,48,2,128,1,fp8,fp8,0,0.05068640112876892
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1024,48,4,128,1,float16,float16,0,0.05220000147819519
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1024,48,4,128,1,fp8,fp8,0,0.05049759745597839
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1024,48,8,128,1,float16,float16,0,0.05344319939613342
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1024,48,8,128,1,float16,fp8,0,0.05039680004119873
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1024,48,8,128,1,fp8,fp8,0,0.050216001272201535
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1024,48,48,128,1,float16,float16,0,0.040268799662590025
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1024,48,48,128,1,float16,fp8,0,0.03930239975452423
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1024,48,48,128,1,fp8,fp8,0,0.03936479985713959
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1024,48,1,128,1,float16,float16,0,0.035359999537467955
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1024,48,1,128,1,float16,fp8,0,0.03494080007076263
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1024,48,1,128,1,float16,fp8,0,0.15155199766159058
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1024,48,1,128,1,fp8,fp8,0,0.03502399921417236
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1024,48,2,128,1,float16,float16,0,0.03705599904060364
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1024,48,2,128,1,float16,fp8,0,0.034955200552940366
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1024,48,2,128,1,fp8,fp8,0,0.03500159978866577
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1024,48,4,128,1,float16,float16,0,0.03664799928665161
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1024,48,4,128,1,float16,fp8,0,0.03508000075817108
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1024,48,4,128,1,float16,float16,0,0.1533360004425049
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1024,48,4,128,1,fp8,fp8,0,0.03506079912185669
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1024,48,8,128,1,float16,float16,0,0.037108799815177916
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1024,48,8,128,1,float16,fp8,0,0.03500800132751465
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1024,48,8,128,1,fp8,fp8,0,0.03500480055809021
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1024,48,8,128,1,float16,float16,0,0.158188796043396
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1024,48,48,128,1,float16,float16,0,0.06833279728889466
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1024,48,48,128,1,fp8,fp8,0,0.061883199214935306
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,512,48,1,128,1,float16,float16,0,1.569257640838623
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,512,48,1,128,1,float16,fp8,0,1.6394479751586915
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,512,48,1,128,1,fp8,fp8,0,1.6380575180053711
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,512,48,2,128,1,float16,float16,0,1.5721263885498047
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1024,48,4,128,1,float16,fp8,0,0.050355201959609984
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1024,48,8,128,1,fp8,fp8,0,0.28336639404296876
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,512,48,2,128,1,float16,fp8,0,1.6402959823608398
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,512,48,2,128,1,fp8,fp8,0,1.6380144119262696
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,512,48,4,128,1,float16,float16,0,1.6467487335205078
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,512,48,4,128,1,float16,fp8,0,1.6354272842407227
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,512,48,4,128,1,fp8,fp8,0,1.6351696014404298
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,512,48,8,128,1,float16,float16,0,1.7664512634277343
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,512,48,8,128,1,float16,fp8,0,1.6352880477905274
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,512,48,1,128,1,float16,float16,0,0.7868288040161133
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,512,48,8,128,1,fp8,fp8,0,1.6354352951049804
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,512,48,1,128,1,float16,fp8,0,0.8268591880798339
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,512,48,48,128,1,float16,fp8,0,1.1659456253051759
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,512,48,2,128,1,float16,float16,0,0.7899392127990723
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,512,48,48,128,1,fp8,fp8,0,1.2355728149414062
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,512,48,1,128,1,fp8,fp8,0,0.8980128288269043
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,512,48,2,128,1,float16,fp8,0,0.8257568359375
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,512,48,2,128,1,fp8,fp8,0,0.825928020477295
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,512,48,4,128,1,float16,float16,0,0.8090448379516602
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,512,48,4,128,1,float16,fp8,0,0.8261551856994629
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,512,48,8,128,1,float16,float16,0,0.8601903915405273
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,512,48,8,128,1,float16,fp8,0,0.8561936378479004
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,512,48,48,128,1,float16,fp8,0,0.5900303840637207
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,512,48,1,128,1,float16,float16,0,0.40668158531188964
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,512,48,8,128,1,fp8,fp8,0,0.8241392135620117
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,512,48,48,128,1,fp8,fp8,0,0.596289587020874
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,512,48,1,128,1,float16,fp8,0,0.4202911853790283
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,512,48,1,128,1,fp8,fp8,0,0.4256239891052246
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,512,48,2,128,1,float16,float16,0,0.4028639793395996
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,512,48,2,128,1,float16,fp8,0,0.4202847957611084
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,512,48,2,128,1,fp8,fp8,0,0.4201519966125488
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,512,48,4,128,1,float16,float16,0,0.41728959083557127
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,512,48,4,128,1,float16,fp8,0,0.4204751968383789
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,512,48,4,128,1,fp8,fp8,0,0.4204751968383789
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,512,48,8,128,1,float16,float16,0,0.4316688060760498
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,512,48,8,128,1,float16,fp8,0,0.42133278846740724
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,512,48,8,128,1,fp8,fp8,0,0.41985602378845216
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,512,48,48,128,1,float16,float16,0,0.32952640056610105
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,512,48,48,128,1,float16,float16,0,1.2835359573364258
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,512,48,48,128,1,float16,fp8,0,0.30345280170440675
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,512,48,1,128,1,float16,float16,0,0.21011359691619874
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,512,48,48,128,1,fp8,fp8,0,0.30397279262542726
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,512,48,1,128,1,fp8,fp8,0,0.21848158836364745
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,512,48,2,128,1,float16,float16,0,0.21069760322570802
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,512,48,2,128,1,fp8,fp8,0,0.21856160163879396
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,512,48,4,128,1,float16,float16,0,0.2161423921585083
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,512,48,4,128,1,float16,fp8,0,0.21776320934295654
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,512,48,4,128,1,fp8,fp8,0,0.21834399700164794
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,512,48,8,128,1,float16,float16,0,0.22539680004119872
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,512,48,8,128,1,float16,fp8,0,0.21780319213867189
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,512,48,4,128,1,fp8,fp8,0,0.8435215950012207
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,512,48,8,128,1,fp8,fp8,0,0.21822879314422608
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,512,48,48,128,1,float16,float16,0,0.17329920530319215
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,512,48,48,128,1,float16,fp8,0,0.16015039682388305
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,512,48,1,128,1,float16,float16,0,0.11480159759521484
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,512,48,1,128,1,float16,fp8,0,0.1172287940979004
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,512,48,48,128,1,fp8,fp8,0,0.15997920036315919
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,512,48,1,128,1,fp8,fp8,0,0.11667840480804444
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,512,48,2,128,1,float16,float16,0,0.11527359485626221
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,512,48,2,128,1,float16,fp8,0,0.11687359809875489
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,512,48,4,128,1,float16,float16,0,0.1165887951850891
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,512,48,4,128,1,float16,fp8,0,0.1171231985092163
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,512,48,4,128,1,fp8,fp8,0,0.11730879545211792
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,512,48,8,128,1,float16,float16,0,0.12150720357894898
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,512,48,8,128,1,float16,fp8,0,0.11693600416183472
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,512,48,8,128,1,fp8,fp8,0,0.11773920059204102
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,512,48,48,128,1,float16,float16,0,0.09521920084953309
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,512,48,48,128,1,fp8,fp8,0,0.08838880062103271
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,512,48,1,128,1,float16,float16,0,0.06602399945259094
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,512,48,1,128,1,float16,fp8,0,0.06561599969863892
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,512,48,1,128,1,fp8,fp8,0,0.06552799940109252
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,512,48,2,128,1,float16,fp8,0,0.06517760157585144
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,512,48,2,128,1,fp8,fp8,0,0.06537439823150634
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,512,48,4,128,1,float16,float16,0,0.06757439970970154
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,512,48,4,128,1,float16,fp8,0,0.065556800365448
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,512,48,4,128,1,fp8,fp8,0,0.06579520106315613
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,512,48,8,128,1,float16,float16,0,0.07037760019302368
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,512,48,8,128,1,float16,fp8,0,0.06594399809837341
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,512,48,8,128,1,fp8,fp8,0,0.06574400067329407
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,512,48,48,128,1,float16,float16,0,0.05554720163345337
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,512,48,48,128,1,float16,fp8,0,0.049351999163627626
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,512,48,1,128,1,float16,float16,0,0.039084801077842714
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,512,48,1,128,1,float16,fp8,0,0.03905119895935059
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,512,48,48,128,1,fp8,fp8,0,0.04938240051269531
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,512,48,1,128,1,fp8,fp8,0,0.039134401082992556
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,512,48,1,128,1,float16,fp8,0,0.21770079135894777
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,512,48,2,128,1,float16,float16,0,0.039131200313568114
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,512,48,4,128,1,float16,float16,0,0.03914400041103363
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,512,48,2,128,1,float16,fp8,0,0.03902879953384399
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,512,48,4,128,1,float16,fp8,0,0.03881439864635468
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,512,48,4,128,1,fp8,fp8,0,0.039078399538993835
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,512,48,8,128,1,float16,float16,0,0.04122720062732697
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,512,48,8,128,1,float16,fp8,0,0.03907040059566498
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,512,48,8,128,1,fp8,fp8,0,0.03917919993400574
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,512,48,48,128,1,float16,float16,0,0.032583999633789065
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,512,48,48,128,1,float16,fp8,0,0.03270080089569092
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,512,48,48,128,1,fp8,fp8,0,0.03249599933624268
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,512,48,1,128,1,float16,float16,0,0.026824000477790832
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,512,48,1,128,1,float16,fp8,0,0.026785600185394286
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,512,48,1,128,1,fp8,fp8,0,0.026846399903297423
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,512,48,2,128,1,float16,float16,0,0.02678399980068207
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,512,48,2,128,1,float16,fp8,0,0.02688960134983063
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,512,48,2,128,1,fp8,fp8,0,0.026855999231338502
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,512,48,4,128,1,float16,float16,0,0.027127999067306518
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,512,48,4,128,1,float16,fp8,0,0.026756799221038817
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,512,48,4,128,1,fp8,fp8,0,0.026881599426269533
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,512,48,8,128,1,float16,float16,0,0.027779200673103334
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,512,48,8,128,1,float16,fp8,0,0.026796799898147584
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,512,48,2,128,1,fp8,fp8,0,0.11715519428253174
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,512,48,48,128,1,float16,float16,0,0.020686399936676026
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,512,48,48,128,1,float16,fp8,0,0.02067520022392273
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,512,48,48,128,1,fp8,fp8,0,0.020713600516319274
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,512,48,1,128,1,float16,fp8,0,0.018607999384403228
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,512,48,1,128,1,float16,float16,0,0.0186256006360054
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,512,48,2,128,1,float16,float16,0,0.018676799535751343
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,512,48,48,128,1,float16,fp8,0,0.08829600214958191
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,512,48,2,128,1,float16,fp8,0,0.018593600392341612
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,512,48,2,128,1,fp8,fp8,0,0.018592000007629395
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,512,48,4,128,1,float16,float16,0,0.0186256006360054
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,512,48,4,128,1,float16,fp8,0,0.018545599281787874
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,512,48,4,128,1,fp8,fp8,0,0.018624000251293182
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,512,48,1,128,1,fp8,fp8,0,0.0186271995306015
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,512,48,8,128,1,float16,float16,0,0.01868479996919632
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,512,48,8,128,1,float16,fp8,0,0.018651199340820313
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,512,48,8,128,1,fp8,fp8,0,0.018607999384403228
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,256,48,1,128,1,float16,float16,0,0.6452832221984863
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,256,48,1,128,1,float16,fp8,0,0.6950160026550293
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,512,48,2,128,1,fp8,fp8,0,0.03903680145740509
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,512,48,2,128,1,float16,fp8,0,0.21758880615234374
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,256,48,1,128,1,fp8,fp8,0,0.6951551914215088
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,256,48,2,128,1,float16,float16,0,0.6474656105041504
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,256,48,2,128,1,float16,fp8,0,0.6939568042755127
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,256,48,2,128,1,fp8,fp8,0,0.6929232120513916
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,256,48,4,128,1,float16,float16,0,0.6671487808227539
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,256,48,4,128,1,float16,fp8,0,0.693336009979248
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,512,48,48,128,1,float16,float16,0,0.6502943992614746
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,512,48,8,128,1,fp8,fp8,0,0.02674719989299774
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,256,48,4,128,1,fp8,fp8,0,0.693177604675293
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,256,48,8,128,1,float16,float16,0,0.7053936004638672
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,256,48,48,128,1,float16,float16,0,0.5681151866912841
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,256,48,8,128,1,fp8,fp8,0,0.6927807807922364
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,256,48,48,128,1,float16,fp8,0,0.5210976123809814
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,256,48,1,128,1,float16,float16,0,0.3290976047515869
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,256,48,48,128,1,fp8,fp8,0,0.521665620803833
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,256,48,1,128,1,fp8,fp8,0,0.3526047945022583
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,256,48,2,128,1,float16,fp8,0,0.35226879119873045
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,256,48,2,128,1,fp8,fp8,0,0.35240960121154785
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,256,48,4,128,1,float16,float16,0,0.3392927885055542
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,256,48,4,128,1,float16,fp8,0,0.35237600803375246
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,256,48,4,128,1,fp8,fp8,0,0.3526288032531738
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,256,48,8,128,1,float16,float16,0,0.35900321006774905
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,256,48,8,128,1,float16,fp8,0,0.3527712106704712
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,256,48,8,128,1,fp8,fp8,0,0.3527712106704712
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,256,48,48,128,1,float16,float16,0,0.28866400718688967
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,256,48,48,128,1,float16,fp8,0,0.26669600009918215
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,256,48,48,128,1,fp8,fp8,0,0.2665600061416626
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,256,48,1,128,1,float16,float16,0,0.17132799625396727
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,256,48,1,128,1,float16,fp8,0,0.18124959468841553
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,256,48,1,128,1,fp8,fp8,0,0.18160799741744996
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,256,48,2,128,1,float16,float16,0,0.17151360511779784
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,256,48,2,128,1,float16,fp8,0,0.18135199546813965
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,256,48,2,128,1,fp8,fp8,0,0.18158559799194335
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,256,48,4,128,1,float16,float16,0,0.17721760272979736
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,256,48,4,128,1,float16,fp8,0,0.18123199939727783
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,256,48,4,128,1,fp8,fp8,0,0.1811247944831848
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,256,48,8,128,1,float16,fp8,0,0.18170080184936524
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,256,48,8,128,1,fp8,fp8,0,0.1814128041267395
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,256,48,8,128,1,float16,fp8,0,0.6942575931549072
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,256,48,48,128,1,float16,float16,0,0.15175199508666992
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,256,48,48,128,1,float16,fp8,0,0.1414720058441162
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,512,48,2,128,1,float16,float16,0,0.06607360243797303
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,256,48,1,128,1,float16,float16,0,0.094896000623703
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,256,48,1,128,1,float16,fp8,0,0.09860799908638
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,256,48,1,128,1,fp8,fp8,0,0.09882400035858155
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,256,48,2,128,1,float16,float16,0,0.09481920003890991
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,256,48,2,128,1,float16,fp8,0,0.0990224003791809
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,256,48,2,128,1,fp8,fp8,0,0.09834880232810975
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,256,48,4,128,1,float16,float16,0,0.09727839827537536
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,256,48,4,128,1,float16,fp8,0,0.0985696017742157
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,256,48,4,128,1,fp8,fp8,0,0.09900799989700318
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,256,48,2,128,1,float16,float16,0,0.33037600517272947
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,256,48,8,128,1,float16,float16,0,0.10137920379638672
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,256,48,8,128,1,float16,fp8,0,0.09929760098457337
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,256,48,8,128,1,fp8,fp8,0,0.09895359873771667
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,256,48,48,128,1,float16,float16,0,0.08447039723396302
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,256,48,48,128,1,float16,fp8,0,0.07894399762153625
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,256,48,1,128,1,float16,float16,0,0.055339199304580686
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,256,48,1,128,1,float16,fp8,0,0.055769598484039305
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,256,48,2,128,1,float16,float16,0,0.05553920269012451
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,256,48,2,128,1,float16,fp8,0,0.055632001161575316
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,256,48,4,128,1,float16,float16,0,0.05756959915161133
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,256,48,4,128,1,float16,fp8,0,0.05554559826850891
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,256,48,8,128,1,float16,float16,0,0.05975040197372437
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,256,48,4,128,1,fp8,fp8,0,0.0556768000125885
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,256,48,8,128,1,float16,fp8,0,0.055576002597808837
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,256,48,8,128,1,fp8,fp8,0,0.055508798360824584
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,256,48,48,128,1,float16,float16,0,0.047849598526954654
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,256,48,48,128,1,float16,fp8,0,0.042987200617790225
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,256,48,48,128,1,fp8,fp8,0,0.04319359958171844
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,256,48,1,128,1,float16,float16,0,0.030980798602104186
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,256,48,1,128,1,float16,fp8,0,0.032374399900436404
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,256,48,1,128,1,fp8,fp8,0,0.030943998694419862
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,256,48,2,128,1,float16,float16,0,0.031462401151657104
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,256,48,2,128,1,float16,fp8,0,0.0319920003414154
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,256,48,2,128,1,fp8,fp8,0,0.03248000144958496
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,256,48,4,128,1,float16,float16,0,0.03245280086994171
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,256,48,4,128,1,float16,fp8,0,0.032513600587844846
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,256,48,4,128,1,fp8,fp8,0,0.03181760013103485
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,256,48,8,128,1,float16,float16,0,0.03307519853115082
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,256,48,8,128,1,float16,fp8,0,0.03236159980297089
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,256,48,8,128,1,fp8,fp8,0,0.032513600587844846
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,256,48,48,128,1,float16,float16,0,0.02851040065288544
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,256,48,48,128,1,float16,fp8,0,0.028889599442481994
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,256,48,48,128,1,fp8,fp8,0,0.028886398673057555
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,256,48,1,128,1,float16,float16,0,0.022726400196552275
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,256,48,1,128,1,float16,fp8,0,0.022734400629997254
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,256,48,1,128,1,fp8,fp8,0,0.022724799811840057
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,256,48,2,128,1,float16,float16,0,0.02276480048894882
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,256,48,2,128,1,float16,fp8,0,0.02274080067873001
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,256,48,2,128,1,fp8,fp8,0,0.02269279956817627
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,256,48,4,128,1,float16,float16,0,0.022771200537681578
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,256,48,8,128,1,float16,float16,0,0.1864799976348877
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,256,48,4,128,1,float16,fp8,0,0.022779199481010436
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,256,48,4,128,1,fp8,fp8,0,0.022711999714374542
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,256,48,8,128,1,float16,float16,0,0.023464000225067137
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,256,48,8,128,1,float16,fp8,0,0.02279839962720871
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,256,48,8,128,1,fp8,fp8,0,0.022732800245285033
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,256,48,48,128,1,float16,float16,0,0.018408000469207764
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,256,48,48,128,1,float16,fp8,0,0.01866080015897751
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,256,48,48,128,1,fp8,fp8,0,0.018585599958896637
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,256,48,1,128,1,float16,float16,0,0.016521599888801575
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,256,48,1,128,1,float16,fp8,0,0.014560000598430633
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,256,48,1,128,1,fp8,fp8,0,0.016139200329780577
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,256,48,2,128,1,float16,float16,0,0.014737600088119506
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,256,48,48,128,1,fp8,fp8,0,0.14158719778060913
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,256,48,2,128,1,float16,fp8,0,0.01629280000925064
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,256,48,2,128,1,fp8,fp8,0,0.014951999485492706
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,256,48,4,128,1,float16,float16,0,0.016599999368190767
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,256,48,4,128,1,fp8,fp8,0,0.015481600165367126
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,256,48,8,128,1,float16,float16,0,0.016620799899101257
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,256,48,8,128,1,float16,fp8,0,0.016259199380874632
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,256,48,8,128,1,fp8,fp8,0,0.01656160056591034
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,256,48,48,128,1,float16,float16,0,0.01690240055322647
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,256,48,1,128,1,float16,fp8,0,0.3529695987701416
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,256,48,48,128,1,float16,fp8,0,0.016953599452972413
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,256,48,48,128,1,fp8,fp8,0,0.016574400663375854
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,256,48,1,128,1,float16,float16,0,0.014579200744628906
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,256,48,1,128,1,float16,fp8,0,0.014547200500965118
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,256,48,2,128,1,float16,float16,0,0.014553600549697876
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,256,48,48,128,1,fp8,fp8,0,0.07936639785766601
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,256,48,1,128,1,fp8,fp8,0,0.05549439787864685
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,256,48,2,128,1,fp8,fp8,0,0.01451680064201355
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,256,48,4,128,1,float16,float16,0,0.014532800018787383
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,256,48,4,128,1,float16,fp8,0,0.014502400159835815
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,256,48,2,128,1,fp8,fp8,0,0.05554559826850891
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,256,48,4,128,1,fp8,fp8,0,0.014612799882888794
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,256,48,8,128,1,float16,float16,0,0.014569599926471711
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,256,48,8,128,1,float16,fp8,0,0.014532800018787383
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,256,48,8,128,1,fp8,fp8,0,0.015003199875354766
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,128,48,1,128,1,float16,float16,0,0.396724796295166
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,128,48,1,128,1,float16,fp8,0,0.41997599601745605
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,128,48,1,128,1,fp8,fp8,0,0.4208960056304932
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,128,48,2,128,1,float16,fp8,0,0.4208064079284668
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,128,48,4,128,1,float16,float16,0,0.4065983772277832
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,128,48,2,128,1,fp8,fp8,0,0.4201007843017578
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,128,48,4,128,1,float16,fp8,0,0.4199488162994385
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,128,48,4,128,1,fp8,fp8,0,0.42006402015686034
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,128,48,8,128,1,float16,float16,0,0.42482719421386717
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,128,48,48,128,1,float16,float16,0,0.3193376064300537
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,128,48,8,128,1,float16,fp8,0,0.41986398696899413
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,128,48,8,128,1,fp8,fp8,0,0.41913280487060545
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,128,48,48,128,1,float16,fp8,0,0.2994271993637085
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,128,48,1,128,1,float16,float16,0,0.20586080551147462
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,128,48,1,128,1,float16,fp8,0,0.21566879749298096
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,128,48,48,128,1,fp8,fp8,0,0.2993583917617798
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,128,48,1,128,1,fp8,fp8,0,0.2154848098754883
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,128,48,2,128,1,float16,float16,0,0.206166410446167
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,256,48,4,128,1,float16,fp8,0,0.01648640036582947
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,128,48,2,128,1,fp8,fp8,0,0.21525919437408447
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,128,48,4,128,1,float16,float16,0,0.2095344066619873
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,128,48,4,128,1,float16,fp8,0,0.2156048059463501
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,128,48,4,128,1,fp8,fp8,0,0.215499210357666
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,128,48,8,128,1,float16,float16,0,0.21838879585266113
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,128,48,8,128,1,float16,fp8,0,0.21569440364837647
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,256,48,2,128,1,float16,fp8,0,0.014519999921321868
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,128,48,48,128,1,float16,float16,0,0.16371999979019164
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,128,48,8,128,1,fp8,fp8,0,0.2159168004989624
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,128,48,48,128,1,float16,fp8,0,0.15450079441070558
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,128,48,48,128,1,fp8,fp8,0,0.15585440397262573
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,128,48,1,128,1,float16,float16,0,0.10888479948043824
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,128,48,1,128,1,fp8,fp8,0,0.11307200193405151
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,128,48,1,128,1,float16,fp8,0,0.11262400150299072
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,128,48,2,128,1,float16,fp8,0,0.11282399892807007
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,128,48,4,128,1,float16,float16,0,0.11102559566497802
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,128,48,4,128,1,float16,fp8,0,0.1126431941986084
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,128,48,4,128,1,fp8,fp8,0,0.1129263997077942
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,128,48,8,128,1,float16,float16,0,0.11558400392532349
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,128,48,8,128,1,float16,fp8,0,0.11346240043640136
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,128,48,8,128,1,fp8,fp8,0,0.11272159814834595
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,128,48,48,128,1,float16,float16,0,0.08759040236473084
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,128,48,48,128,1,float16,fp8,0,0.08421760201454162
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,128,48,48,128,1,fp8,fp8,0,0.08415200114250183
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,128,48,1,128,1,float16,float16,0,0.060729598999023436
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,128,48,1,128,1,float16,fp8,0,0.06166239976882935
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,128,48,1,128,1,fp8,fp8,0,0.0616208016872406
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,128,48,2,128,1,float16,float16,0,0.061596798896789554
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,128,48,2,128,1,float16,fp8,0,0.06165120005607605
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,128,48,2,128,1,fp8,fp8,0,0.06167200207710266
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,128,48,4,128,1,float16,float16,0,0.06225280165672302
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,128,48,4,128,1,float16,fp8,0,0.061667197942733766
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,128,48,8,128,1,float16,float16,0,0.06459360122680664
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,128,48,8,128,1,float16,fp8,0,0.06178560256958008
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,128,48,8,128,1,fp8,fp8,0,0.061667197942733766
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,128,48,48,128,1,float16,float16,0,0.05165759921073913
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,128,48,48,128,1,float16,fp8,0,0.047328001260757445
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,128,48,48,128,1,fp8,fp8,0,0.04730879962444305
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,128,48,1,128,1,float16,fp8,0,0.0356799989938736
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,128,48,1,128,1,fp8,fp8,0,0.03617759943008423
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,128,48,2,128,1,float16,float16,0,0.03508639931678772
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,128,48,2,128,1,float16,fp8,0,0.036134400963783266
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,128,48,2,128,1,float16,fp8,0,0.21546399593353271
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,128,48,2,128,1,fp8,fp8,0,0.03675200045108795
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,128,48,4,128,1,float16,float16,0,0.035566401481628415
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,256,48,1,128,1,fp8,fp8,0,0.014521600306034088
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,128,48,4,128,1,fp8,fp8,0,0.03675679862499237
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,128,48,8,128,1,float16,float16,0,0.037084800004959104
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,128,48,8,128,1,float16,fp8,0,0.036051198840141296
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,128,48,2,128,1,float16,float16,0,0.1087183952331543
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,128,48,2,128,1,fp8,fp8,0,0.11313600540161133
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,128,48,8,128,1,fp8,fp8,0,0.036233600974082944
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,128,48,48,128,1,float16,float16,0,0.02478879988193512
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,128,48,48,128,1,float16,fp8,0,0.02683840095996857
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,128,48,1,128,1,float16,float16,0,0.020721599459648132
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,128,48,1,128,1,fp8,fp8,0,0.02077919989824295
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,128,48,2,128,1,float16,float16,0,0.39724159240722656
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,128,48,2,128,1,float16,fp8,0,0.022601599991321563
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,128,48,1,128,1,float16,fp8,0,0.022761599719524385
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,128,48,4,128,1,float16,float16,0,0.022752000391483305
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,128,48,4,128,1,float16,fp8,0,0.02279040068387985
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,128,48,4,128,1,fp8,fp8,0,0.022628800570964815
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,128,48,8,128,1,float16,float16,0,0.02271360009908676
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,128,48,8,128,1,float16,fp8,0,0.02191520035266876
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,128,48,8,128,1,fp8,fp8,0,0.02229280024766922
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,128,48,48,128,1,float16,float16,0,0.018718400597572328
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,128,48,48,128,1,float16,fp8,0,0.01870719939470291
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,128,48,1,128,1,float16,float16,0,0.03511840105056763
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,128,48,48,128,1,fp8,fp8,0,0.018777599930763243
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,128,48,1,128,1,float16,float16,0,0.016705599427223206
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,128,48,1,128,1,float16,fp8,0,0.016625599563121797
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,128,48,1,128,1,fp8,fp8,0,0.016616000235080718
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,128,48,2,128,1,float16,float16,0,0.016577599942684172
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,128,48,2,128,1,float16,fp8,0,0.01658719927072525
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,128,48,2,128,1,fp8,fp8,0,0.016596800088882445
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,128,48,4,128,1,float16,float16,0,0.016595199704170227
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,128,48,4,128,1,float16,fp8,0,0.016633599996566772
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,128,48,4,128,1,fp8,fp8,0,0.01664319932460785
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,128,48,8,128,1,float16,float16,0,0.016612799465656282
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,128,48,8,128,1,float16,fp8,0,0.0166143998503685
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,128,48,8,128,1,fp8,fp8,0,0.01658399999141693
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,128,48,48,128,1,float16,float16,0,0.012548799812793731
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,128,48,48,128,1,fp8,fp8,0,0.012574400007724761
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,128,48,1,128,1,float16,float16,0,0.011240000277757645
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,128,48,1,128,1,float16,fp8,0,0.011297599971294403
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,128,48,1,128,1,fp8,fp8,0,0.012508800625801087
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,128,48,48,128,1,float16,fp8,0,0.012899200618267059
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,128,48,2,128,1,float16,float16,0,0.012467200309038163
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,128,48,2,128,1,float16,fp8,0,0.012467200309038163
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,128,48,2,128,1,fp8,fp8,0,0.012535999715328216
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,128,48,4,128,1,float16,float16,0,0.0125231996178627
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,128,48,4,128,1,float16,fp8,0,0.036847999691963194
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,128,48,4,128,1,float16,fp8,0,0.012516799569129943
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,128,48,4,128,1,fp8,fp8,0,0.012494400143623352
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,128,48,8,128,1,float16,float16,0,0.012503999471664428
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,128,48,8,128,1,float16,fp8,0,0.012414400279521943
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,128,48,8,128,1,fp8,fp8,0,0.012521600723266602
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,128,48,48,128,1,float16,fp8,0,0.012503999471664428
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,128,48,48,128,1,float16,float16,0,0.012513600289821625
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,128,48,1,128,1,float16,float16,0,0.010428799688816071
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,128,48,1,128,1,float16,fp8,0,0.010547199845314026
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,128,48,1,128,1,fp8,fp8,0,0.010427200049161912
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,128,48,2,128,1,float16,fp8,0,0.010443200170993806
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,128,48,48,128,1,fp8,fp8,0,0.026790401339530943
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,128,48,2,128,1,fp8,fp8,0,0.010449600219726563
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,128,48,4,128,1,float16,float16,0,0.01043040007352829
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,128,48,4,128,1,float16,fp8,0,0.0104592002928257
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,128,48,4,128,1,fp8,fp8,0,0.010438399761915207
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,128,48,2,128,1,float16,float16,0,0.020659199357032774
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,128,48,8,128,1,float16,float16,0,0.010451199859380722
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,128,48,8,128,1,float16,fp8,0,0.0104592002928257
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,128,48,8,128,1,fp8,fp8,0,0.011587200313806533
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,128,48,2,128,1,fp8,fp8,0,0.021113599836826324
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,64,48,1,128,1,float16,float16,0,0.3100016117095947
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,64,48,1,128,1,float16,fp8,0,0.32109599113464354
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,64,48,1,128,1,fp8,fp8,0,0.3207263946533203
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,64,48,2,128,1,float16,float16,0,0.3113872051239014
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,128,48,4,128,1,fp8,fp8,0,0.06155679821968078
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,64,48,2,128,1,float16,fp8,0,0.3209728002548218
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,64,48,2,128,1,fp8,fp8,0,0.32137279510498046
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,64,48,4,128,1,float16,float16,0,0.31531200408935545
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,64,48,4,128,1,float16,fp8,0,0.320630407333374
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,64,48,4,128,1,fp8,fp8,0,0.3199631929397583
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,64,48,8,128,1,float16,float16,0,0.3243808031082153
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,64,48,8,128,1,float16,fp8,0,0.3207040071487427
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,64,48,8,128,1,fp8,fp8,0,0.3200975894927979
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,64,48,48,128,1,fp8,fp8,0,0.20834720134735107
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,64,48,1,128,1,float16,float16,0,0.1619984030723572
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,64,48,1,128,1,float16,fp8,0,0.16563680171966552
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,64,48,1,128,1,fp8,fp8,0,0.16624480485916138
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,64,48,2,128,1,float16,float16,0,0.16284799575805664
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,64,48,2,128,1,float16,fp8,0,0.16543680429458618
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,64,48,2,128,1,fp8,fp8,0,0.1659279942512512
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,64,48,4,128,1,float16,fp8,0,0.16545759439468383
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,64,48,4,128,1,fp8,fp8,0,0.16505119800567628
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,64,48,8,128,1,float16,float16,0,0.16915520429611205
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,64,48,8,128,1,float16,fp8,0,0.16516159772872924
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,64,48,48,128,1,float16,float16,0,0.11320639848709106
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,64,48,8,128,1,fp8,fp8,0,0.16620320081710815
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,64,48,48,128,1,float16,fp8,0,0.10879199504852295
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,64,48,48,128,1,fp8,fp8,0,0.109334397315979
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,64,48,1,128,1,float16,float16,0,0.08706560134887695
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,64,48,1,128,1,float16,fp8,0,0.08676159977912903
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,64,48,1,128,1,fp8,fp8,0,0.08695039749145508
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,128,48,48,128,1,fp8,fp8,0,0.012444800138473511
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,64,48,2,128,1,float16,float16,0,0.08737599849700928
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,64,48,2,128,1,float16,fp8,0,0.08711519837379456
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,64,48,2,128,1,fp8,fp8,0,0.08677440285682678
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,128,48,2,128,1,float16,float16,0,0.010491199791431427
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,64,48,4,128,1,float16,float16,0,0.08852159976959229
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,64,48,4,128,1,float16,fp8,0,0.0866320013999939
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,64,48,4,128,1,fp8,fp8,0,0.08708959817886353
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,64,48,8,128,1,float16,float16,0,0.0911520004272461
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,64,48,8,128,1,float16,fp8,0,0.08718240261077881
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,64,48,8,128,1,fp8,fp8,0,0.0869871973991394
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,64,48,48,128,1,float16,float16,0,0.06370880007743836
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,64,48,48,128,1,float16,fp8,0,0.05957760214805603
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,64,48,48,128,1,fp8,fp8,0,0.05959039926528931
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,64,48,1,128,1,float16,float16,0,0.04785439968109131
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,64,48,2,128,1,float16,float16,0,0.048259198665618896
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,64,48,2,128,1,float16,fp8,0,0.0482448011636734
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,64,48,2,128,1,fp8,fp8,0,0.048860800266265866
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,64,48,4,128,1,float16,float16,0,0.048824000358581546
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,64,48,4,128,1,float16,fp8,0,0.048161599040031436
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,64,48,4,128,1,fp8,fp8,0,0.04821760058403015
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,64,48,8,128,1,float16,float16,0,0.049584001302719116
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,64,48,8,128,1,float16,fp8,0,0.0488431990146637
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,64,48,8,128,1,fp8,fp8,0,0.048135998845100406
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,64,48,48,128,1,float16,fp8,0,0.03463680148124695
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,64,48,48,128,1,fp8,fp8,0,0.034241598844528195
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,64,48,1,128,1,float16,float16,0,0.02889919877052307
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,64,48,1,128,1,float16,fp8,0,0.028862398862838746
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,64,48,48,128,1,float16,float16,0,0.21644160747528077
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,64,48,2,128,1,float16,float16,0,0.028963199257850646
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,64,48,2,128,1,float16,fp8,0,0.028884801268577575
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,64,48,2,128,1,fp8,fp8,0,0.02897599935531616
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,64,48,4,128,1,float16,float16,0,0.029009601473808287
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,64,48,4,128,1,float16,fp8,0,0.028969600796699524
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,64,48,4,128,1,float16,float16,0,0.16502079963684083
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,64,48,4,128,1,fp8,fp8,0,0.028961598873138428
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,64,48,8,128,1,float16,float16,0,0.030707201361656188
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,64,48,8,128,1,float16,fp8,0,0.02892000079154968
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,64,48,8,128,1,fp8,fp8,0,0.028884801268577575
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,64,48,48,128,1,float16,float16,0,0.02072319984436035
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,64,48,48,128,1,float16,fp8,0,0.020691199600696562
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,64,48,48,128,1,fp8,fp8,0,0.02067999988794327
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,64,48,1,128,1,float16,float16,0,0.018643200397491455
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,64,48,1,128,1,float16,fp8,0,0.018628799915313722
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,64,48,1,128,1,fp8,fp8,0,0.01860959976911545
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,64,48,2,128,1,float16,float16,0,0.01865919977426529
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,64,48,2,128,1,float16,fp8,0,0.018611200153827667
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,64,48,2,128,1,fp8,fp8,0,0.018753600120544434
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,64,48,4,128,1,float16,float16,0,0.018617600202560425
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,64,48,4,128,1,fp8,fp8,0,0.018665599822998046
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,64,48,4,128,1,float16,fp8,0,0.018638400733470915
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,64,48,8,128,1,float16,float16,0,0.01860959976911545
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,64,48,8,128,1,float16,fp8,0,0.018673600256443025
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,64,48,8,128,1,fp8,fp8,0,0.018692800402641298
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,64,48,48,128,1,float16,float16,0,0.016467200219631196
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,64,48,48,128,1,float16,fp8,0,0.014742399752140044
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,64,48,48,128,1,fp8,fp8,0,0.016616000235080718
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,64,48,1,128,1,float16,fp8,0,0.014563199877738953
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,64,48,1,128,1,fp8,fp8,0,0.014582400023937226
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,64,48,2,128,1,float16,float16,0,0.014548799395561219
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,64,48,2,128,1,float16,fp8,0,0.014508800208568573
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,64,48,2,128,1,fp8,fp8,0,0.014584000408649444
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,64,48,4,128,1,float16,float16,0,0.014532800018787383
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,64,48,4,128,1,float16,fp8,0,0.014556799829006196
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,64,48,4,128,1,fp8,fp8,0,0.014561599493026734
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,64,48,1,128,1,float16,fp8,0,0.04859359860420227
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,64,48,8,128,1,float16,fp8,0,0.01449120044708252
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,64,48,8,128,1,fp8,fp8,0,0.014545600116252898
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,64,48,48,128,1,float16,float16,0,0.012510399520397186
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,64,48,48,128,1,float16,fp8,0,0.010976000130176545
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,64,48,48,128,1,fp8,fp8,0,0.010945600271224976
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,64,48,1,128,1,float16,float16,0,0.01045600026845932
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,64,48,1,128,1,float16,fp8,0,0.010452800244092942
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,64,48,1,128,1,fp8,fp8,0,0.01045600026845932
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,64,48,2,128,1,float16,fp8,0,0.010548800230026245
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,64,48,2,128,1,float16,float16,0,0.010471999645233154
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,64,48,2,128,1,fp8,fp8,0,0.010467199981212616
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,64,48,48,128,1,float16,float16,0,0.03488959968090057
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,64,48,4,128,1,float16,float16,0,0.010427200049161912
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,64,48,4,128,1,fp8,fp8,0,0.010425599664449692
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,64,48,8,128,1,float16,float16,0,0.010447999835014344
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,64,48,8,128,1,float16,fp8,0,0.010449600219726563
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,64,48,8,128,1,fp8,fp8,0,0.010487999767065048
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,64,48,48,128,1,float16,float16,0,0.010460799932479859
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,64,48,48,128,1,float16,fp8,0,0.010428799688816071
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,64,48,48,128,1,fp8,fp8,0,0.010436800122261048
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,64,48,4,128,1,float16,fp8,0,0.010476800054311753
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,64,48,1,128,1,float16,float16,0,0.0105103999376297
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,64,48,1,128,1,float16,fp8,0,0.010451199859380722
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,64,48,1,128,1,fp8,fp8,0,0.010452800244092942
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,64,48,1,128,1,fp8,fp8,0,0.028808000683784484
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,64,48,2,128,1,float16,fp8,0,0.010435199737548828
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,64,48,2,128,1,fp8,fp8,0,0.010467199981212616
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,64,48,4,128,1,float16,float16,0,0.010419200360774993
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,64,48,4,128,1,float16,fp8,0,0.010424000024795533
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,64,48,4,128,1,fp8,fp8,0,0.010446400195360184
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,64,48,8,128,1,float16,float16,0,0.010372799634933472
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,64,48,8,128,1,fp8,fp8,0,0.01048479974269867
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,32,48,1,128,1,float16,float16,0,0.2696448087692261
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,32,48,1,128,1,float16,fp8,0,0.2737728118896484
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,32,48,1,128,1,fp8,fp8,0,0.27363200187683107
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,32,48,2,128,1,float16,float16,0,0.26924800872802734
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,64,48,1,128,1,float16,float16,0,0.01451680064201355
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,32,48,2,128,1,float16,fp8,0,0.2736095905303955
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,32,48,2,128,1,fp8,fp8,0,0.2743216037750244
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,64,48,1,128,1,fp8,fp8,0,0.0486735999584198
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,32,48,4,128,1,float16,float16,0,0.2713855981826782
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,32,48,4,128,1,float16,fp8,0,0.27494399547576903
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,32,48,4,128,1,fp8,fp8,0,0.27330238819122316
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,32,48,8,128,1,float16,fp8,0,0.2742399930953979
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,32,48,8,128,1,fp8,fp8,0,0.2741760015487671
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,32,48,48,128,1,float16,float16,0,0.16627039909362792
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,32,48,48,128,1,float16,fp8,0,0.16423200368881224
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,32,48,48,128,1,fp8,fp8,0,0.16378240585327147
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,32,48,1,128,1,float16,float16,0,0.141867196559906
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,64,48,2,128,1,float16,float16,0,0.010465600341558457
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,32,48,1,128,1,float16,fp8,0,0.14088319540023803
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,64,48,48,128,1,float16,fp8,0,0.2078416109085083
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,64,48,8,128,1,float16,fp8,0,0.010446400195360184
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,32,48,2,128,1,float16,float16,0,0.1418015956878662
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,32,48,2,128,1,float16,fp8,0,0.14118399620056152
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,32,48,2,128,1,fp8,fp8,0,0.14050400257110596
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,32,48,4,128,1,float16,float16,0,0.14311039447784424
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,32,48,4,128,1,float16,fp8,0,0.14154720306396484
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,32,48,8,128,1,float16,float16,0,0.14477920532226562
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,32,48,8,128,1,float16,fp8,0,0.14223999977111818
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,32,48,8,128,1,fp8,fp8,0,0.141975998878479
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,32,48,48,128,1,float16,float16,0,0.09077280163764953
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,32,48,48,128,1,float16,fp8,0,0.08539680242538453
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,32,48,1,128,1,float16,float16,0,0.07602559924125671
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,32,48,1,128,1,float16,fp8,0,0.07493600249290466
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,32,48,1,128,1,fp8,fp8,0,0.07490559816360473
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,32,48,2,128,1,float16,float16,0,0.07591999769210815
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,32,48,2,128,1,fp8,fp8,0,0.07493119835853576
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,32,48,4,128,1,float16,float16,0,0.07590240240097046
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,32,48,4,128,1,float16,fp8,0,0.07466560006141662
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,32,48,4,128,1,fp8,fp8,0,0.07488319873809815
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,32,48,8,128,1,float16,float16,0,0.0772000014781952
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,64,48,8,128,1,float16,float16,0,0.01459839940071106
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,32,48,8,128,1,fp8,fp8,0,0.07462559938430786
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,32,48,48,128,1,float16,float16,0,0.047336000204086306
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,32,48,48,128,1,float16,fp8,0,0.04729920029640198
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,32,48,8,128,1,float16,float16,0,0.274785590171814
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,32,48,48,128,1,fp8,fp8,0,0.047358399629592894
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,32,48,1,128,1,float16,float16,0,0.04323199987411499
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,32,48,1,128,1,float16,fp8,0,0.041387200355529785
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,32,48,1,128,1,fp8,fp8,0,0.04179840087890625
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,32,48,2,128,1,float16,fp8,0,0.041203200817108154
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,32,48,2,128,1,float16,float16,0,0.04333919882774353
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,32,48,2,128,1,fp8,fp8,0,0.04170880019664765
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,32,48,4,128,1,float16,float16,0,0.043105599284172055
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,32,48,4,128,1,float16,fp8,0,0.04286400079727173
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,32,48,8,128,1,float16,float16,0,0.04362240135669708
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,32,48,8,128,1,float16,fp8,0,0.04284639954566956
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,32,48,8,128,1,fp8,fp8,0,0.04233759939670563
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,32,48,48,128,1,float16,float16,0,0.02892000079154968
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,32,48,48,128,1,float16,fp8,0,0.02887359857559204
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,32,48,48,128,1,fp8,fp8,0,0.028947201371192933
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,32,48,1,128,1,fp8,fp8,0,0.14223040342330934
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,32,48,1,128,1,float16,float16,0,0.026795199513435362
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,32,48,1,128,1,float16,fp8,0,0.026849600672721862
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,32,48,1,128,1,fp8,fp8,0,0.026067200303077697
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,32,48,2,128,1,float16,fp8,0,0.026395198702812196
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,32,48,2,128,1,fp8,fp8,0,0.025814399123191833
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,32,48,4,128,1,fp8,fp8,0,0.14173760414123535
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,32,48,4,128,1,float16,float16,0,0.026796799898147584
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,32,48,4,128,1,float16,fp8,0,0.02603999972343445
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,32,48,4,128,1,fp8,fp8,0,0.02606880068778992
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,32,48,8,128,1,float16,float16,0,0.02686559855937958
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,32,48,8,128,1,float16,fp8,0,0.025996801257133485
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,32,48,8,128,1,fp8,fp8,0,0.02620159983634949
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,32,48,48,128,1,float16,float16,0,0.01865759938955307
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,32,48,48,128,1,float16,fp8,0,0.018651199340820313
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,32,48,48,128,1,fp8,fp8,0,0.01857919991016388
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,32,48,48,128,1,fp8,fp8,0,0.08592640161514283
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,32,48,1,128,1,float16,fp8,0,0.016572800278663636
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,32,48,1,128,1,float16,float16,0,0.016599999368190767
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,32,48,2,128,1,float16,fp8,0,0.016574400663375854
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,32,48,2,128,1,fp8,fp8,0,0.016641600430011748
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,32,48,4,128,1,float16,float16,0,0.016590400040149687
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,32,48,4,128,1,float16,fp8,0,0.016574400663375854
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,32,48,4,128,1,fp8,fp8,0,0.016683200001716615
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,32,48,2,128,1,float16,fp8,0,0.07481120228767395
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,32,48,8,128,1,float16,fp8,0,0.016604800522327424
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,32,48,8,128,1,fp8,fp8,0,0.016547200083732606
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,32,48,48,128,1,float16,float16,0,0.014569599926471711
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,32,48,48,128,1,fp8,fp8,0,0.014539200067520141
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,32,48,1,128,1,float16,float16,0,0.01289760023355484
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,32,48,1,128,1,float16,fp8,0,0.01310880035161972
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,32,48,1,128,1,fp8,fp8,0,0.01319040060043335
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,32,48,2,128,1,float16,float16,0,0.013092799484729767
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,32,48,2,128,1,float16,fp8,0,0.012516799569129943
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,32,48,8,128,1,float16,fp8,0,0.07475200295448303
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,32,48,2,128,1,fp8,fp8,0,0.012510399520397186
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,32,48,4,128,1,float16,float16,0,0.01249919980764389
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,32,48,4,128,1,fp8,fp8,0,0.012494400143623352
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,32,48,8,128,1,float16,float16,0,0.012459199875593185
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,32,48,8,128,1,float16,fp8,0,0.013107199966907502
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,32,48,4,128,1,fp8,fp8,0,0.041643199324607846
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,32,48,8,128,1,fp8,fp8,0,0.01255200058221817
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,32,48,48,128,1,float16,float16,0,0.011336000263690948
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,32,48,48,128,1,float16,fp8,0,0.010419200360774993
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,32,48,48,128,1,fp8,fp8,0,0.010545600205659866
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,32,48,1,128,1,float16,float16,0,0.010487999767065048
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,32,48,2,128,1,float16,float16,0,0.026467201113700867
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,32,48,1,128,1,float16,fp8,0,0.010500799864530563
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,32,48,1,128,1,fp8,fp8,0,0.010372799634933472
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,32,48,2,128,1,float16,float16,0,0.01045759990811348
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,32,48,2,128,1,float16,fp8,0,0.010395199805498124
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,32,48,2,128,1,fp8,fp8,0,0.010489600151777268
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,32,48,4,128,1,float16,float16,0,0.010380800068378448
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,32,48,4,128,1,float16,fp8,0,0.01051200032234192
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,32,48,8,128,1,float16,float16,0,0.010462400317192078
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,32,48,4,128,1,fp8,fp8,0,0.01043360009789467
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,32,48,8,128,1,float16,fp8,0,0.010375999659299851
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,32,48,8,128,1,fp8,fp8,0,0.01043360009789467
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,32,48,1,128,1,fp8,fp8,0,0.01661760061979294
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,32,48,48,128,1,float16,float16,0,0.010443200170993806
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,32,48,48,128,1,float16,fp8,0,0.010833600163459777
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,32,48,1,128,1,float16,float16,0,0.010369600355625152
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,32,48,2,128,1,float16,float16,0,0.016672000288963318
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,32,48,1,128,1,float16,fp8,0,0.010369600355625152
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,32,48,1,128,1,fp8,fp8,0,0.010478399693965912
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,32,48,2,128,1,float16,float16,0,0.010391999781131745
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,32,48,2,128,1,fp8,fp8,0,0.010366400331258773
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,32,48,4,128,1,float16,float16,0,0.010480000078678131
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,32,48,4,128,1,float16,fp8,0,0.010363200306892395
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,32,48,4,128,1,fp8,fp8,0,0.010475199669599533
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,32,48,8,128,1,float16,fp8,0,0.010473600029945374
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,32,48,48,128,1,float16,fp8,0,0.014448000490665436
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,32,48,8,128,1,fp8,fp8,0,0.010406400263309478
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,16,48,1,128,1,float16,float16,0,0.26014080047607424
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,16,48,1,128,1,float16,fp8,0,0.2501120090484619
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,32,48,4,128,1,float16,fp8,0,0.012535999715328216
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,16,48,1,128,1,fp8,fp8,0,0.2507008075714111
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,16,48,2,128,1,float16,float16,0,0.2588335990905762
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,16,48,2,128,1,float16,fp8,0,0.2500335931777954
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,16,48,2,128,1,fp8,fp8,0,0.25064160823822024
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,16,48,4,128,1,float16,float16,0,0.26032960414886475
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,16,48,4,128,1,float16,fp8,0,0.25070240497589114
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,16,48,4,128,1,fp8,fp8,0,0.25093119144439696
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,16,48,8,128,1,float16,float16,0,0.2628959894180298
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,16,48,8,128,1,float16,fp8,0,0.2508352041244507
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,16,48,8,128,1,fp8,fp8,0,0.2509376049041748
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,16,48,48,128,1,float16,float16,0,0.15023519992828369
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,16,48,48,128,1,float16,fp8,0,0.1415071964263916
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,16,48,48,128,1,fp8,fp8,0,0.1411952018737793
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,16,48,1,128,1,float16,float16,0,0.1358847975730896
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,16,48,1,128,1,float16,fp8,0,0.1299232006072998
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,16,48,1,128,1,fp8,fp8,0,0.13005759716033935
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,16,48,2,128,1,float16,fp8,0,0.13029600381851197
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,16,48,2,128,1,fp8,fp8,0,0.13004159927368164
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,16,48,4,128,1,float16,float16,0,0.1366111993789673
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,16,48,4,128,1,float16,fp8,0,0.13004640340805054
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,16,48,4,128,1,fp8,fp8,0,0.13012160062789918
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,16,48,8,128,1,float16,float16,0,0.13702239990234374
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,16,48,8,128,1,float16,fp8,0,0.13004000186920167
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,32,48,2,128,1,float16,fp8,0,0.0103472001850605
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,16,48,8,128,1,fp8,fp8,0,0.1299631953239441
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,32,48,8,128,1,float16,float16,0,0.01660960018634796
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,16,48,48,128,1,float16,fp8,0,0.0743071973323822
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,32,48,8,128,1,float16,float16,0,0.010465600341558457
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,16,48,48,128,1,fp8,fp8,0,0.07445920109748841
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,16,48,1,128,1,float16,float16,0,0.07355520129203796
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,16,48,1,128,1,float16,fp8,0,0.0689184010028839
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,16,48,1,128,1,fp8,fp8,0,0.06906239986419678
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,16,48,2,128,1,float16,float16,0,0.07378559708595275
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,16,48,2,128,1,fp8,fp8,0,0.0688256025314331
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,16,48,2,128,1,float16,fp8,0,0.06966080069541931
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,16,48,4,128,1,float16,float16,0,0.07370399832725524
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,16,48,4,128,1,float16,fp8,0,0.06927199959754944
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,16,48,4,128,1,fp8,fp8,0,0.06883999705314636
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,16,48,8,128,1,float16,float16,0,0.07477599978446961
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,16,48,8,128,1,float16,fp8,0,0.06925280094146728
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,16,48,8,128,1,fp8,fp8,0,0.06916319727897643
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,16,48,48,128,1,float16,float16,0,0.04368799924850464
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,16,48,48,128,1,float16,fp8,0,0.04252159893512726
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,16,48,48,128,1,fp8,fp8,0,0.04169760048389435
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,16,48,1,128,1,float16,float16,0,0.04166879951953888
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,16,48,1,128,1,float16,fp8,0,0.03919999897480011
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,16,48,1,128,1,fp8,fp8,0,0.03951840102672577
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,16,48,2,128,1,float16,float16,0,0.04161440134048462
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,16,48,2,128,1,fp8,fp8,0,0.03905439972877502
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,16,48,4,128,1,float16,fp8,0,0.03912639915943146
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,16,48,4,128,1,float16,float16,0,0.042531201243400575
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,16,48,4,128,1,fp8,fp8,0,0.03924480080604553
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,16,48,8,128,1,float16,float16,0,0.04209440052509308
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,16,48,8,128,1,float16,fp8,0,0.039447999000549315
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,16,48,8,128,1,fp8,fp8,0,0.039134401082992556
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,16,48,48,128,1,float16,float16,0,0.026918399333953857
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,16,48,48,128,1,float16,fp8,0,0.026684799790382387
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,16,48,48,128,1,fp8,fp8,0,0.025939199328422546
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,16,48,1,128,1,float16,float16,0,0.0255295991897583
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,16,48,1,128,1,float16,fp8,0,0.024792000651359558
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,16,48,1,128,1,fp8,fp8,0,0.02468159943819046
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,16,48,2,128,1,float16,float16,0,0.025724801421165466
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,16,48,2,128,1,float16,fp8,0,0.024806399643421174
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,16,48,2,128,1,fp8,fp8,0,0.02479359954595566
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,16,48,4,128,1,float16,fp8,0,0.02479359954595566
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,16,48,8,128,1,float16,float16,0,0.02640799880027771
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,16,48,8,128,1,float16,fp8,0,0.024777600169181825
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,16,48,8,128,1,fp8,fp8,0,0.024792000651359558
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,16,48,48,128,1,float16,float16,0,0.018512000143527985
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,16,48,48,128,1,float16,fp8,0,0.01658719927072525
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,16,48,48,128,1,fp8,fp8,0,0.01659200042486191
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,16,48,2,128,1,float16,float16,0,0.1354864001274109
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,16,48,1,128,1,float16,float16,0,0.016571199893951415
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,16,48,1,128,1,float16,fp8,0,0.016495999693870545
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,16,48,1,128,1,fp8,fp8,0,0.016607999801635742
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,16,48,2,128,1,float16,float16,0,0.01658560037612915
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,16,48,2,128,1,fp8,fp8,0,0.01659359931945801
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,16,48,4,128,1,float16,float16,0,0.01661760061979294
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,16,48,4,128,1,float16,fp8,0,0.016627199947834015
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,32,48,48,128,1,fp8,fp8,0,0.010494399815797806
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,16,48,4,128,1,fp8,fp8,0,0.016630400717258454
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,16,48,8,128,1,float16,float16,0,0.016673600673675536
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,16,48,8,128,1,float16,fp8,0,0.01653439998626709
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,16,48,48,128,1,float16,float16,0,0.07855839729309082
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16,48,48,128,1,float16,float16,0,0.014561599493026734
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16,48,48,128,1,float16,fp8,0,0.012510399520397186
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16,48,1,128,1,float16,float16,0,0.012585599720478059
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16,48,1,128,1,float16,fp8,0,0.012503999471664428
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16,48,1,128,1,fp8,fp8,0,0.012540799379348756
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16,48,2,128,1,float16,float16,0,0.012507200241088867
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16,48,2,128,1,float16,fp8,0,0.012510399520397186
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16,48,2,128,1,fp8,fp8,0,0.012507200241088867
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16,48,4,128,1,float16,float16,0,0.012606400251388549
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16,48,4,128,1,float16,fp8,0,0.012520000338554382
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16,48,4,128,1,fp8,fp8,0,0.01250240057706833
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16,48,8,128,1,float16,float16,0,0.012481600046157837
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16,48,8,128,1,float16,fp8,0,0.01250080019235611
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16,48,8,128,1,fp8,fp8,0,0.012483199685811996
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16,48,48,128,1,float16,float16,0,0.010467199981212616
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16,48,48,128,1,float16,fp8,0,0.010388799756765366
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16,48,48,128,1,fp8,fp8,0,0.010476800054311753
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,16,48,4,128,1,fp8,fp8,0,0.024798400700092316
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16,48,1,128,1,float16,float16,0,0.010518400371074677
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16,48,1,128,1,float16,fp8,0,0.010463999956846238
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16,48,1,128,1,fp8,fp8,0,0.01043040007352829
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16,48,2,128,1,float16,fp8,0,0.010463999956846238
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16,48,2,128,1,fp8,fp8,0,0.010446400195360184
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16,48,4,128,1,float16,float16,0,0.010463999956846238
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16,48,4,128,1,float16,fp8,0,0.010443200170993806
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16,48,4,128,1,fp8,fp8,0,0.010478399693965912
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16,48,8,128,1,float16,float16,0,0.010784000158309937
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16,48,8,128,1,float16,fp8,0,0.010486400127410889
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16,48,48,128,1,float16,float16,0,0.0105103999376297
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,16,48,2,128,1,float16,fp8,0,0.01658080071210861
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16,48,48,128,1,float16,fp8,0,0.01037440001964569
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16,48,48,128,1,fp8,fp8,0,0.009014400094747544
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16,48,1,128,1,float16,float16,0,0.008921600133180618
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16,48,1,128,1,fp8,fp8,0,0.008455999940633774
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16,48,1,128,1,float16,fp8,0,0.01037440001964569
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16,48,2,128,1,float16,float16,0,0.008902399986982345
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16,48,2,128,1,float16,fp8,0,0.010027199983596802
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16,48,4,128,1,float16,float16,0,0.010414399951696397
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16,48,4,128,1,float16,fp8,0,0.009539200365543366
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,16,48,8,128,1,fp8,fp8,0,0.01658719927072525
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16,48,4,128,1,fp8,fp8,0,0.008430399745702744
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16,48,8,128,1,float16,float16,0,0.009481599926948548
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16,48,8,128,1,float16,fp8,0,0.008395200222730636
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16,48,8,128,1,fp8,fp8,0,0.008700799942016602
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,1,48,1,128,1,float16,float16,0,0.25566558837890624
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,1,48,1,128,1,float16,fp8,0,0.24002881050109864
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,1,48,1,128,1,fp8,fp8,0,0.24056799411773683
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,1,48,2,128,1,float16,float16,0,0.2568511962890625
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,16,48,2,128,1,float16,fp8,0,0.03935680091381073
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,1,48,2,128,1,float16,fp8,0,0.24035840034484862
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,1,48,2,128,1,fp8,fp8,0,0.24076321125030517
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,1,48,4,128,1,float16,float16,0,0.2566495895385742
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,1,48,4,128,1,float16,fp8,0,0.2402575969696045
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,1,48,4,128,1,fp8,fp8,0,0.24056479930877686
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,16,48,4,128,1,float16,float16,0,0.02579520046710968
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,1,48,8,128,1,float16,float16,0,0.2574575901031494
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,1,48,8,128,1,float16,fp8,0,0.24009439945220948
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,1,48,8,128,1,fp8,fp8,0,0.24045920372009277
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,1,48,48,128,1,float16,fp8,0,0.1266816020011902
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,1,48,48,128,1,fp8,fp8,0,0.12581599950790406
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,1,48,1,128,1,float16,float16,0,0.13458399772644042
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16,48,2,128,1,float16,float16,0,0.010454399883747101
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,1,48,1,128,1,fp8,fp8,0,0.12571200132369995
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,1,48,2,128,1,float16,float16,0,0.1347375988960266
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,1,48,2,128,1,float16,fp8,0,0.12543519735336303
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,1,48,2,128,1,fp8,fp8,0,0.1256976008415222
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,1,48,4,128,1,float16,fp8,0,0.12571680545806885
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16,48,8,128,1,fp8,fp8,0,0.0103472001850605
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,1,48,4,128,1,fp8,fp8,0,0.12604639530181885
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,1,48,8,128,1,float16,float16,0,0.13406239748001098
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,1,48,8,128,1,float16,fp8,0,0.12526559829711914
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,1,48,8,128,1,fp8,fp8,0,0.12550560235977173
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1,48,48,128,1,float16,fp8,0,0.0677839994430542
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16,48,2,128,1,fp8,fp8,0,0.008420799672603608
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1,48,1,128,1,float16,float16,0,0.072326397895813
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1,48,1,128,1,float16,fp8,0,0.06796799898147583
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1,48,1,128,1,fp8,fp8,0,0.06740639805793762
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1,48,2,128,1,float16,float16,0,0.07306079864501953
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1,48,2,128,1,float16,fp8,0,0.06721600294113159
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16,48,48,128,1,fp8,fp8,0,0.012515200674533844
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1,48,2,128,1,fp8,fp8,0,0.06851840019226074
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1,48,4,128,1,float16,float16,0,0.07218559980392455
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1,48,4,128,1,float16,fp8,0,0.0685696005821228
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1,48,4,128,1,fp8,fp8,0,0.06759039759635925
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1,48,8,128,1,float16,float16,0,0.07293760180473327
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1,48,8,128,1,float16,fp8,0,0.06778879761695862
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1,48,8,128,1,fp8,fp8,0,0.06803039908409118
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1,48,48,128,1,float16,float16,0,0.04254559874534607
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1,48,48,128,1,float16,fp8,0,0.039103999733924866
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1,48,48,128,1,fp8,fp8,0,0.039131200313568114
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1,48,1,128,1,float16,float16,0,0.041280001401901245
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1,48,1,128,1,fp8,fp8,0,0.03882719874382019
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1,48,1,128,1,float16,fp8,0,0.03821600079536438
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1,48,2,128,1,float16,float16,0,0.041335999965667725
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1,48,2,128,1,float16,fp8,0,0.03791039884090423
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1,48,2,128,1,fp8,fp8,0,0.038910400867462155
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1,48,4,128,1,float16,float16,0,0.041150400042533876
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1,48,4,128,1,float16,fp8,0,0.039134401082992556
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1,48,4,128,1,fp8,fp8,0,0.038441601395607
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1,48,8,128,1,float16,fp8,0,0.03809759914875031
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,1,48,48,128,1,float16,float16,0,0.13490560054779052
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1,48,8,128,1,fp8,fp8,0,0.03835520148277283
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1,48,48,128,1,float16,float16,0,0.026873600482940675
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1,48,48,128,1,float16,fp8,0,0.023563200235366823
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1,48,48,128,1,fp8,fp8,0,0.024689599871635437
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,1,48,1,128,1,float16,fp8,0,0.12536959648132323
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1,48,1,128,1,float16,fp8,0,0.023788799345493317
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1,48,1,128,1,fp8,fp8,0,0.024398399889469145
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1,48,2,128,1,float16,float16,0,0.024876800179481507
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1,48,2,128,1,float16,fp8,0,0.02465600073337555
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1,48,2,128,1,fp8,fp8,0,0.022937600314617158
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1,48,4,128,1,float16,float16,0,0.024817599356174468
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1,48,4,128,1,float16,fp8,0,0.022860799729824067
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1,48,4,128,1,fp8,fp8,0,0.024695999920368195
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1,48,8,128,1,float16,float16,0,0.02483839988708496
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1,48,8,128,1,float16,fp8,0,0.023758399486541747
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1,48,8,128,1,fp8,fp8,0,0.022782400250434875
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1,48,48,128,1,float16,fp8,0,0.015299199521541596
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1,48,48,128,1,float16,float16,0,0.07434719800949097
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1,48,48,128,1,fp8,fp8,0,0.015607999265193939
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1,48,1,128,1,float16,float16,0,0.016596800088882445
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1,48,48,128,1,fp8,fp8,0,0.06815360188484192
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1,48,1,128,1,float16,fp8,0,0.014627200365066529
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1,48,1,128,1,fp8,fp8,0,0.015091200172901154
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1,48,2,128,1,float16,fp8,0,0.01472959965467453
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1,48,2,128,1,fp8,fp8,0,0.015150399506092071
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1,48,4,128,1,float16,fp8,0,0.015291200578212738
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1,48,4,128,1,fp8,fp8,0,0.015436799824237823
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1,48,8,128,1,float16,float16,0,0.01659200042486191
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1,48,8,128,1,float16,fp8,0,0.015447999536991119
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1,48,8,128,1,fp8,fp8,0,0.016531200706958772
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1,48,48,128,1,float16,float16,0,0.014580799639225006
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1,48,48,128,1,float16,fp8,0,0.012555199861526489
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1,48,48,128,1,fp8,fp8,0,0.012511999905109405
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1,48,1,128,1,float16,float16,0,0.01255200058221817
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1,48,1,128,1,fp8,fp8,0,0.012534399330615998
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1,48,2,128,1,float16,float16,0,0.012481600046157837
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1,48,2,128,1,float16,fp8,0,0.012513600289821625
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1,48,2,128,1,fp8,fp8,0,0.012503999471664428
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1,48,8,128,1,float16,float16,0,0.04161120057106018
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1,48,4,128,1,float16,float16,0,0.012518399953842163
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1,48,4,128,1,float16,fp8,0,0.01249919980764389
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1,48,4,128,1,fp8,fp8,0,0.01250240057706833
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1,48,8,128,1,float16,float16,0,0.012544000148773193
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1,48,8,128,1,float16,fp8,0,0.012531200051307678
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1,48,8,128,1,fp8,fp8,0,0.01257600039243698
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1,48,48,128,1,float16,float16,0,0.012747199833393097
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1,48,48,128,1,float16,fp8,0,0.010436800122261048
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1,48,1,128,1,float16,float16,0,0.024740800261497498
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1,48,48,128,1,fp8,fp8,0,0.010513599961996078
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1,48,1,128,1,float16,float16,0,0.010407999902963639
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1,48,1,128,1,float16,fp8,0,0.010366400331258773
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1,48,1,128,1,fp8,fp8,0,0.010494399815797806
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1,48,2,128,1,float16,fp8,0,0.010425599664449692
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1,48,2,128,1,fp8,fp8,0,0.010540799796581268
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1,48,4,128,1,float16,float16,0,0.010387200117111205
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1,48,4,128,1,float16,fp8,0,0.010366400331258773
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1,48,4,128,1,fp8,fp8,0,0.010355199873447418
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1,48,8,128,1,float16,float16,0,0.010470400005578995
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1,48,8,128,1,float16,fp8,0,0.010441599786281586
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1,48,8,128,1,fp8,fp8,0,0.010471999645233154
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1,48,48,128,1,float16,float16,0,0.010465600341558457
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1,48,48,128,1,float16,fp8,0,0.010153599828481675
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1,48,48,128,1,float16,float16,0,0.018532800674438476
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1,48,48,128,1,fp8,fp8,0,0.010395199805498124
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1,48,1,128,1,fp8,fp8,0,0.010387200117111205
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1,48,1,128,1,float16,fp8,0,0.008934400230646133
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1,48,1,128,1,float16,float16,0,0.010444799810647965
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1,48,2,128,1,float16,fp8,0,0.009137599915266036
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1,48,2,128,1,fp8,fp8,0,0.010454399883747101
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1,48,2,128,1,float16,float16,0,0.010353600233793258
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1,48,4,128,1,float16,float16,0,0.010361599922180175
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1,48,2,128,1,float16,float16,0,0.016625599563121797
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1,48,4,128,1,float16,fp8,0,0.010444799810647965
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1,48,4,128,1,fp8,fp8,0,0.00942239984869957
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1,48,8,128,1,float16,float16,0,0.010470400005578995
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1,48,4,128,1,float16,float16,0,0.016630400717258454
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1,48,8,128,1,float16,fp8,0,0.00952960029244423
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1,48,8,128,1,fp8,fp8,0,0.010391999781131745
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1,48,1,128,1,float16,fp8,0,0.012513600289821625
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1,48,2,128,1,float16,float16,0,0.010451199859380722
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,1,48,4,128,1,float16,float16,0,0.1343008041381836
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,16384,40,1,128,1,fp8,fp8,0,13.9864501953125
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,16384,40,1,128,1,float16,fp8,0,14.00977783203125
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,16384,40,2,128,1,float16,fp8,0,14.011152648925782
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,16384,40,2,128,1,fp8,fp8,0,14.175559997558594
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,16384,40,1,128,1,float16,float16,0,17.835774230957032
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,16384,40,2,128,1,float16,float16,0,17.99940643310547
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,16384,40,4,128,1,float16,fp8,0,14.068089294433594
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,16384,40,4,128,1,float16,float16,0,18.230267333984376
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16384,40,40,128,1,float16,fp8,0,7.328540802001953
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,16384,40,4,128,1,fp8,fp8,0,14.499586486816407
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,16384,40,8,128,1,float16,fp8,0,14.571527099609375
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16384,40,40,128,1,fp8,fp8,0,7.343550109863282
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16384,40,1,128,1,float16,float16,0,9.215289306640624
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,16384,40,8,128,1,fp8,fp8,0,14.707563781738282
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,16384,40,8,128,1,float16,float16,0,18.486753845214842
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16384,40,1,128,1,float16,fp8,0,7.04150390625
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16384,40,1,128,1,fp8,fp8,0,7.131804656982422
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16384,40,2,128,1,float16,fp8,0,7.053099060058594
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16384,40,2,128,1,fp8,fp8,0,7.198230743408203
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16384,40,2,128,1,float16,float16,0,9.202022552490234
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16384,40,4,128,1,float16,fp8,0,7.167467498779297
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16384,40,4,128,1,float16,float16,0,9.348411560058594
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16384,40,4,128,1,fp8,fp8,0,7.318708801269532
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16384,40,8,128,1,float16,fp8,0,7.275318145751953
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16384,40,40,128,1,float16,fp8,0,3.760956954956055
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16384,40,8,128,1,fp8,fp8,0,7.170417785644531
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16384,40,8,128,1,float16,float16,0,9.341585540771485
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16384,40,40,128,1,fp8,fp8,0,3.93054084777832
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16384,40,1,128,1,float16,float16,0,4.540300750732422
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16384,40,1,128,1,float16,fp8,0,3.6178497314453124
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16384,40,1,128,1,fp8,fp8,0,3.612838363647461
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16384,40,2,128,1,fp8,fp8,0,3.6518718719482424
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16384,40,2,128,1,float16,fp8,0,3.724537658691406
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16384,40,2,128,1,float16,float16,0,4.7132209777832035
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16384,40,4,128,1,float16,fp8,0,3.638955307006836
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16384,40,4,128,1,fp8,fp8,0,3.620191955566406
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16384,40,4,128,1,float16,float16,0,4.598662567138672
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16384,40,8,128,1,float16,fp8,0,3.6560176849365233
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16384,40,8,128,1,fp8,fp8,0,3.868592071533203
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16384,40,8,128,1,float16,float16,0,4.662620925903321
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16384,40,40,128,1,float16,fp8,0,1.9365455627441406
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16384,40,40,128,1,fp8,fp8,0,2.3997440338134766
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16384,40,1,128,1,float16,float16,0,2.1416704177856447
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16384,40,1,128,1,float16,fp8,0,1.8416015625
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16384,40,2,128,1,float16,float16,0,2.115350341796875
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16384,40,1,128,1,fp8,fp8,0,2.2447744369506837
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16384,40,2,128,1,float16,fp8,0,1.8686399459838867
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16384,40,2,128,1,fp8,fp8,0,2.250555229187012
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16384,40,4,128,1,float16,float16,0,2.2456127166748048
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16384,40,4,128,1,fp8,fp8,0,1.8532047271728516
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16384,40,4,128,1,float16,fp8,0,2.069278335571289
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16384,40,8,128,1,float16,float16,0,2.228998374938965
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16384,40,8,128,1,float16,fp8,0,1.8478080749511718
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16384,40,8,128,1,fp8,fp8,0,2.1771039962768555
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,12288,40,1,128,1,float16,fp8,0,8.291836547851563
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,12288,40,1,128,1,fp8,fp8,0,8.211032104492187
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,12288,40,2,128,1,float16,fp8,0,8.222583770751953
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,12288,40,1,128,1,float16,float16,0,10.648000335693359
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,12288,40,2,128,1,float16,float16,0,10.450830078125
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,12288,40,2,128,1,fp8,fp8,0,8.335916900634766
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,12288,40,4,128,1,float16,fp8,0,8.401761627197265
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,12288,40,4,128,1,fp8,fp8,0,8.453900909423828
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,12288,40,4,128,1,float16,float16,0,10.707908630371094
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,12288,40,8,128,1,float16,float16,0,10.722551727294922
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,12288,40,40,128,1,float16,fp8,0,4.495292663574219
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,12288,40,8,128,1,float16,fp8,0,8.350507354736328
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,12288,40,40,128,1,fp8,fp8,0,4.45918083190918
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,12288,40,8,128,1,fp8,fp8,0,8.43252944946289
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,12288,40,1,128,1,float16,fp8,0,4.1754814147949215
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,12288,40,1,128,1,float16,float16,0,5.2554176330566404
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,12288,40,1,128,1,fp8,fp8,0,4.133692932128906
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,12288,40,2,128,1,float16,float16,0,5.3081825256347654
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,12288,40,2,128,1,float16,fp8,0,4.172294235229492
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,12288,40,2,128,1,fp8,fp8,0,4.245435333251953
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,12288,40,4,128,1,float16,float16,0,5.2170158386230465
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,12288,40,4,128,1,float16,fp8,0,4.362128067016601
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,12288,40,4,128,1,fp8,fp8,0,4.213056182861328
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,12288,40,8,128,1,float16,float16,0,5.337454223632813
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,12288,40,8,128,1,float16,fp8,0,4.29716796875
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,12288,40,8,128,1,fp8,fp8,0,4.200004959106446
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,12288,40,40,128,1,float16,fp8,0,2.4447168350219726
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,12288,40,40,128,1,fp8,fp8,0,2.2254751205444334
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,12288,40,1,128,1,float16,float16,0,2.441655921936035
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,12288,40,1,128,1,float16,fp8,0,2.1060943603515625
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,12288,40,1,128,1,fp8,fp8,0,2.0960607528686523
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,12288,40,2,128,1,float16,fp8,0,2.087816047668457
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,12288,40,2,128,1,float16,float16,0,2.7153743743896483
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,12288,40,2,128,1,fp8,fp8,0,2.115507125854492
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,12288,40,4,128,1,float16,float16,0,2.558625602722168
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,12288,40,4,128,1,float16,fp8,0,2.2811792373657225
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,12288,40,4,128,1,fp8,fp8,0,2.1346511840820312
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,12288,40,8,128,1,float16,fp8,0,2.116124725341797
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,12288,40,8,128,1,float16,float16,0,2.6672096252441406
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,12288,40,8,128,1,fp8,fp8,0,2.107456016540527
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,12288,40,40,128,1,float16,fp8,0,1.310324764251709
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,12288,40,40,128,1,fp8,fp8,0,1.1463184356689453
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,12288,40,1,128,1,float16,fp8,0,1.0896927833557128
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,12288,40,1,128,1,float16,float16,0,1.2450752258300781
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,12288,40,1,128,1,fp8,fp8,0,1.086411190032959
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,12288,40,2,128,1,float16,float16,0,1.2629712104797364
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,12288,40,2,128,1,float16,fp8,0,1.2143535614013672
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,12288,40,2,128,1,fp8,fp8,0,1.130833625793457
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,12288,40,4,128,1,float16,float16,0,1.244375991821289
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,12288,40,4,128,1,float16,fp8,0,1.0843232154846192
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,12288,40,4,128,1,fp8,fp8,0,1.256980800628662
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,12288,40,8,128,1,float16,float16,0,1.2485631942749023
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,12288,40,8,128,1,float16,fp8,0,1.1449952125549316
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,12288,40,8,128,1,fp8,fp8,0,1.2199824333190918
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,10240,40,1,128,1,float16,fp8,0,5.7682350158691404
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,10240,40,1,128,1,float16,float16,0,7.387519836425781
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,10240,40,1,128,1,fp8,fp8,0,5.797492980957031
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,10240,40,2,128,1,float16,float16,0,7.409817504882812
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,10240,40,2,128,1,float16,fp8,0,5.900976181030273
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,10240,40,2,128,1,fp8,fp8,0,5.937483215332032
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,10240,40,4,128,1,float16,float16,0,7.547812652587891
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,10240,40,4,128,1,float16,fp8,0,5.931695938110352
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,10240,40,4,128,1,fp8,fp8,0,5.921433639526367
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,10240,40,8,128,1,float16,float16,0,7.557585906982422
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,10240,40,8,128,1,float16,fp8,0,5.885943984985351
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,10240,40,8,128,1,fp8,fp8,0,6.050569534301758
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,10240,40,40,128,1,float16,fp8,0,3.1723039627075194
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,10240,40,40,128,1,fp8,fp8,0,3.182472038269043
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,10240,40,1,128,1,float16,float16,0,3.4677806854248048
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,10240,40,1,128,1,float16,fp8,0,3.1695440292358397
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,10240,40,1,128,1,fp8,fp8,0,2.9859312057495115
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,10240,40,2,128,1,float16,float16,0,3.703201675415039
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,10240,40,2,128,1,float16,fp8,0,3.2623550415039064
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,10240,40,2,128,1,fp8,fp8,0,2.903788757324219
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,12288,40,40,128,1,float16,float16,0,1.3044608116149903
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16384,40,40,128,1,float16,float16,0,2.22316951751709
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,10240,40,4,128,1,float16,float16,0,3.6987857818603516
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,10240,40,4,128,1,float16,fp8,0,2.987233543395996
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,10240,40,40,128,1,float16,float16,0,3.759419250488281
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16384,40,40,128,1,float16,float16,0,4.593718338012695
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,10240,40,40,128,1,float16,float16,0,1.9511983871459961
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,12288,40,40,128,1,float16,float16,0,5.380806350708008
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16384,40,40,128,1,float16,float16,0,9.278860473632813
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,10240,40,8,128,1,float16,fp8,0,2.9082752227783204
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,10240,40,4,128,1,fp8,fp8,0,3.0120208740234373
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,10240,40,8,128,1,fp8,fp8,0,3.1056880950927734
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,12288,40,40,128,1,float16,float16,0,2.623107147216797
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,10240,40,8,128,1,float16,float16,0,3.754715347290039
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,10240,40,40,128,1,float16,fp8,0,1.7308656692504882
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,10240,40,40,128,1,fp8,fp8,0,1.7594591140747071
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,10240,40,1,128,1,float16,fp8,0,1.497003173828125
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,10240,40,1,128,1,fp8,fp8,0,1.488708782196045
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,10240,40,1,128,1,float16,float16,0,1.7823167800903321
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,10240,40,2,128,1,float16,float16,0,1.7107200622558594
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,10240,40,2,128,1,float16,fp8,0,1.6937360763549805
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,10240,40,2,128,1,fp8,fp8,0,1.6819631576538085
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,10240,40,4,128,1,float16,fp8,0,1.5171199798583985
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,10240,40,4,128,1,float16,float16,0,1.6884511947631835
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,10240,40,4,128,1,fp8,fp8,0,1.4864656448364257
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,10240,40,8,128,1,float16,float16,0,1.6867040634155273
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,10240,40,40,128,1,float16,float16,0,0.9505167961120605
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,10240,40,40,128,1,float16,fp8,0,0.8697664260864257
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,10240,40,8,128,1,float16,fp8,0,1.6532112121582032
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,10240,40,8,128,1,fp8,fp8,0,1.7366880416870116
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,10240,40,1,128,1,float16,fp8,0,0.7841472148895263
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,10240,40,40,128,1,fp8,fp8,0,0.8310463905334473
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,10240,40,2,128,1,float16,fp8,0,0.8197520256042481
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,10240,40,2,128,1,fp8,fp8,0,0.82192964553833
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,10240,40,1,128,1,fp8,fp8,0,0.8701279640197754
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,10240,40,1,128,1,float16,float16,0,0.8991727828979492
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,10240,40,4,128,1,float16,float16,0,0.8891280174255372
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,10240,40,4,128,1,float16,fp8,0,0.7845424175262451
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,10240,40,4,128,1,fp8,fp8,0,0.7860703945159913
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,10240,40,8,128,1,float16,fp8,0,0.8039104461669921
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,10240,40,8,128,1,fp8,fp8,0,0.8229231834411621
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,10240,40,2,128,1,float16,float16,0,0.8815135955810547
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,10240,40,8,128,1,float16,float16,0,0.8916687965393066
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,8192,40,1,128,1,float16,fp8,0,7.651451110839844
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,8192,40,1,128,1,fp8,fp8,0,7.6708930969238285
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,8192,40,2,128,1,float16,fp8,0,7.620391845703125
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,8192,40,2,128,1,fp8,fp8,0,7.596300506591797
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,8192,40,1,128,1,float16,float16,0,9.786121368408203
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,8192,40,2,128,1,float16,float16,0,9.65505599975586
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,8192,40,4,128,1,float16,fp8,0,7.618852996826172
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,8192,40,4,128,1,float16,float16,0,9.512091064453125
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,8192,40,40,128,1,float16,fp8,0,4.371865463256836
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,8192,40,40,128,1,float16,float16,0,5.24054069519043
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,8192,40,4,128,1,fp8,fp8,0,7.708489227294922
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,8192,40,8,128,1,float16,fp8,0,7.7259071350097654
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,8192,40,8,128,1,fp8,fp8,0,7.705577850341797
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,8192,40,40,128,1,fp8,fp8,0,4.134630584716797
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,8192,40,1,128,1,float16,float16,0,4.742433547973633
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,8192,40,8,128,1,float16,float16,0,9.936265563964843
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,8192,40,1,128,1,float16,fp8,0,3.820811080932617
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,8192,40,1,128,1,fp8,fp8,0,4.037099075317383
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,8192,40,2,128,1,float16,fp8,0,3.899435043334961
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,8192,40,2,128,1,fp8,fp8,0,3.8694286346435547
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,8192,40,2,128,1,float16,float16,0,4.897510528564453
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,8192,40,4,128,1,float16,float16,0,4.925385665893555
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,8192,40,4,128,1,float16,fp8,0,4.054241561889649
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,8192,40,4,128,1,fp8,fp8,0,4.074327850341797
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,8192,40,40,128,1,float16,float16,0,2.4802223205566407
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,8192,40,40,128,1,float16,fp8,0,2.4201328277587892
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,8192,40,8,128,1,float16,fp8,0,3.828403091430664
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,8192,40,8,128,1,float16,float16,0,4.937823867797851
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,8192,40,40,128,1,fp8,fp8,0,2.396004867553711
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,8192,40,8,128,1,fp8,fp8,0,3.899708938598633
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,8192,40,1,128,1,float16,float16,0,2.3388544082641602
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,8192,40,1,128,1,float16,fp8,0,1.949380874633789
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,8192,40,1,128,1,fp8,fp8,0,2.1483184814453127
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,8192,40,2,128,1,float16,float16,0,2.2345455169677733
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,8192,40,2,128,1,fp8,fp8,0,1.9742448806762696
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,8192,40,2,128,1,float16,fp8,0,2.217393684387207
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,8192,40,4,128,1,fp8,fp8,0,1.9270816802978517
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,8192,40,4,128,1,float16,float16,0,2.1908063888549805
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,8192,40,4,128,1,float16,fp8,0,2.1387727737426756
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,8192,40,8,128,1,float16,float16,0,2.242359924316406
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,8192,40,40,128,1,float16,fp8,0,1.06669282913208
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,8192,40,1,128,1,float16,float16,0,1.1301504135131837
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,8192,40,8,128,1,float16,fp8,0,2.2562271118164063
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,8192,40,40,128,1,fp8,fp8,0,1.258236789703369
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,8192,40,8,128,1,fp8,fp8,0,1.948209571838379
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,8192,40,1,128,1,fp8,fp8,0,0.9943615913391113
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,8192,40,1,128,1,float16,fp8,0,1.2708191871643066
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,8192,40,2,128,1,float16,fp8,0,1.0252032279968262
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,8192,40,2,128,1,float16,float16,0,1.1885552406311035
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,8192,40,2,128,1,fp8,fp8,0,1.0338335990905763
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,8192,40,4,128,1,float16,float16,0,1.136406421661377
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,8192,40,4,128,1,float16,fp8,0,0.9968223571777344
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,8192,40,4,128,1,fp8,fp8,0,1.0705408096313476
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,8192,40,8,128,1,float16,float16,0,1.1410655975341797
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,8192,40,8,128,1,float16,fp8,0,1.197987174987793
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,8192,40,40,128,1,float16,fp8,0,0.5716032028198242
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,8192,40,40,128,1,fp8,fp8,0,0.5708127975463867
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,8192,40,1,128,1,float16,float16,0,0.5823999881744385
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,8192,40,1,128,1,float16,fp8,0,0.5877200126647949
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,8192,40,1,128,1,fp8,fp8,0,0.5819024085998535
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,8192,40,40,128,1,float16,float16,0,1.2421216011047362
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,8192,40,2,128,1,fp8,fp8,0,0.5394752025604248
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,8192,40,4,128,1,float16,float16,0,0.6054880142211914
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,8192,40,4,128,1,float16,fp8,0,0.5538767814636231
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,8192,40,4,128,1,fp8,fp8,0,0.5321728229522705
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,8192,40,8,128,1,float16,float16,0,0.5889200210571289
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,8192,40,8,128,1,float16,fp8,0,0.5534448146820068
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,8192,40,8,128,1,fp8,fp8,0,0.5396063804626465
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,8192,40,40,128,1,float16,float16,0,0.7221744060516357
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,8192,40,2,128,1,float16,fp8,0,0.5344111919403076
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,6144,40,1,128,1,float16,fp8,0,4.529689788818359
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,6144,40,1,128,1,fp8,fp8,0,4.43785285949707
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,6144,40,1,128,1,float16,float16,0,5.5122112274169925
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,8192,40,8,128,1,fp8,fp8,0,1.0316752433776855
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,6144,40,2,128,1,float16,float16,0,5.549711990356445
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,6144,40,2,128,1,float16,fp8,0,4.471665573120117
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,8192,40,2,128,1,float16,float16,0,0.5837456226348877
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,6144,40,2,128,1,fp8,fp8,0,4.455543899536133
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,6144,40,4,128,1,float16,fp8,0,4.480519866943359
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,6144,40,40,128,1,float16,float16,0,2.9005760192871093
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,6144,40,4,128,1,fp8,fp8,0,4.581886291503906
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,6144,40,4,128,1,float16,float16,0,5.697990417480469
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,6144,40,8,128,1,float16,fp8,0,4.58946418762207
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,6144,40,8,128,1,fp8,fp8,0,4.4804847717285154
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,6144,40,8,128,1,float16,float16,0,5.546406555175781
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,6144,40,40,128,1,fp8,fp8,0,2.4797183990478517
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,6144,40,1,128,1,float16,fp8,0,2.2708080291748045
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,6144,40,1,128,1,float16,float16,0,2.7243471145629883
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,6144,40,1,128,1,fp8,fp8,0,2.4800304412841796
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,6144,40,2,128,1,float16,float16,0,2.5259679794311523
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,6144,40,2,128,1,float16,fp8,0,2.2918064117431642
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,6144,40,2,128,1,fp8,fp8,0,2.4305728912353515
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,6144,40,40,128,1,float16,fp8,0,2.4647920608520506
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,6144,40,4,128,1,float16,float16,0,2.7262895584106444
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,6144,40,4,128,1,float16,fp8,0,2.302284812927246
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,6144,40,4,128,1,fp8,fp8,0,2.42968635559082
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,6144,40,40,128,1,float16,fp8,0,1.258187198638916
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,6144,40,40,128,1,float16,float16,0,1.4411375999450684
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,6144,40,8,128,1,float16,float16,0,2.7678176879882814
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,6144,40,8,128,1,fp8,fp8,0,2.284774398803711
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,6144,40,8,128,1,float16,fp8,0,2.556670379638672
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,6144,40,1,128,1,float16,float16,0,1.3041983604431153
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,6144,40,40,128,1,fp8,fp8,0,1.5069087982177733
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,6144,40,1,128,1,float16,fp8,0,1.165223979949951
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,6144,40,1,128,1,fp8,fp8,0,1.1499216079711914
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,6144,40,2,128,1,float16,float16,0,1.3458975791931151
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,6144,40,2,128,1,fp8,fp8,0,1.1754591941833497
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,6144,40,2,128,1,float16,fp8,0,1.4285696029663086
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,6144,40,4,128,1,float16,float16,0,1.294065570831299
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,6144,40,4,128,1,fp8,fp8,0,1.1906512260437012
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,6144,40,8,128,1,float16,fp8,0,1.1506128311157227
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,6144,40,8,128,1,float16,float16,0,1.3068592071533203
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,6144,40,40,128,1,float16,float16,0,0.746452808380127
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,6144,40,40,128,1,float16,fp8,0,0.6931871891021728
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,6144,40,40,128,1,fp8,fp8,0,0.6798543930053711
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,6144,40,8,128,1,fp8,fp8,0,1.2842399597167968
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,6144,40,1,128,1,float16,float16,0,0.6646192073822021
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,6144,40,1,128,1,fp8,fp8,0,0.6059711933135986
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,6144,40,2,128,1,float16,float16,0,0.6656943798065186
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,6144,40,2,128,1,float16,fp8,0,0.6202720165252685
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,6144,40,2,128,1,fp8,fp8,0,0.6576863765716553
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,6144,40,4,128,1,float16,float16,0,0.669927978515625
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,6144,40,4,128,1,float16,fp8,0,0.6144032001495361
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,6144,40,4,128,1,fp8,fp8,0,0.6422719955444336
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,6144,40,40,128,1,float16,float16,0,0.39739360809326174
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,6144,40,8,128,1,float16,float16,0,0.7146272182464599
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,6144,40,8,128,1,fp8,fp8,0,0.6310143947601319
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,6144,40,40,128,1,float16,fp8,0,0.3702431917190552
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,6144,40,40,128,1,fp8,fp8,0,0.36945600509643556
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,6144,40,1,128,1,float16,float16,0,0.3588927984237671
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,6144,40,1,128,1,float16,fp8,0,0.3739408016204834
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,6144,40,1,128,1,fp8,fp8,0,0.33540480136871337
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,6144,40,2,128,1,float16,float16,0,0.3578399896621704
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,6144,40,2,128,1,float16,fp8,0,0.34155681133270266
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,6144,40,2,128,1,fp8,fp8,0,0.38553919792175295
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,6144,40,4,128,1,float16,float16,0,0.3590703964233398
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,6144,40,4,128,1,float16,fp8,0,0.33435521125793455
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,6144,40,4,128,1,fp8,fp8,0,0.3406719923019409
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,6144,40,8,128,1,float16,float16,0,0.37532479763031007
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,6144,40,8,128,1,float16,fp8,0,0.3376703977584839
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,6144,40,8,128,1,fp8,fp8,0,0.3316512107849121
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,6144,40,4,128,1,float16,fp8,0,1.3271360397338867
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,6144,40,1,128,1,float16,fp8,0,0.6030735969543457
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,6144,40,8,128,1,float16,fp8,0,0.6103024005889892
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,4096,40,1,128,1,fp8,fp8,0,4.2892608642578125
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,4096,40,1,128,1,float16,fp8,0,4.36279182434082
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,4096,40,1,128,1,float16,float16,0,5.121022415161133
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,4096,40,2,128,1,float16,fp8,0,4.32031364440918
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,4096,40,2,128,1,float16,float16,0,5.288742446899414
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,4096,40,2,128,1,fp8,fp8,0,4.287721633911133
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,4096,40,4,128,1,float16,float16,0,5.16270866394043
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,4096,40,40,128,1,float16,fp8,0,2.448635292053223
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,4096,40,40,128,1,float16,float16,0,3.005081558227539
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,4096,40,4,128,1,float16,fp8,0,4.28831672668457
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,4096,40,4,128,1,fp8,fp8,0,4.315585708618164
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,4096,40,8,128,1,fp8,fp8,0,4.332304000854492
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,4096,40,8,128,1,float16,fp8,0,4.448486328125
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,4096,40,8,128,1,float16,float16,0,5.284796905517578
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,4096,40,40,128,1,fp8,fp8,0,2.6274784088134764
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,4096,40,1,128,1,float16,float16,0,2.386244773864746
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,4096,40,1,128,1,fp8,fp8,0,2.1648496627807616
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,4096,40,1,128,1,float16,fp8,0,2.436079978942871
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,4096,40,2,128,1,float16,float16,0,2.4237295150756837
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,4096,40,2,128,1,float16,fp8,0,2.176416015625
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,4096,40,2,128,1,fp8,fp8,0,2.189740753173828
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,4096,40,4,128,1,float16,float16,0,2.5904495239257814
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,4096,40,4,128,1,float16,fp8,0,2.173494338989258
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,4096,40,40,128,1,float16,float16,0,1.4383567810058593
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,4096,40,4,128,1,fp8,fp8,0,2.41635684967041
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,4096,40,8,128,1,float16,fp8,0,2.1798383712768556
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,4096,40,1,128,1,float16,float16,0,1.184665584564209
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,4096,40,8,128,1,float16,float16,0,2.6047840118408203
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,4096,40,8,128,1,fp8,fp8,0,2.5107919692993166
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,4096,40,40,128,1,fp8,fp8,0,1.461184024810791
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,4096,40,1,128,1,fp8,fp8,0,1.1219663619995117
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,4096,40,1,128,1,float16,fp8,0,1.2017727851867677
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,4096,40,2,128,1,float16,float16,0,1.2002367973327637
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,4096,40,2,128,1,float16,fp8,0,1.2320048332214355
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,4096,40,2,128,1,fp8,fp8,0,1.3277104377746582
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,4096,40,4,128,1,float16,fp8,0,1.1451775550842285
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,4096,40,4,128,1,fp8,fp8,0,1.1073920249938964
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,4096,40,8,128,1,float16,float16,0,1.2587535858154297
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,4096,40,40,128,1,float16,fp8,0,0.6456607818603516
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,4096,40,8,128,1,float16,fp8,0,1.1885600090026855
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,4096,40,8,128,1,fp8,fp8,0,1.1303631782531738
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,4096,40,40,128,1,fp8,fp8,0,0.645959997177124
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,4096,40,1,128,1,float16,float16,0,0.7090688228607178
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,4096,40,1,128,1,float16,fp8,0,0.5726064205169678
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,4096,40,1,128,1,fp8,fp8,0,0.6005199909210205
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,4096,40,40,128,1,float16,fp8,0,1.250584030151367
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,4096,40,2,128,1,float16,fp8,0,0.5729951858520508
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,4096,40,4,128,1,float16,float16,0,0.6209375858306885
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,4096,40,2,128,1,fp8,fp8,0,0.6935167789459229
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,4096,40,4,128,1,fp8,fp8,0,0.5708864212036133
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,4096,40,8,128,1,float16,float16,0,0.6263567924499511
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,4096,40,8,128,1,float16,fp8,0,0.5745488166809082
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,4096,40,40,128,1,float16,float16,0,0.3749583959579468
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,4096,40,40,128,1,float16,fp8,0,0.3448031902313232
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,4096,40,8,128,1,fp8,fp8,0,0.6562719821929932
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,4096,40,40,128,1,fp8,fp8,0,0.3572527885437012
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,4096,40,1,128,1,float16,fp8,0,0.3070751905441284
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,4096,40,1,128,1,fp8,fp8,0,0.3220400094985962
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,4096,40,4,128,1,float16,float16,0,1.2250592231750488
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,4096,40,2,128,1,float16,float16,0,0.3284735918045044
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,4096,40,2,128,1,float16,fp8,0,0.30675520896911623
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,4096,40,2,128,1,fp8,fp8,0,0.32251040935516356
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,4096,40,4,128,1,float16,float16,0,0.32806398868560793
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,4096,40,4,128,1,fp8,fp8,0,0.30738880634307864
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,4096,40,8,128,1,float16,float16,0,0.3391263961791992
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,4096,40,40,128,1,float16,float16,0,0.7321584224700928
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,4096,40,40,128,1,float16,float16,0,0.20989439487457276
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,4096,40,8,128,1,float16,fp8,0,0.3094768047332764
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,4096,40,8,128,1,fp8,fp8,0,0.3081664085388184
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,4096,40,40,128,1,fp8,fp8,0,0.19568639993667603
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,4096,40,1,128,1,float16,float16,0,0.18094719648361207
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,4096,40,1,128,1,float16,fp8,0,0.17471200227737427
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,4096,40,1,128,1,fp8,fp8,0,0.17544000148773192
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,4096,40,2,128,1,float16,float16,0,0.1807520031929016
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,4096,40,2,128,1,fp8,fp8,0,0.17585279941558837
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,4096,40,2,128,1,float16,float16,0,0.6081088066101075
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,4096,40,4,128,1,float16,float16,0,0.18261760473251343
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,4096,40,4,128,1,float16,fp8,0,0.17530560493469238
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,4096,40,4,128,1,fp8,fp8,0,0.17400480508804322
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,4096,40,8,128,1,float16,float16,0,0.18545600175857543
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,4096,40,8,128,1,float16,fp8,0,0.17499040365219115
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,4096,40,8,128,1,fp8,fp8,0,0.17541439533233644
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,4096,40,1,128,1,float16,float16,0,0.32251999378204343
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,4096,40,4,128,1,float16,fp8,0,0.3158159971237183
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,4096,40,40,128,1,float16,fp8,0,0.1941264033317566
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,3072,40,1,128,1,float16,fp8,0,2.6179168701171873
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,3072,40,1,128,1,fp8,fp8,0,2.622879981994629
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,3072,40,1,128,1,float16,float16,0,3.020159912109375
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,4096,40,2,128,1,float16,fp8,0,0.17517440319061278
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,4096,40,4,128,1,float16,fp8,0,0.5854671955108642
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,3072,40,2,128,1,float16,float16,0,2.9248815536499024
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,3072,40,2,128,1,float16,fp8,0,2.617144012451172
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,3072,40,2,128,1,fp8,fp8,0,2.618190383911133
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,3072,40,4,128,1,fp8,fp8,0,2.6202112197875977
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,3072,40,4,128,1,float16,float16,0,3.0523216247558596
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,3072,40,4,128,1,float16,fp8,0,2.7910160064697265
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,3072,40,8,128,1,float16,float16,0,3.035089683532715
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,3072,40,40,128,1,float16,fp8,0,1.5394031524658203
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,3072,40,1,128,1,float16,float16,0,1.4348768234252929
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,3072,40,40,128,1,float16,float16,0,1.7822864532470704
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,3072,40,40,128,1,fp8,fp8,0,1.701211166381836
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,3072,40,8,128,1,float16,fp8,0,2.7743967056274412
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,3072,40,8,128,1,fp8,fp8,0,2.6161615371704103
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,3072,40,1,128,1,float16,fp8,0,1.5616047859191895
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,3072,40,1,128,1,fp8,fp8,0,1.3266143798828125
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,3072,40,2,128,1,float16,float16,0,1.5120368003845215
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,3072,40,2,128,1,float16,fp8,0,1.504964828491211
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,3072,40,2,128,1,fp8,fp8,0,1.3487792015075684
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,3072,40,4,128,1,float16,float16,0,1.4380847930908203
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,3072,40,4,128,1,float16,fp8,0,1.3296079635620117
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,3072,40,4,128,1,fp8,fp8,0,1.3251935958862304
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,3072,40,8,128,1,float16,float16,0,1.4843503952026367
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,3072,40,40,128,1,float16,fp8,0,0.7890975952148438
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,3072,40,40,128,1,float16,float16,0,0.8955151557922363
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,3072,40,1,128,1,float16,float16,0,0.7219359874725342
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,3072,40,8,128,1,float16,fp8,0,1.3278351783752442
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,3072,40,40,128,1,fp8,fp8,0,0.8215120315551758
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,3072,40,1,128,1,float16,fp8,0,0.6787295818328858
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,3072,40,8,128,1,fp8,fp8,0,1.4846847534179688
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,3072,40,2,128,1,float16,float16,0,0.7128543853759766
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,3072,40,1,128,1,fp8,fp8,0,0.7934447765350342
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,3072,40,2,128,1,fp8,fp8,0,0.6976943969726562
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,3072,40,2,128,1,float16,fp8,0,0.761678409576416
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,3072,40,4,128,1,fp8,fp8,0,0.6790815830230713
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,3072,40,8,128,1,float16,float16,0,0.7528592109680176
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,3072,40,40,128,1,float16,float16,0,0.45664157867431643
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,3072,40,40,128,1,float16,fp8,0,0.432371187210083
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,3072,40,40,128,1,fp8,fp8,0,0.4102128028869629
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,3072,40,8,128,1,float16,fp8,0,0.6814432144165039
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,3072,40,8,128,1,fp8,fp8,0,0.7310143947601319
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,3072,40,1,128,1,float16,float16,0,0.3860640048980713
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,3072,40,1,128,1,float16,fp8,0,0.3664448022842407
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,3072,40,1,128,1,fp8,fp8,0,0.3658639907836914
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,3072,40,2,128,1,float16,fp8,0,0.35752480030059813
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,3072,40,2,128,1,fp8,fp8,0,0.36958239078521726
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,3072,40,4,128,1,float16,float16,0,0.39348158836364744
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,3072,40,4,128,1,float16,fp8,0,0.3594336032867432
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,3072,40,4,128,1,fp8,fp8,0,0.36577279567718507
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,3072,40,8,128,1,float16,float16,0,0.380731201171875
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,3072,40,8,128,1,float16,fp8,0,0.3627856016159058
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,3072,40,40,128,1,float16,float16,0,0.24119999408721923
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,3072,40,40,128,1,float16,fp8,0,0.2289520025253296
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,3072,40,8,128,1,fp8,fp8,0,0.37781760692596433
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,3072,40,40,128,1,fp8,fp8,0,0.22439360618591309
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,3072,40,1,128,1,float16,float16,0,0.2055583953857422
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,3072,40,1,128,1,float16,fp8,0,0.19610400199890138
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,3072,40,1,128,1,fp8,fp8,0,0.2005311965942383
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,3072,40,2,128,1,float16,float16,0,0.2137631893157959
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,3072,40,2,128,1,fp8,fp8,0,0.1968943953514099
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,3072,40,4,128,1,float16,float16,0,0.20498239994049072
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,3072,40,4,128,1,float16,fp8,0,0.19967999458312988
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,3072,40,4,128,1,fp8,fp8,0,0.20555200576782226
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,3072,40,8,128,1,float16,float16,0,0.20887200832366942
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,3072,40,8,128,1,float16,fp8,0,0.19647200107574464
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,3072,40,8,128,1,fp8,fp8,0,0.1971119999885559
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,3072,40,40,128,1,float16,fp8,0,0.12940479516983033
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,3072,40,40,128,1,fp8,fp8,0,0.12926080226898193
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,3072,40,1,128,1,float16,fp8,0,0.11461119651794434
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,3072,40,1,128,1,fp8,fp8,0,0.1149664044380188
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,3072,40,2,128,1,float16,float16,0,0.11935199499130249
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,3072,40,2,128,1,float16,fp8,0,0.11477760076522828
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,3072,40,2,128,1,fp8,fp8,0,0.11465120315551758
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,3072,40,4,128,1,float16,float16,0,0.1202288031578064
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,3072,40,4,128,1,float16,float16,0,0.7243391990661621
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,3072,40,4,128,1,float16,fp8,0,0.7416207790374756
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,3072,40,4,128,1,fp8,fp8,0,0.11487840414047241
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,3072,40,8,128,1,float16,float16,0,0.1224128007888794
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,3072,40,8,128,1,fp8,fp8,0,0.11449439525604248
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,3072,40,2,128,1,float16,float16,0,0.3719935894012451
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,3072,40,2,128,1,float16,fp8,0,0.19497760534286498
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,3072,40,40,128,1,float16,float16,0,0.13959200382232667
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,3072,40,1,128,1,float16,float16,0,0.11989920139312744
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,2048,40,1,128,1,float16,fp8,0,2.6690528869628904
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,2048,40,1,128,1,float16,float16,0,2.870020866394043
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,3072,40,4,128,1,float16,fp8,0,0.11526720523834229
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,2048,40,1,128,1,fp8,fp8,0,2.668796730041504
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,3072,40,8,128,1,float16,fp8,0,0.1143183946609497
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,2048,40,2,128,1,float16,float16,0,2.911115264892578
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,2048,40,2,128,1,float16,fp8,0,2.6664335250854494
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,2048,40,2,128,1,fp8,fp8,0,2.6681631088256834
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,2048,40,4,128,1,float16,float16,0,3.025984001159668
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,2048,40,4,128,1,float16,fp8,0,2.8104528427124023
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,2048,40,4,128,1,fp8,fp8,0,2.6702848434448243
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,2048,40,8,128,1,float16,fp8,0,2.6638431549072266
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,2048,40,8,128,1,float16,float16,0,3.1091503143310546
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,2048,40,40,128,1,float16,float16,0,1.837179183959961
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,2048,40,40,128,1,float16,fp8,0,1.627814483642578
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,2048,40,1,128,1,float16,float16,0,1.3925583839416504
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,2048,40,40,128,1,fp8,fp8,0,1.8142704010009765
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,2048,40,1,128,1,fp8,fp8,0,1.4665663719177247
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,2048,40,2,128,1,float16,fp8,0,1.3471504211425782
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,2048,40,2,128,1,float16,float16,0,1.473539161682129
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,2048,40,2,128,1,fp8,fp8,0,1.4539168357849122
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,2048,40,4,128,1,float16,float16,0,1.4292847633361816
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,2048,40,4,128,1,float16,fp8,0,1.366921615600586
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,2048,40,4,128,1,fp8,fp8,0,1.3474687576293944
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,2048,40,8,128,1,float16,fp8,0,1.3616751670837401
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,2048,40,8,128,1,float16,float16,0,1.5906895637512206
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,2048,40,40,128,1,float16,float16,0,0.9242783546447754
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,2048,40,8,128,1,fp8,fp8,0,1.398692798614502
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,2048,40,40,128,1,float16,fp8,0,0.8278927803039551
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,2048,40,1,128,1,float16,float16,0,0.7966879844665528
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,2048,40,40,128,1,fp8,fp8,0,0.8315055847167969
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,2048,40,8,128,1,fp8,fp8,0,2.8488927841186524
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,2048,40,1,128,1,float16,fp8,0,0.6868271827697754
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,2048,40,1,128,1,fp8,fp8,0,0.7043007850646973
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,2048,40,2,128,1,float16,float16,0,0.7206704139709472
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,2048,40,2,128,1,fp8,fp8,0,0.6877471923828125
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,2048,40,2,128,1,float16,fp8,0,0.7430272102355957
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,2048,40,4,128,1,float16,float16,0,0.7098976135253906
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,2048,40,4,128,1,float16,fp8,0,0.7080880165100097
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,2048,40,4,128,1,fp8,fp8,0,0.6915200233459473
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,2048,40,8,128,1,float16,float16,0,0.7523263931274414
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,2048,40,40,128,1,float16,float16,0,0.4659728050231934
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,2048,40,8,128,1,float16,fp8,0,0.6879695892333985
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,2048,40,40,128,1,float16,fp8,0,0.43294081687927244
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,2048,40,8,128,1,fp8,fp8,0,0.7274127960205078
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,2048,40,1,128,1,float16,float16,0,0.3986016035079956
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,2048,40,1,128,1,float16,fp8,0,0.3596015930175781
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,2048,40,1,128,1,fp8,fp8,0,0.3557552099227905
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,2048,40,2,128,1,float16,float16,0,0.3615871906280518
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,2048,40,2,128,1,float16,fp8,0,0.35987679958343505
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,2048,40,2,128,1,fp8,fp8,0,0.36146399974822996
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,2048,40,4,128,1,float16,float16,0,0.4058879852294922
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,2048,40,4,128,1,float16,fp8,0,0.36275041103363037
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,2048,40,4,128,1,fp8,fp8,0,0.3572304010391235
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,2048,40,8,128,1,float16,fp8,0,0.3576256036758423
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,2048,40,8,128,1,fp8,fp8,0,0.36095519065856935
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,2048,40,40,128,1,float16,float16,0,0.26819519996643065
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,2048,40,40,128,1,float16,fp8,0,0.22952160835266114
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,2048,40,1,128,1,float16,float16,0,0.19517279863357545
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,2048,40,1,128,1,float16,fp8,0,0.1929200053215027
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,2048,40,1,128,1,fp8,fp8,0,0.2039407968521118
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,2048,40,2,128,1,float16,float16,0,0.19582079648971557
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,2048,40,2,128,1,float16,fp8,0,0.19258879423141478
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,2048,40,2,128,1,fp8,fp8,0,0.1927008032798767
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,2048,40,4,128,1,float16,float16,0,0.20273919105529786
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,2048,40,4,128,1,float16,fp8,0,0.19388320446014404
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,2048,40,4,128,1,fp8,fp8,0,0.1923632025718689
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,2048,40,8,128,1,float16,float16,0,0.20305120944976807
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,2048,40,8,128,1,float16,fp8,0,0.19892480373382568
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,2048,40,40,128,1,float16,float16,0,0.13666559457778932
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,2048,40,40,128,1,float16,fp8,0,0.12861599922180175
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,2048,40,1,128,1,float16,fp8,0,1.344980812072754
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,2048,40,1,128,1,float16,float16,0,0.11201599836349488
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,2048,40,1,128,1,fp8,fp8,0,0.10847359895706177
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,2048,40,2,128,1,float16,float16,0,0.11169600486755371
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,2048,40,2,128,1,float16,fp8,0,0.1078112006187439
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,2048,40,4,128,1,float16,float16,0,0.11304479837417603
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,2048,40,4,128,1,float16,fp8,0,0.10793919563293457
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,2048,40,4,128,1,fp8,fp8,0,0.10812000036239625
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,2048,40,8,128,1,float16,float16,0,0.11683679819107055
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,2048,40,40,128,1,fp8,fp8,0,0.4292992115020752
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,2048,40,8,128,1,float16,fp8,0,0.10874400138854981
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,2048,40,8,128,1,fp8,fp8,0,0.10863840579986572
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,2048,40,40,128,1,float16,float16,0,0.08247039914131164
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,2048,40,40,128,1,float16,fp8,0,0.07559360265731811
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,2048,40,40,128,1,fp8,fp8,0,0.07492640018463134
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,2048,40,1,128,1,float16,float16,0,0.06984000205993653
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,2048,40,1,128,1,float16,fp8,0,0.06595199704170226
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,2048,40,1,128,1,fp8,fp8,0,0.06650400161743164
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,2048,40,2,128,1,float16,float16,0,0.06905440092086793
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,2048,40,8,128,1,float16,float16,0,0.37714879512786864
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,2048,40,2,128,1,float16,fp8,0,0.06622880101203918
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,2048,40,2,128,1,fp8,fp8,0,0.06609920263290406
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,2048,40,4,128,1,float16,float16,0,0.0701632022857666
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,2048,40,40,128,1,fp8,fp8,0,0.22740640640258789
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,2048,40,4,128,1,fp8,fp8,0,0.06599680185317994
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,2048,40,8,128,1,float16,fp8,0,0.06644639968872071
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,2048,40,8,128,1,fp8,fp8,0,0.06629440188407898
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,2048,40,8,128,1,fp8,fp8,0,0.19381279945373536
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,2048,40,1,128,1,float16,fp8,0,0.10817279815673828
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,2048,40,2,128,1,fp8,fp8,0,0.10850399732589722
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1536,40,1,128,1,float16,float16,0,1.704422378540039
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1536,40,1,128,1,float16,fp8,0,1.6992912292480469
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1536,40,1,128,1,fp8,fp8,0,1.7052495956420899
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1536,40,2,128,1,float16,float16,0,1.7548831939697265
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1536,40,2,128,1,float16,fp8,0,1.701736068725586
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,2048,40,4,128,1,float16,fp8,0,0.06684319972991944
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,2048,40,8,128,1,float16,float16,0,0.07118399739265442
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1536,40,2,128,1,fp8,fp8,0,1.7930240631103516
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1536,40,4,128,1,float16,fp8,0,1.7048208236694335
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,2048,40,40,128,1,fp8,fp8,0,0.13156479597091675
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1536,40,4,128,1,fp8,fp8,0,1.8072847366333007
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1536,40,40,128,1,float16,float16,0,1.1935919761657714
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1536,40,8,128,1,float16,fp8,0,1.7014432907104493
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1536,40,8,128,1,fp8,fp8,0,1.7041055679321289
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1536,40,40,128,1,float16,fp8,0,1.0747407913208007
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1536,40,40,128,1,fp8,fp8,0,1.1023152351379395
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1536,40,1,128,1,float16,float16,0,0.8832799911499023
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1536,40,1,128,1,float16,fp8,0,0.8628527641296386
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1536,40,1,128,1,fp8,fp8,0,0.8629983901977539
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1536,40,2,128,1,float16,float16,0,0.8594032287597656
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1536,40,2,128,1,float16,fp8,0,0.8629136085510254
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1536,40,2,128,1,fp8,fp8,0,0.9468416213989258
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1536,40,4,128,1,float16,float16,0,0.8862208366394043
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1536,40,4,128,1,float16,fp8,0,0.8628447532653809
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1536,40,4,128,1,float16,float16,0,1.7836687088012695
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1536,40,4,128,1,fp8,fp8,0,0.9093440055847168
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1536,40,8,128,1,float16,float16,0,0.9231663703918457
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1536,40,40,128,1,float16,fp8,0,0.5505536079406739
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1536,40,40,128,1,fp8,fp8,0,0.5710912227630616
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1536,40,8,128,1,float16,fp8,0,0.9079232215881348
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1536,40,1,128,1,float16,float16,0,0.44237117767333983
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1536,40,1,128,1,float16,fp8,0,0.441483211517334
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1536,40,8,128,1,float16,float16,0,1.8670640945434571
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1536,40,1,128,1,fp8,fp8,0,0.4515024185180664
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1536,40,2,128,1,float16,float16,0,0.4538464069366455
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1536,40,2,128,1,float16,fp8,0,0.4414463996887207
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1536,40,2,128,1,fp8,fp8,0,0.44127840995788575
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1536,40,4,128,1,float16,fp8,0,0.4444447994232178
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1536,40,4,128,1,float16,float16,0,0.4521920204162598
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1536,40,4,128,1,fp8,fp8,0,0.4439055919647217
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1536,40,8,128,1,float16,float16,0,0.4682000160217285
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1536,40,8,128,1,float16,fp8,0,0.4419839859008789
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1536,40,40,128,1,float16,float16,0,0.31039040088653563
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1536,40,40,128,1,float16,fp8,0,0.28694720268249513
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1536,40,8,128,1,fp8,fp8,0,0.44506402015686036
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1536,40,40,128,1,fp8,fp8,0,0.28936479091644285
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1536,40,1,128,1,float16,float16,0,0.23186559677124025
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1536,40,1,128,1,fp8,fp8,0,0.23400640487670898
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1536,40,2,128,1,float16,float16,0,0.23332641124725342
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1536,40,2,128,1,float16,fp8,0,0.23379840850830078
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1536,40,2,128,1,fp8,fp8,0,0.23287839889526368
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1536,40,4,128,1,float16,float16,0,0.23614718914031982
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1536,40,4,128,1,float16,fp8,0,0.2348128080368042
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1536,40,4,128,1,fp8,fp8,0,0.23433918952941896
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1536,40,8,128,1,float16,float16,0,0.24362559318542482
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1536,40,8,128,1,fp8,fp8,0,0.23351199626922609
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1536,40,40,128,1,float16,float16,0,0.1660863995552063
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1536,40,40,128,1,float16,fp8,0,0.15543680191040038
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1536,40,40,128,1,fp8,fp8,0,0.15518720149993898
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1536,40,1,128,1,float16,float16,0,0.13037919998168945
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1536,40,1,128,1,float16,fp8,0,0.12704319953918458
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1536,40,1,128,1,fp8,fp8,0,0.1268288016319275
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1536,40,2,128,1,float16,float16,0,0.1303647994995117
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1536,40,2,128,1,float16,fp8,0,0.12677279710769654
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1536,40,2,128,1,fp8,fp8,0,0.12861280441284179
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1536,40,4,128,1,float16,float16,0,0.1322975993156433
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1536,40,4,128,1,float16,fp8,0,0.1275552034378052
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1536,40,40,128,1,float16,float16,0,0.5979360103607178
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1536,40,8,128,1,float16,float16,0,0.13523839712142943
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1536,40,8,128,1,float16,fp8,0,0.12846879959106444
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1536,40,8,128,1,fp8,fp8,0,0.9162336349487304
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1536,40,8,128,1,fp8,fp8,0,0.12800320386886596
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1536,40,40,128,1,float16,float16,0,0.09409760236740113
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1536,40,40,128,1,float16,fp8,0,0.08931040167808532
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1536,40,40,128,1,fp8,fp8,0,0.08858879804611205
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1536,40,1,128,1,float16,float16,0,0.07632799744606018
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1536,40,1,128,1,fp8,fp8,0,0.07448639869689941
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1536,40,2,128,1,float16,float16,0,0.07656000256538391
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1536,40,2,128,1,float16,fp8,0,0.07473599910736084
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1536,40,2,128,1,fp8,fp8,0,0.07436479926109314
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1536,40,4,128,1,float16,float16,0,0.07762719988822937
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1536,40,4,128,1,fp8,fp8,0,0.07445920109748841
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1536,40,8,128,1,float16,float16,0,0.07907519936561584
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1536,40,8,128,1,float16,fp8,0,0.07469760179519654
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1536,40,8,128,1,fp8,fp8,0,0.07444000244140625
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1536,40,40,128,1,float16,float16,0,0.05683519840240479
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1536,40,40,128,1,float16,fp8,0,0.055713599920272826
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1536,40,40,128,1,fp8,fp8,0,0.0551472008228302
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1536,40,1,128,1,float16,float16,0,0.05054559707641602
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1536,40,1,128,1,float16,fp8,0,0.04940640032291412
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1536,40,1,128,1,fp8,fp8,0,0.04874399900436401
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1536,40,2,128,1,float16,fp8,0,0.04835520088672638
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1536,40,2,128,1,fp8,fp8,0,0.048344001173973083
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1536,40,4,128,1,float16,float16,0,0.05039039850234985
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1536,40,4,128,1,float16,fp8,0,0.04795520007610321
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1536,40,4,128,1,fp8,fp8,0,0.048763200640678406
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1536,40,8,128,1,float16,float16,0,0.05146880149841308
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1536,40,8,128,1,float16,fp8,0,0.04901759922504425
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1536,40,8,128,1,fp8,fp8,0,0.04817439913749695
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1536,40,1,128,1,float16,fp8,0,0.2330575942993164
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1536,40,8,128,1,float16,fp8,0,0.2341439962387085
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,1024,40,1,128,1,float16,float16,0,1.789561653137207
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,1024,40,1,128,1,float16,fp8,0,1.853276824951172
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1536,40,4,128,1,fp8,fp8,0,0.1268847942352295
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,1024,40,1,128,1,fp8,fp8,0,1.8549583435058594
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1536,40,4,128,1,float16,fp8,0,0.07482079863548279
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,1024,40,2,128,1,float16,float16,0,1.795806312561035
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1536,40,2,128,1,float16,float16,0,0.04991520047187805
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,1024,40,2,128,1,float16,fp8,0,1.9338703155517578
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,1024,40,4,128,1,float16,float16,0,1.8663679122924806
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,1024,40,4,128,1,float16,fp8,0,1.8525152206420898
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,1024,40,4,128,1,fp8,fp8,0,1.856515121459961
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,1024,40,8,128,1,float16,fp8,0,1.8521408081054687
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,1024,40,8,128,1,float16,float16,0,2.0419647216796877
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1024,40,40,128,1,float16,float16,0,1.3458383560180665
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,1024,40,8,128,1,fp8,fp8,0,1.8667488098144531
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1536,40,1,128,1,float16,fp8,0,0.07525920271873474
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1024,40,1,128,1,float16,float16,0,1.034440040588379
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1024,40,40,128,1,fp8,fp8,0,1.2135456085205079
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1024,40,1,128,1,fp8,fp8,0,0.9369312286376953
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1024,40,1,128,1,float16,fp8,0,0.990988826751709
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1024,40,2,128,1,float16,float16,0,0.9178735733032226
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1024,40,2,128,1,float16,fp8,0,0.9859120368957519
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1024,40,2,128,1,fp8,fp8,0,0.9354512214660644
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1024,40,4,128,1,float16,float16,0,0.9279024124145507
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1024,40,4,128,1,float16,fp8,0,0.9417311668395996
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1024,40,8,128,1,float16,float16,0,0.9806783676147461
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1024,40,8,128,1,float16,fp8,0,0.9359904289245605
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1024,40,40,128,1,float16,float16,0,0.673961591720581
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1024,40,8,128,1,fp8,fp8,0,0.9369440078735352
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1024,40,40,128,1,float16,fp8,0,0.626417589187622
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1024,40,40,128,1,fp8,fp8,0,0.6171343803405762
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1024,40,40,128,1,float16,fp8,0,1.2166272163391114
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1024,40,1,128,1,float16,float16,0,0.4705376148223877
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1024,40,1,128,1,float16,fp8,0,0.4760863780975342
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1024,40,1,128,1,fp8,fp8,0,0.4914912223815918
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1024,40,2,128,1,float16,fp8,0,0.47711520195007323
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1024,40,2,128,1,fp8,fp8,0,0.4774752140045166
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1024,40,4,128,1,float16,float16,0,0.4845952033996582
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1024,40,4,128,1,float16,fp8,0,0.4761184215545654
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1024,40,4,128,1,fp8,fp8,0,0.48352479934692383
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1024,40,8,128,1,float16,float16,0,0.49466400146484374
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1024,40,8,128,1,float16,fp8,0,0.47824320793151853
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1024,40,8,128,1,fp8,fp8,0,0.48763198852539064
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1024,40,40,128,1,float16,float16,0,0.3427200078964233
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1024,40,40,128,1,fp8,fp8,0,0.3183631896972656
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1024,40,1,128,1,float16,float16,0,0.2427903890609741
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,1024,40,2,128,1,fp8,fp8,0,1.8507600784301759
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1024,40,1,128,1,float16,fp8,0,0.25237278938293456
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1024,40,1,128,1,fp8,fp8,0,0.2480560064315796
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1024,40,2,128,1,float16,float16,0,0.24245760440826417
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1024,40,2,128,1,float16,fp8,0,0.24936161041259766
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1024,40,2,128,1,fp8,fp8,0,0.24826080799102784
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1024,40,4,128,1,float16,fp8,0,0.247107195854187
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1024,40,4,128,1,fp8,fp8,0,0.2485680103302002
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1024,40,8,128,1,float16,float16,0,0.2572959899902344
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1024,40,8,128,1,float16,fp8,0,0.2482975959777832
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1024,40,8,128,1,fp8,fp8,0,0.24867680072784423
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1024,40,40,128,1,float16,fp8,0,0.16987839937210084
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1024,40,40,128,1,fp8,fp8,0,0.1690448045730591
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1024,40,1,128,1,float16,float16,0,0.13308160305023192
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1024,40,1,128,1,float16,fp8,0,0.1342960000038147
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1024,40,1,128,1,fp8,fp8,0,0.13407520055770875
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1024,40,2,128,1,float16,float16,0,0.13326879739761352
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1024,40,2,128,1,float16,fp8,0,0.1339184045791626
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1024,40,2,128,1,fp8,fp8,0,0.13374559879302977
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1024,40,4,128,1,float16,float16,0,0.13522720336914062
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1024,40,4,128,1,float16,fp8,0,0.13465280532836915
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1024,40,4,128,1,fp8,fp8,0,0.13395839929580688
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1024,40,8,128,1,float16,fp8,0,0.1345728039741516
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1024,40,8,128,1,float16,float16,0,0.14056320190429689
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1024,40,8,128,1,fp8,fp8,0,0.1341488003730774
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1024,40,40,128,1,float16,float16,0,0.10079200267791748
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1024,40,40,128,1,float16,fp8,0,0.09450079798698426
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1024,40,40,128,1,fp8,fp8,0,0.09451199769973755
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1024,40,1,128,1,float16,float16,0,0.07611520290374756
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1024,40,1,128,1,float16,fp8,0,0.07506719827651978
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1024,40,2,128,1,float16,float16,0,0.46523199081420896
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1024,40,1,128,1,fp8,fp8,0,0.07495359778404236
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1024,40,2,128,1,float16,fp8,0,0.07515040040016174
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1024,40,2,128,1,fp8,fp8,0,0.07474560141563416
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1024,40,4,128,1,float16,fp8,0,0.07518240213394164
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1024,40,4,128,1,fp8,fp8,0,0.07539680004119872
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1024,40,8,128,1,float16,float16,0,0.08078719973564148
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1024,40,8,128,1,float16,fp8,0,0.07544959783554077
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1024,40,8,128,1,fp8,fp8,0,0.07535679936408997
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1024,40,40,128,1,float16,float16,0,0.059601598978042604
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1024,40,40,128,1,float16,fp8,0,0.053566402196884154
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1024,40,40,128,1,fp8,fp8,0,0.05358719825744629
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1024,40,1,128,1,float16,float16,0,0.04568000137805939
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1024,40,1,128,1,float16,fp8,0,0.04528000056743622
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1024,40,40,128,1,float16,fp8,0,0.32034080028533934
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1024,40,2,128,1,float16,float16,0,0.04599199891090393
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1024,40,2,128,1,float16,fp8,0,0.045300799608230594
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1024,40,2,128,1,fp8,fp8,0,0.045265600085258484
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1024,40,4,128,1,float16,float16,0,0.0469296008348465
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1024,40,4,128,1,float16,fp8,0,0.04528320133686066
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1024,40,4,128,1,fp8,fp8,0,0.045419201254844666
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1024,40,8,128,1,float16,fp8,0,0.04530560076236725
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1024,40,4,128,1,float16,float16,0,0.24769120216369628
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1024,40,40,128,1,float16,float16,0,0.0371071994304657
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1024,40,4,128,1,fp8,fp8,0,0.936092758178711
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1024,40,40,128,1,float16,fp8,0,0.03710080087184906
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1024,40,40,128,1,float16,float16,0,0.18082400560379028
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1024,40,1,128,1,float16,fp8,0,0.03288320004940033
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1024,40,1,128,1,fp8,fp8,0,0.0329008013010025
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1024,40,2,128,1,float16,float16,0,0.0329584002494812
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1024,40,2,128,1,float16,fp8,0,0.03300319910049439
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1024,40,2,128,1,fp8,fp8,0,0.03296320140361786
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1024,40,4,128,1,float16,float16,0,0.034123200178146365
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1024,40,2,128,1,float16,float16,0,0.07632480263710022
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1024,40,4,128,1,float16,fp8,0,0.03303839862346649
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1024,40,4,128,1,float16,float16,0,0.07700160145759583
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1024,40,4,128,1,fp8,fp8,0,0.0330128014087677
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1024,40,8,128,1,float16,float16,0,0.035062399506568906
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1024,40,8,128,1,float16,fp8,0,0.03299840092658997
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1024,40,8,128,1,fp8,fp8,0,0.03303839862346649
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1024,40,1,128,1,fp8,fp8,0,0.045270401239395144
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1024,40,8,128,1,float16,float16,0,0.04731679856777191
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1024,40,8,128,1,fp8,fp8,0,0.04530879855155945
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,512,40,1,128,1,float16,float16,0,1.354206371307373
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,512,40,1,128,1,float16,fp8,0,1.4431360244750977
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1024,40,1,128,1,float16,float16,0,0.033076798915863036
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,512,40,1,128,1,fp8,fp8,0,1.4416784286499023
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,512,40,2,128,1,float16,float16,0,1.3575119972229004
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,512,40,2,128,1,float16,fp8,0,1.4428336143493652
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,512,40,2,128,1,fp8,fp8,0,1.4425840377807617
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,512,40,4,128,1,float16,float16,0,1.3956576347351075
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,512,40,4,128,1,float16,fp8,0,1.4401087760925293
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,512,40,4,128,1,fp8,fp8,0,1.4380000114440918
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,512,40,8,128,1,float16,fp8,0,1.4391584396362305
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,512,40,40,128,1,float16,float16,0,1.0869183540344238
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,512,40,40,128,1,float16,fp8,0,1.0007616043090821
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,512,40,8,128,1,fp8,fp8,0,1.4406160354614257
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,512,40,40,128,1,fp8,fp8,0,0.9993984222412109
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,512,40,1,128,1,float16,float16,0,0.683571195602417
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,512,40,1,128,1,float16,fp8,0,0.7293920040130615
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,512,40,2,128,1,float16,float16,0,0.6861184120178223
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1024,40,40,128,1,fp8,fp8,0,0.03716320097446442
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,512,40,2,128,1,float16,fp8,0,0.7290495872497559
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,512,40,4,128,1,float16,float16,0,0.7051904201507568
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,512,40,4,128,1,float16,fp8,0,0.7283103942871094
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,512,40,4,128,1,fp8,fp8,0,0.727123212814331
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,512,40,8,128,1,float16,float16,0,0.7469935894012452
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,512,40,8,128,1,float16,fp8,0,0.7276991844177246
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,512,40,8,128,1,fp8,fp8,0,0.7254784107208252
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,512,40,40,128,1,float16,float16,0,0.5494416236877442
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,512,40,40,128,1,float16,fp8,0,0.5076848030090332
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,512,40,1,128,1,float16,float16,0,0.3512271881103516
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,512,40,1,128,1,float16,fp8,0,0.3714128017425537
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,512,40,40,128,1,fp8,fp8,0,0.5081727981567383
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,512,40,1,128,1,fp8,fp8,0,0.37267680168151857
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,512,40,2,128,1,float16,float16,0,0.3515471935272217
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,512,40,2,128,1,float16,fp8,0,0.37186241149902344
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,512,40,2,128,1,fp8,fp8,0,0.3708688020706177
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,512,40,4,128,1,float16,float16,0,0.36094400882720945
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,512,40,4,128,1,float16,fp8,0,0.37178399562835696
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,512,40,4,128,1,fp8,fp8,0,0.3715375900268555
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,512,40,8,128,1,float16,float16,0,1.4782112121582032
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,512,40,8,128,1,float16,float16,0,0.3815648078918457
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,512,40,8,128,1,fp8,fp8,0,0.37156798839569094
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,512,40,8,128,1,float16,fp8,0,0.3707711935043335
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,512,40,40,128,1,float16,float16,0,0.28242878913879393
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,512,40,40,128,1,float16,fp8,0,0.26200160980224607
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,512,40,40,128,1,fp8,fp8,0,0.26215200424194335
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,512,40,1,128,1,float16,fp8,0,0.19395359754562377
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,512,40,1,128,1,fp8,fp8,0,0.1930575966835022
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,512,40,2,128,1,float16,float16,0,0.1856063961982727
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,512,40,2,128,1,float16,fp8,0,0.19292800426483153
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,512,40,4,128,1,float16,float16,0,0.19056639671325684
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,512,40,4,128,1,float16,fp8,0,0.19285279512405396
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,512,40,4,128,1,fp8,fp8,0,0.19409919977188111
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,512,40,2,128,1,fp8,fp8,0,0.19363839626312257
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,512,40,8,128,1,float16,float16,0,0.19848639965057374
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,512,40,8,128,1,float16,fp8,0,0.19443199634552003
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,512,40,40,128,1,float16,float16,0,0.1487696051597595
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,512,40,8,128,1,fp8,fp8,0,0.19339840412139891
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,512,40,40,128,1,fp8,fp8,0,0.13916800022125245
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,512,40,1,128,1,float16,float16,0,0.10220160484313964
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,512,40,1,128,1,float16,fp8,0,0.10476000308990478
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,512,40,2,128,1,fp8,fp8,0,0.7276159763336182
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,512,40,2,128,1,float16,fp8,0,0.10454239845275878
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,512,40,2,128,1,fp8,fp8,0,0.10400320291519165
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,512,40,4,128,1,float16,float16,0,0.10365279912948608
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,512,40,4,128,1,float16,fp8,0,0.10445760488510132
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,512,40,4,128,1,fp8,fp8,0,0.10435839891433715
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,512,40,8,128,1,float16,float16,0,0.10818079710006714
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,512,40,8,128,1,float16,fp8,0,0.10494879484176636
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,512,40,40,128,1,float16,float16,0,0.08274719715118409
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,512,40,40,128,1,float16,fp8,0,0.07802720069885254
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,512,40,40,128,1,fp8,fp8,0,0.07795199751853943
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,512,40,1,128,1,float16,float16,0,0.05788959860801697
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,512,40,1,128,1,float16,fp8,0,0.05893599987030029
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,512,40,2,128,1,float16,float16,0,0.058455997705459596
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,512,40,2,128,1,float16,fp8,0,0.05887200236320496
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,512,40,2,128,1,fp8,fp8,0,0.05892800092697144
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,512,40,4,128,1,float16,float16,0,0.059575998783111574
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,512,40,4,128,1,float16,fp8,0,0.05951039791107178
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,512,40,4,128,1,fp8,fp8,0,0.059571200609207155
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,512,40,8,128,1,float16,float16,0,0.06338719725608825
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,512,40,8,128,1,float16,fp8,0,0.05943999886512756
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,512,40,8,128,1,fp8,fp8,0,0.05958719849586487
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,512,40,40,128,1,float16,float16,0,0.04936479926109314
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,512,40,40,128,1,float16,fp8,0,0.04528000056743622
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,512,40,1,128,1,float16,float16,0,0.186135995388031
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,512,40,40,128,1,fp8,fp8,0,0.0452672004699707
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,512,40,1,128,1,float16,float16,0,0.03667680025100708
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,512,40,1,128,1,float16,fp8,0,0.03572160005569458
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,512,40,1,128,1,fp8,fp8,0,0.03691039979457855
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,512,40,2,128,1,float16,float16,0,0.03572799861431122
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,512,40,2,128,1,float16,fp8,0,0.03644959926605225
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,512,40,2,128,1,fp8,fp8,0,0.03628000020980835
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,512,40,4,128,1,float16,float16,0,0.037083199620246886
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,512,40,1,128,1,fp8,fp8,0,0.7303264141082764
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,512,40,40,128,1,float16,fp8,0,0.13914239406585693
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,512,40,4,128,1,float16,fp8,0,0.03675360083580017
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,512,40,8,128,1,float16,float16,0,0.03760479986667633
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,512,40,8,128,1,float16,fp8,0,0.03709439933300018
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,512,40,1,128,1,fp8,fp8,0,0.10448319911956787
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,512,40,2,128,1,float16,float16,0,0.10191359519958496
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,512,40,40,128,1,float16,float16,0,0.030905601382255555
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,512,40,40,128,1,float16,fp8,0,0.030934399366378783
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,512,40,40,128,1,fp8,fp8,0,0.03088639974594116
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,512,40,1,128,1,float16,float16,0,0.026867198944091796
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,512,40,1,128,1,float16,fp8,0,0.02683840095996857
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,512,40,2,128,1,float16,float16,0,0.026915198564529418
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,512,40,2,128,1,float16,fp8,0,0.026844799518585205
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,512,40,2,128,1,fp8,fp8,0,0.02683840095996857
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,512,40,4,128,1,float16,float16,0,0.026848000288009644
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,512,40,4,128,1,float16,fp8,0,0.026872000098228453
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,512,40,8,128,1,fp8,fp8,0,0.105075204372406
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,512,40,8,128,1,float16,float16,0,0.028707200288772584
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,512,40,8,128,1,float16,fp8,0,0.026836800575256347
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,512,40,8,128,1,fp8,fp8,0,0.02682879865169525
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,512,40,40,128,1,float16,float16,0,0.020662400126457214
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,512,40,40,128,1,float16,fp8,0,0.020691199600696562
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,512,40,40,128,1,fp8,fp8,0,0.02067199945449829
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,512,40,1,128,1,float16,float16,0,0.01863519996404648
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,512,40,1,128,1,float16,fp8,0,0.018644799292087556
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,512,40,1,128,1,fp8,fp8,0,0.01868959963321686
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,512,40,1,128,1,fp8,fp8,0,0.05922399759292603
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,512,40,2,128,1,float16,fp8,0,0.01865919977426529
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,512,40,2,128,1,fp8,fp8,0,0.018694399297237395
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,512,40,4,128,1,float16,float16,0,0.018665599822998046
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,512,40,4,128,1,float16,fp8,0,0.01857759952545166
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,512,40,4,128,1,fp8,fp8,0,0.01866399943828583
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,512,40,8,128,1,float16,float16,0,0.018692800402641298
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,512,40,8,128,1,float16,fp8,0,0.018747200071811677
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,512,40,8,128,1,fp8,fp8,0,0.018667200207710268
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,256,40,1,128,1,float16,float16,0,0.5648416042327881
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,256,40,1,128,1,float16,fp8,0,0.6187776088714599
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,256,40,1,128,1,fp8,fp8,0,0.6191760063171386
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,512,40,4,128,1,fp8,fp8,0,0.03705280125141144
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,512,40,8,128,1,fp8,fp8,0,0.03649759888648987
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,512,40,1,128,1,fp8,fp8,0,0.02683520019054413
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,256,40,2,128,1,float16,fp8,0,0.618936014175415
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,512,40,4,128,1,fp8,fp8,0,0.026840001344680786
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,256,40,2,128,1,fp8,fp8,0,0.618620777130127
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,256,40,4,128,1,float16,float16,0,0.5861567974090576
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,256,40,4,128,1,fp8,fp8,0,0.6165472030639648
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,512,40,2,128,1,float16,float16,0,0.018598400056362152
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,256,40,8,128,1,float16,float16,0,0.6250527858734131
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,256,40,8,128,1,float16,fp8,0,0.6173295974731445
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,256,40,40,128,1,float16,float16,0,0.48407840728759766
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,256,40,8,128,1,fp8,fp8,0,0.616649580001831
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,256,40,40,128,1,float16,fp8,0,0.44928798675537107
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,256,40,40,128,1,fp8,fp8,0,0.4499631881713867
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,256,40,1,128,1,float16,float16,0,0.28910079002380373
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,256,40,1,128,1,float16,fp8,0,0.3143359899520874
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,256,40,1,128,1,fp8,fp8,0,0.31473278999328613
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,256,40,2,128,1,float16,float16,0,0.29034080505371096
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,256,40,2,128,1,float16,fp8,0,0.31385600566864014
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,256,40,4,128,1,float16,float16,0,0.29896318912506104
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,256,40,2,128,1,fp8,fp8,0,0.31426880359649656
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,256,40,4,128,1,float16,fp8,0,0.3149359941482544
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,256,40,4,128,1,fp8,fp8,0,0.3147119998931885
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,256,40,8,128,1,float16,float16,0,0.3188352108001709
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,256,40,8,128,1,float16,fp8,0,0.3143887996673584
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,256,40,40,128,1,float16,float16,0,0.24941439628601075
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,256,40,40,128,1,float16,fp8,0,0.23402879238128663
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,256,40,1,128,1,float16,float16,0,0.15444480180740355
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,256,40,40,128,1,fp8,fp8,0,0.2337023973464966
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,256,40,1,128,1,float16,fp8,0,0.16573760509490967
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,256,40,1,128,1,fp8,fp8,0,0.16603679656982423
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,256,40,2,128,1,float16,float16,0,0.15474560260772705
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,256,40,2,128,1,float16,fp8,0,0.16532959938049316
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,256,40,2,128,1,fp8,fp8,0,0.1654736042022705
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,256,40,4,128,1,float16,float16,0,0.1587407946586609
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,256,40,2,128,1,float16,float16,0,0.5667488098144531
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,256,40,4,128,1,float16,fp8,0,0.16581759452819825
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,256,40,4,128,1,fp8,fp8,0,0.16593919992446898
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,256,40,8,128,1,float16,float16,0,0.16802239418029785
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,256,40,8,128,1,fp8,fp8,0,0.16618880033493041
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,256,40,40,128,1,float16,float16,0,0.13095359802246093
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,256,40,40,128,1,fp8,fp8,0,0.12212640047073364
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,256,40,40,128,1,float16,fp8,0,0.12200800180435181
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,256,40,1,128,1,float16,fp8,0,0.08737120032310486
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,256,40,2,128,1,float16,float16,0,0.08358399868011475
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,256,40,2,128,1,float16,fp8,0,0.0874239981174469
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,256,40,2,128,1,fp8,fp8,0,0.08712159991264343
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,256,40,4,128,1,float16,float16,0,0.08625119924545288
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,256,40,4,128,1,float16,fp8,0,0.08781279921531678
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,256,40,4,128,1,fp8,fp8,0,0.08741120100021363
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,256,40,8,128,1,float16,float16,0,0.0904416024684906
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,256,40,8,128,1,float16,fp8,0,0.08814240097999573
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,256,40,8,128,1,fp8,fp8,0,0.088019198179245
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,256,40,40,128,1,float16,float16,0,0.07423679828643799
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,256,40,40,128,1,float16,fp8,0,0.06981599926948548
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,256,40,40,128,1,fp8,fp8,0,0.0698303997516632
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,256,40,1,128,1,float16,float16,0,0.048990398645401
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,256,40,1,128,1,float16,fp8,0,0.05108799934387207
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,256,40,1,128,1,fp8,fp8,0,0.05094720125198364
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,256,40,2,128,1,float16,float16,0,0.049348801374435425
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,256,40,2,128,1,float16,fp8,0,0.051097601652145386
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,256,40,2,128,1,fp8,fp8,0,0.051267200708389284
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,256,40,4,128,1,float16,fp8,0,0.6182000160217285
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,256,40,4,128,1,float16,float16,0,0.05090559720993042
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,256,40,4,128,1,float16,fp8,0,0.050761598348617556
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,256,40,4,128,1,fp8,fp8,0,0.05135040283203125
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,256,40,8,128,1,float16,float16,0,0.053527998924255374
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,256,40,8,128,1,float16,fp8,0,0.050455999374389646
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,256,40,8,128,1,fp8,fp8,0,0.051472002267837526
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,256,40,40,128,1,float16,float16,0,0.04519680142402649
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,256,40,40,128,1,float16,fp8,0,0.041126400232315063
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,256,40,1,128,1,float16,float16,0,0.031020799279212953
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,256,40,1,128,1,float16,fp8,0,0.03296479880809784
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,256,40,1,128,1,fp8,fp8,0,0.03281280100345611
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,256,40,2,128,1,float16,float16,0,0.03124319911003113
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,256,40,2,128,1,float16,fp8,0,0.03290719985961914
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,256,40,2,128,1,fp8,fp8,0,0.032953599095344545
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,256,40,4,128,1,float16,float16,0,0.03259359896183014
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,256,40,4,128,1,float16,fp8,0,0.03296479880809784
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,256,40,4,128,1,fp8,fp8,0,0.0329120010137558
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,256,40,8,128,1,float16,float16,0,0.03300159871578216
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,256,40,8,128,1,float16,fp8,0,0.03289439976215362
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,256,40,8,128,1,fp8,fp8,0,0.03296160101890564
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,256,40,40,128,1,float16,float16,0,0.0268640011548996
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,256,40,40,128,1,float16,fp8,0,0.027928000688552855
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,256,40,40,128,1,fp8,fp8,0,0.027775999903678895
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,256,40,1,128,1,float16,float16,0,0.022728000581264497
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,256,40,1,128,1,float16,fp8,0,0.023171199858188628
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,256,40,1,128,1,fp8,fp8,0,0.023219199478626253
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,256,40,2,128,1,float16,float16,0,0.02279040068387985
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,256,40,8,128,1,fp8,fp8,0,0.31402719020843506
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,256,40,2,128,1,fp8,fp8,0,0.02343519926071167
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,256,40,4,128,1,float16,float16,0,0.022784000635147093
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,256,40,4,128,1,float16,fp8,0,0.02311840057373047
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,256,40,4,128,1,fp8,fp8,0,0.02327679991722107
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,256,40,8,128,1,float16,float16,0,0.024718399345874786
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,256,40,8,128,1,float16,fp8,0,0.02356639951467514
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,256,40,8,128,1,fp8,fp8,0,0.02358720004558563
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,256,40,40,128,1,float16,float16,0,0.01857440024614334
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,256,40,40,128,1,float16,fp8,0,0.018670399487018586
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,256,40,40,128,1,fp8,fp8,0,0.018539200723171233
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,256,40,1,128,1,float16,float16,0,0.01576640009880066
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,256,40,1,128,1,float16,fp8,0,0.016406400501728056
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,256,40,1,128,1,fp8,fp8,0,0.015715199708938598
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,256,40,8,128,1,float16,fp8,0,0.16651040315628052
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,256,40,1,128,1,float16,float16,0,0.08366079926490784
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,256,40,2,128,1,float16,fp8,0,0.01619359999895096
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,256,40,2,128,1,fp8,fp8,0,0.016624000668525696
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,256,40,4,128,1,float16,float16,0,0.016545599699020384
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,256,40,1,128,1,fp8,fp8,0,0.08737279772758484
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,256,40,8,128,1,float16,float16,0,0.016595199704170227
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,256,40,8,128,1,float16,fp8,0,0.01650400012731552
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,256,40,8,128,1,fp8,fp8,0,0.016572800278663636
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,256,40,40,128,1,float16,float16,0,0.016641600430011748
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,256,40,40,128,1,float16,fp8,0,0.016572800278663636
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,256,40,40,128,1,fp8,fp8,0,0.016515199840068818
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,256,40,1,128,1,float16,float16,0,0.014601600170135499
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,256,40,1,128,1,float16,fp8,0,0.014899200201034546
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,256,40,1,128,1,fp8,fp8,0,0.014593599736690522
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,256,40,2,128,1,float16,float16,0,0.014878399670124054
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,256,40,2,128,1,float16,fp8,0,0.014606399834156037
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,256,40,2,128,1,fp8,fp8,0,0.014630399644374847
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,256,40,4,128,1,float16,float16,0,0.014724799990653991
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,256,40,4,128,1,float16,fp8,0,0.014864000678062438
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,256,40,4,128,1,fp8,fp8,0,0.015054400265216827
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,256,40,8,128,1,float16,float16,0,0.014979200065135955
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,256,40,8,128,1,float16,fp8,0,0.015044799447059632
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,256,40,8,128,1,fp8,fp8,0,0.014844800531864166
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,128,40,1,128,1,float16,float16,0,0.34647679328918457
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,256,40,40,128,1,fp8,fp8,0,0.0412447988986969
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,128,40,1,128,1,float16,fp8,0,0.37146720886230467
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,128,40,1,128,1,fp8,fp8,0,0.3713792085647583
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,128,40,2,128,1,float16,float16,0,0.3472415924072266
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,128,40,2,128,1,float16,fp8,0,0.37102880477905276
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,128,40,2,128,1,fp8,fp8,0,0.3709424018859863
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,128,40,4,128,1,float16,float16,0,0.3561775922775269
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,128,40,4,128,1,float16,fp8,0,0.3703727960586548
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,256,40,2,128,1,float16,fp8,0,0.023758399486541747
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,128,40,4,128,1,fp8,fp8,0,0.37092640399932864
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,128,40,8,128,1,float16,float16,0,0.3753119945526123
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,128,40,8,128,1,float16,fp8,0,0.3698528051376343
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,128,40,8,128,1,fp8,fp8,0,0.37003839015960693
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,128,40,40,128,1,float16,float16,0,0.27275359630584717
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,256,40,2,128,1,float16,float16,0,0.01642719954252243
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,128,40,40,128,1,float16,fp8,0,0.2585024118423462
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,128,40,40,128,1,fp8,fp8,0,0.2579904079437256
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,256,40,4,128,1,float16,fp8,0,0.016395199298858642
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,256,40,4,128,1,fp8,fp8,0,0.016387200355529784
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,128,40,1,128,1,fp8,fp8,0,0.1905408024787903
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,128,40,2,128,1,float16,float16,0,0.1798095941543579
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,128,40,2,128,1,float16,fp8,0,0.1910912036895752
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,128,40,2,128,1,fp8,fp8,0,0.190174400806427
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,128,40,4,128,1,float16,float16,0,0.1839136004447937
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,128,40,4,128,1,float16,fp8,0,0.1903007984161377
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,128,40,4,128,1,fp8,fp8,0,0.19075679779052734
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,128,40,8,128,1,float16,float16,0,0.19311679601669313
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,128,40,8,128,1,float16,fp8,0,0.1908352017402649
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,128,40,40,128,1,float16,float16,0,0.14162720441818238
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,128,40,40,128,1,float16,fp8,0,0.13575680255889894
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,128,40,8,128,1,fp8,fp8,0,0.1909775972366333
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,128,40,40,128,1,fp8,fp8,0,0.13567520380020143
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,128,40,1,128,1,float16,float16,0,0.09654240012168884
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,128,40,1,128,1,float16,fp8,0,0.10219839811325074
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,128,40,1,128,1,fp8,fp8,0,0.10134880542755127
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,128,40,2,128,1,float16,float16,0,0.09699519872665405
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,128,40,2,128,1,float16,fp8,0,0.10088000297546387
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,128,40,2,128,1,fp8,fp8,0,0.10150400400161744
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,128,40,4,128,1,float16,float16,0,0.0992031991481781
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,128,40,4,128,1,float16,fp8,0,0.10153119564056397
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,128,40,8,128,1,float16,float16,0,0.10428160429000854
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,128,40,8,128,1,float16,fp8,0,0.10175520181655884
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,128,40,40,128,1,float16,float16,0,0.07568320035934448
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,128,40,8,128,1,fp8,fp8,0,0.10223200321197509
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,128,40,40,128,1,float16,fp8,0,0.07292479872703553
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,128,40,40,128,1,fp8,fp8,0,0.07258560061454773
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,128,40,1,128,1,float16,float16,0,0.05245440006256104
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,128,40,2,128,1,float16,float16,0,0.05308960080146789
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,128,40,1,128,1,fp8,fp8,0,0.054150402545928955
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,128,40,2,128,1,float16,fp8,0,0.05448319911956787
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,128,40,2,128,1,fp8,fp8,0,0.05441759824752808
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,128,40,4,128,1,float16,fp8,0,0.05501599907875061
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,128,40,4,128,1,fp8,fp8,0,0.05467039942741394
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,128,40,8,128,1,float16,float16,0,0.05750240087509155
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,128,40,8,128,1,float16,fp8,0,0.05491200089454651
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,128,40,40,128,1,float16,float16,0,0.04524799883365631
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,128,40,8,128,1,fp8,fp8,0,0.05532479882240295
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,128,40,40,128,1,float16,fp8,0,0.04116959869861603
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,128,40,40,128,1,fp8,fp8,0,0.041152000427246094
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,128,40,1,128,1,float16,float16,0,0.030953601002693176
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,128,40,1,128,1,float16,fp8,0,0.03299199938774109
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,128,40,2,128,1,float16,float16,0,0.030979201197624207
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,128,40,1,128,1,fp8,fp8,0,0.03290719985961914
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,128,40,2,128,1,float16,fp8,0,0.03289920091629028
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,128,40,2,128,1,fp8,fp8,0,0.032913601398468016
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,128,40,4,128,1,float16,float16,0,0.03200640082359314
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,128,40,4,128,1,float16,fp8,0,0.03300800025463104
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,128,40,4,128,1,fp8,fp8,0,0.03286559879779816
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,128,40,8,128,1,float16,float16,0,0.033024001121521
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,128,40,8,128,1,float16,fp8,0,0.032971200346946714
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,128,40,40,128,1,float16,float16,0,0.024728000164031982
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,128,40,8,128,1,fp8,fp8,0,0.033004799485206605
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,128,40,40,128,1,float16,fp8,0,0.026675200462341307
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,128,40,40,128,1,fp8,fp8,0,0.026438400149345398
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,128,40,1,128,1,float16,float16,0,0.020768000185489653
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,128,40,1,128,1,float16,fp8,0,0.022140799462795256
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,128,40,1,128,1,fp8,fp8,0,0.02069920003414154
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,128,40,2,128,1,float16,float16,0,0.020848000049591066
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,128,40,2,128,1,float16,fp8,0,0.021488000452518464
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,128,40,1,128,1,float16,fp8,0,0.19087040424346924
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,128,40,2,128,1,fp8,fp8,0,0.02234400063753128
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,128,40,4,128,1,float16,float16,0,0.020678399503231047
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,128,40,4,128,1,float16,fp8,0,0.021529600024223328
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,128,40,4,128,1,fp8,fp8,0,0.022251200675964356
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,128,40,8,128,1,float16,float16,0,0.02237759977579117
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,128,40,8,128,1,float16,fp8,0,0.02125920057296753
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,128,40,40,128,1,float16,float16,0,0.017841599881649017
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,128,40,40,128,1,float16,fp8,0,0.018691200017929076
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,128,40,40,128,1,fp8,fp8,0,0.018614399433135986
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,128,40,1,128,1,float16,float16,0,0.01658719927072525
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,128,40,1,128,1,float16,fp8,0,0.01658560037612915
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,128,40,1,128,1,fp8,fp8,0,0.01658560037612915
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,128,40,2,128,1,float16,float16,0,0.01650400012731552
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,128,40,2,128,1,float16,fp8,0,0.01661919951438904
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,128,40,2,128,1,fp8,fp8,0,0.016603200137615202
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,128,40,4,128,1,float16,float16,0,0.01653439998626709
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,128,40,4,128,1,float16,fp8,0,0.0166143998503685
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,128,40,4,128,1,fp8,fp8,0,0.016569599509239197
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,128,40,8,128,1,float16,float16,0,0.016606399416923524
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,128,40,8,128,1,float16,fp8,0,0.016599999368190767
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,128,40,4,128,1,fp8,fp8,0,0.1011888027191162
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,128,40,8,128,1,fp8,fp8,0,0.016616000235080718
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,128,40,40,128,1,float16,float16,0,0.01247519999742508
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,128,40,40,128,1,float16,fp8,0,0.012928000092506409
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,128,40,40,128,1,fp8,fp8,0,0.012535999715328216
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,128,40,1,128,1,float16,fp8,0,0.012430399656295776
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,128,40,1,128,1,fp8,fp8,0,0.012417600303888322
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,128,40,2,128,1,float16,float16,0,0.012439999729394913
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,128,40,2,128,1,float16,fp8,0,0.01241919994354248
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,128,40,1,128,1,float16,fp8,0,0.054636800289154054
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,128,40,2,128,1,fp8,fp8,0,0.012452799826860428
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,128,40,4,128,1,float16,float16,0,0.012379200011491776
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,128,40,4,128,1,float16,fp8,0,0.012548799812793731
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,128,40,4,128,1,float16,float16,0,0.05440639853477478
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,128,40,4,128,1,fp8,fp8,0,0.012488000094890594
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,128,40,8,128,1,float16,float16,0,0.012425599992275238
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,128,40,8,128,1,float16,fp8,0,0.012513600289821625
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,128,40,8,128,1,fp8,fp8,0,0.01252640038728714
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,128,40,40,128,1,float16,float16,0,0.012470400333404541
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,128,40,40,128,1,float16,fp8,0,0.012433599680662155
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,128,40,40,128,1,fp8,fp8,0,0.012545600533485413
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,128,40,1,128,1,float16,float16,0,0.01055999994277954
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,128,40,1,128,1,float16,fp8,0,0.012385600060224534
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,128,40,1,128,1,fp8,fp8,0,0.010462400317192078
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,128,40,2,128,1,float16,float16,0,0.012127999961376191
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,128,40,2,128,1,float16,fp8,0,0.011337599903345107
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,128,40,4,128,1,float16,float16,0,0.01040479987859726
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,128,40,4,128,1,float16,fp8,0,0.012468799948692322
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,128,40,4,128,1,fp8,fp8,0,0.01146719977259636
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,128,40,8,128,1,float16,float16,0,0.010499200224876404
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,128,40,8,128,1,float16,fp8,0,0.011512000113725662
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,128,40,8,128,1,fp8,fp8,0,0.010955200344324113
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,64,40,1,128,1,float16,float16,0,0.26652960777282714
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,64,40,1,128,1,float16,fp8,0,0.27700960636138916
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,64,40,1,128,1,fp8,fp8,0,0.27883200645446776
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,64,40,2,128,1,float16,float16,0,0.265665602684021
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,128,40,1,128,1,float16,float16,0,0.17969919443130494
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,64,40,2,128,1,fp8,fp8,0,0.27826719284057616
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,64,40,4,128,1,float16,float16,0,0.2707376003265381
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,64,40,4,128,1,float16,fp8,0,0.27727680206298827
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,128,40,8,128,1,fp8,fp8,0,0.021367999911308288
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,64,40,8,128,1,float16,float16,0,0.2798847913742065
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,64,40,8,128,1,float16,fp8,0,0.27736639976501465
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,64,40,40,128,1,float16,float16,0,0.1838271975517273
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,64,40,8,128,1,fp8,fp8,0,0.2770224094390869
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,64,40,40,128,1,float16,fp8,0,0.17844799757003785
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,64,40,40,128,1,fp8,fp8,0,0.17725919485092162
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,64,40,1,128,1,float16,fp8,0,0.14432640075683595
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,64,40,1,128,1,fp8,fp8,0,0.1439792037010193
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,128,40,1,128,1,float16,float16,0,0.012307199835777282
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,64,40,2,128,1,float16,float16,0,0.140339195728302
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,64,40,2,128,1,float16,fp8,0,0.14398080110549927
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,64,40,2,128,1,fp8,fp8,0,0.14377119541168212
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,64,40,4,128,1,float16,float16,0,0.14205119609832764
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,64,40,4,128,1,float16,fp8,0,0.1438928008079529
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,64,40,4,128,1,fp8,fp8,0,0.14411200284957887
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,64,40,8,128,1,float16,float16,0,0.1469871997833252
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,64,40,8,128,1,float16,fp8,0,0.14424959421157837
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,128,40,2,128,1,fp8,fp8,0,0.01242400035262108
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,64,40,8,128,1,fp8,fp8,0,0.1441200017929077
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,64,40,40,128,1,float16,fp8,0,0.09446240067481995
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,64,40,40,128,1,fp8,fp8,0,0.09453279972076416
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,64,40,1,128,1,float16,float16,0,0.07554399967193604
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,64,40,1,128,1,float16,fp8,0,0.07623839974403382
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,64,40,1,128,1,fp8,fp8,0,0.07638400197029113
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,64,40,2,128,1,float16,float16,0,0.07623040080070495
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,64,40,2,128,1,float16,fp8,0,0.07654079794883728
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,64,40,2,128,1,fp8,fp8,0,0.07638239860534668
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,64,40,4,128,1,float16,float16,0,0.07753599882125854
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,64,40,4,128,1,float16,fp8,0,0.07641599774360656
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,64,40,4,128,1,fp8,fp8,0,0.07641280293464661
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,64,40,8,128,1,float16,float16,0,0.07974079847335816
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,64,40,8,128,1,fp8,fp8,0,0.0765887975692749
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,64,40,40,128,1,float16,float16,0,0.05501919984817505
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,64,40,40,128,1,fp8,fp8,0,0.05112959742546082
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,64,40,1,128,1,float16,float16,0,0.04139519929885864
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,64,40,1,128,1,float16,fp8,0,0.042044800519943235
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,64,40,1,128,1,fp8,fp8,0,0.0418287992477417
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,64,40,2,128,1,float16,fp8,0,0.2780495882034302
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,64,40,2,128,1,float16,fp8,0,0.041886401176452634
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,64,40,2,128,1,fp8,fp8,0,0.04297119975090027
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,64,40,4,128,1,float16,float16,0,0.042412799596786496
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,64,40,4,128,1,float16,fp8,0,0.042955198884010316
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,64,40,4,128,1,fp8,fp8,0,0.04290400147438049
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,64,40,8,128,1,float16,float16,0,0.04352799952030182
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,64,40,4,128,1,fp8,fp8,0,0.27741920948028564
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,64,40,8,128,1,float16,fp8,0,0.04318079948425293
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,64,40,40,128,1,float16,float16,0,0.028884801268577575
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,64,40,40,128,1,float16,fp8,0,0.030921599268913268
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,64,40,40,128,1,fp8,fp8,0,0.030852800607681273
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,64,40,1,128,1,float16,float16,0,0.026636800169944762
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,64,40,1,128,1,float16,fp8,0,0.02656480073928833
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,64,40,1,128,1,fp8,fp8,0,0.026817598938941957
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,64,40,2,128,1,float16,float16,0,0.02531839907169342
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,64,40,2,128,1,float16,fp8,0,0.026843199133872987
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,64,40,1,128,1,float16,float16,0,0.13927040100097657
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,64,40,4,128,1,float16,float16,0,0.0268528014421463
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,64,40,4,128,1,float16,fp8,0,0.026340800523757934
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,64,40,4,128,1,fp8,fp8,0,0.026848000288009644
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,64,40,8,128,1,float16,float16,0,0.026859200000762938
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,64,40,8,128,1,float16,fp8,0,0.026395198702812196
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,64,40,8,128,1,fp8,fp8,0,0.026552000641822816
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,64,40,40,128,1,float16,float16,0,0.01895360052585602
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,64,40,40,128,1,fp8,fp8,0,0.020720000565052032
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,64,40,1,128,1,float16,float16,0,0.018193599581718446
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,64,40,1,128,1,float16,fp8,0,0.018432000279426576
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,64,40,1,128,1,fp8,fp8,0,0.018401600420475006
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,64,40,40,128,1,float16,float16,0,0.09660639762878417
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,64,40,2,128,1,float16,float16,0,0.01823199987411499
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,64,40,2,128,1,float16,fp8,0,0.018632000684738158
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,64,40,2,128,1,fp8,fp8,0,0.018535999953746794
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,64,40,4,128,1,float16,float16,0,0.018595199286937713
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,64,40,4,128,1,float16,fp8,0,0.01857440024614334
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,64,40,4,128,1,fp8,fp8,0,0.018643200397491455
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,64,40,8,128,1,float16,float16,0,0.018654400110244752
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,64,40,8,128,1,float16,fp8,0,0.018671999871730804
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,64,40,8,128,1,fp8,fp8,0,0.018651199340820313
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,64,40,40,128,1,float16,float16,0,0.014747199416160584
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,64,40,40,128,1,float16,fp8,0,0.014844800531864166
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,64,40,40,128,1,fp8,fp8,0,0.014603200554847717
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,64,40,1,128,1,float16,float16,0,0.014483200013637542
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,64,40,1,128,1,float16,fp8,0,0.01454080045223236
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,64,40,8,128,1,float16,fp8,0,0.07698079943656921
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,64,40,2,128,1,float16,float16,0,0.01454399973154068
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,64,40,2,128,1,float16,fp8,0,0.014580799639225006
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,64,40,40,128,1,float16,fp8,0,0.05051040053367615
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,64,40,2,128,1,fp8,fp8,0,0.014475199580192565
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,64,40,4,128,1,float16,float16,0,0.01456640064716339
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,64,40,4,128,1,fp8,fp8,0,0.014552000164985656
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,64,40,8,128,1,float16,float16,0,0.014484800398349762
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,64,40,8,128,1,float16,fp8,0,0.014542399346828461
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,64,40,2,128,1,float16,float16,0,0.04161120057106018
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,64,40,8,128,1,fp8,fp8,0,0.014481599628925323
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,64,40,40,128,1,float16,float16,0,0.012406399846076966
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,64,40,40,128,1,float16,fp8,0,0.010569600015878677
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,64,40,40,128,1,fp8,fp8,0,0.010572800040245056
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,64,40,1,128,1,float16,float16,0,0.010412800312042236
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,64,40,1,128,1,float16,fp8,0,0.010486400127410889
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,64,40,2,128,1,float16,float16,0,0.010436800122261048
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,64,40,8,128,1,fp8,fp8,0,0.04312959909439087
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,64,40,2,128,1,fp8,fp8,0,0.010428799688816071
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,64,40,4,128,1,float16,float16,0,0.010465600341558457
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,64,40,4,128,1,float16,fp8,0,0.010416000336408614
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,64,40,4,128,1,fp8,fp8,0,0.010576000064611435
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,64,40,8,128,1,float16,float16,0,0.01048320010304451
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,64,40,2,128,1,fp8,fp8,0,0.02648639976978302
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,64,40,8,128,1,float16,fp8,0,0.010424000024795533
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,64,40,8,128,1,fp8,fp8,0,0.01048320010304451
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,64,40,40,128,1,float16,float16,0,0.010576000064611435
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,64,40,40,128,1,float16,fp8,0,0.010471999645233154
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,64,40,40,128,1,fp8,fp8,0,0.010440000146627427
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,64,40,1,128,1,float16,float16,0,0.010329599678516387
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,64,40,1,128,1,float16,fp8,0,0.010529600083827972
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,64,40,40,128,1,float16,fp8,0,0.020691199600696562
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,64,40,2,128,1,float16,float16,0,0.010446400195360184
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,64,40,2,128,1,fp8,fp8,0,0.010452800244092942
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,64,40,4,128,1,float16,float16,0,0.010451199859380722
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,64,40,4,128,1,float16,fp8,0,0.010473600029945374
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,64,40,4,128,1,fp8,fp8,0,0.010444799810647965
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,64,40,8,128,1,float16,float16,0,0.010489600151777268
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,64,40,8,128,1,float16,fp8,0,0.010436800122261048
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,64,40,8,128,1,fp8,fp8,0,0.0104592002928257
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,32,40,1,128,1,float16,float16,0,0.2285856008529663
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,64,40,1,128,1,fp8,fp8,0,0.014529600739479065
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,64,40,4,128,1,float16,fp8,0,0.01446399986743927
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,32,40,1,128,1,fp8,fp8,0,0.23336479663848878
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,32,40,2,128,1,float16,float16,0,0.229531192779541
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,64,40,1,128,1,fp8,fp8,0,0.01048159971833229
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,32,40,2,128,1,float16,fp8,0,0.23407199382781982
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,64,40,2,128,1,float16,fp8,0,0.010515200346708298
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,32,40,4,128,1,float16,float16,0,0.23058879375457764
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,32,40,4,128,1,float16,fp8,0,0.23382079601287842
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,32,40,4,128,1,fp8,fp8,0,0.23348479270935057
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,32,40,8,128,1,float16,fp8,0,0.23383519649505616
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,32,40,40,128,1,float16,float16,0,0.1405776023864746
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,32,40,8,128,1,float16,float16,0,0.23527679443359376
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,32,40,40,128,1,fp8,fp8,0,0.13869600296020507
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,32,40,1,128,1,float16,float16,0,0.12053920030593872
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,32,40,1,128,1,float16,fp8,0,0.12051359415054322
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,32,40,1,128,1,fp8,fp8,0,0.12114880084991456
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,32,40,2,128,1,float16,float16,0,0.12036960124969483
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,32,40,2,128,1,float16,fp8,0,0.12091200351715088
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,64,40,2,128,1,float16,fp8,0,0.010582400113344192
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,32,40,2,128,1,fp8,fp8,0,0.1213487982749939
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,32,40,4,128,1,float16,float16,0,0.12165919542312623
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,32,40,4,128,1,float16,fp8,0,0.12140320539474488
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,32,40,4,128,1,fp8,fp8,0,0.12130399942398071
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,32,40,8,128,1,float16,float16,0,0.12420159578323364
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,32,40,8,128,1,float16,fp8,0,0.1216271996498108
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,32,40,8,128,1,fp8,fp8,0,0.1216271996498108
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,32,40,40,128,1,float16,float16,0,0.07776479721069336
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,32,40,40,128,1,float16,fp8,0,0.07404639720916747
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,32,40,40,128,1,fp8,fp8,0,0.07350559830665589
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,32,40,1,128,1,float16,float16,0,0.06537759900093079
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,32,40,1,128,1,float16,fp8,0,0.06580320000648499
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,32,40,1,128,1,fp8,fp8,0,0.06502400040626526
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,32,40,2,128,1,float16,float16,0,0.06505759954452514
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,32,40,2,128,1,float16,fp8,0,0.06543840169906616
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,32,40,2,128,1,fp8,fp8,0,0.06462560296058655
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,32,40,4,128,1,float16,float16,0,0.06630719900131225
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,32,40,4,128,1,float16,fp8,0,0.0653551995754242
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,32,40,4,128,1,fp8,fp8,0,0.06484959721565246
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,32,40,8,128,1,float16,float16,0,0.06739680171012878
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,32,40,8,128,1,float16,fp8,0,0.06577600240707397
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,32,40,8,128,1,fp8,fp8,0,0.0648751974105835
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,32,40,40,128,1,float16,float16,0,0.041203200817108154
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,32,40,40,128,1,float16,fp8,0,0.04116959869861603
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,32,40,1,128,1,float16,float16,0,0.03744640052318573
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,32,40,1,128,1,float16,fp8,0,0.03693119883537292
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,32,40,1,128,1,fp8,fp8,0,0.037191998958587644
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,32,40,2,128,1,float16,float16,0,0.03716480135917664
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,32,40,2,128,1,float16,fp8,0,0.03751519918441772
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,32,40,2,128,1,fp8,fp8,0,0.03686560094356537
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,32,40,4,128,1,float16,float16,0,0.03741759955883026
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,32,40,4,128,1,float16,fp8,0,0.03706879913806915
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,32,40,8,128,1,float16,float16,0,0.038812801241874695
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,32,40,4,128,1,fp8,fp8,0,0.03711999952793121
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,32,40,8,128,1,float16,fp8,0,0.037038400769233704
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,32,40,2,128,1,fp8,fp8,0,0.23370399475097656
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,32,40,40,128,1,float16,float16,0,0.025014400482177734
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,32,40,40,128,1,float16,fp8,0,0.024772800505161285
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,32,40,40,128,1,fp8,fp8,0,0.024900799989700316
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,32,40,1,128,1,float16,float16,0,0.022862400114536285
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,32,40,1,128,1,float16,fp8,0,0.022833600640296936
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,32,40,1,128,1,fp8,fp8,0,0.02269120067358017
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,32,40,2,128,1,float16,float16,0,0.022742399573326112
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,32,40,2,128,1,float16,fp8,0,0.02274879962205887
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,32,40,2,128,1,fp8,fp8,0,0.0227743998169899
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,32,40,4,128,1,float16,float16,0,0.022735999524593355
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,32,40,40,128,1,float16,fp8,0,0.13870079517364503
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,32,40,4,128,1,fp8,fp8,0,0.022779199481010436
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,32,40,8,128,1,float16,float16,0,0.022974400222301482
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,32,40,8,128,1,float16,fp8,0,0.022782400250434875
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,64,40,1,128,1,fp8,fp8,0,0.010438399761915207
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,32,40,8,128,1,fp8,fp8,0,0.022763200104236603
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,32,40,40,128,1,float16,float16,0,0.018161599338054658
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,32,40,40,128,1,float16,fp8,0,0.016657599806785585
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,32,40,40,128,1,fp8,fp8,0,0.017798399925231932
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,32,40,1,128,1,float16,float16,0,0.01659200042486191
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,32,40,1,128,1,fp8,fp8,0,0.016620799899101257
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,32,40,1,128,1,float16,fp8,0,0.016601599752902985
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,32,40,2,128,1,float16,float16,0,0.016641600430011748
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,32,40,2,128,1,float16,fp8,0,0.01653439998626709
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,32,40,2,128,1,fp8,fp8,0,0.016641600430011748
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,32,40,4,128,1,float16,fp8,0,0.016606399416923524
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,32,40,4,128,1,fp8,fp8,0,0.01666879951953888
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,32,40,8,128,1,float16,float16,0,0.01664319932460785
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,32,40,8,128,1,float16,fp8,0,0.016607999801635742
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,32,40,8,128,1,fp8,fp8,0,0.01664000004529953
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,32,40,40,128,1,float16,float16,0,0.014574399590492249
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,32,40,40,128,1,float16,fp8,0,0.012612800300121307
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,32,40,40,128,1,fp8,fp8,0,0.012601600587368011
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,32,40,40,128,1,fp8,fp8,0,0.04119519889354706
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,32,40,1,128,1,float16,float16,0,0.012510399520397186
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,32,40,1,128,1,float16,fp8,0,0.012470400333404541
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,32,40,2,128,1,float16,float16,0,0.01250080019235611
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,32,40,2,128,1,float16,fp8,0,0.012513600289821625
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,32,40,2,128,1,fp8,fp8,0,0.012577599287033081
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,32,40,4,128,1,float16,float16,0,0.012555199861526489
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,32,40,4,128,1,float16,fp8,0,0.012494400143623352
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,32,40,8,128,1,fp8,fp8,0,0.03703519999980927
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,32,40,4,128,1,fp8,fp8,0,0.01252799928188324
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,32,40,8,128,1,float16,float16,0,0.012520000338554382
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,32,40,8,128,1,float16,fp8,0,0.012561599910259246
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,32,40,8,128,1,fp8,fp8,0,0.012540799379348756
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,32,40,40,128,1,float16,float16,0,0.010444799810647965
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,32,40,40,128,1,float16,fp8,0,0.010491199791431427
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,32,40,40,128,1,fp8,fp8,0,0.010468800365924836
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,32,40,8,128,1,fp8,fp8,0,0.23409600257873536
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,32,40,1,128,1,float16,fp8,0,0.010393600165843963
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,32,40,1,128,1,fp8,fp8,0,0.010407999902963639
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,32,40,2,128,1,float16,float16,0,0.010473600029945374
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,32,40,4,128,1,float16,fp8,0,0.02277279943227768
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,32,40,2,128,1,float16,fp8,0,0.010419200360774993
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,32,40,4,128,1,float16,float16,0,0.010364799946546554
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,32,40,4,128,1,float16,fp8,0,0.010452800244092942
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,32,40,4,128,1,fp8,fp8,0,0.010391999781131745
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,32,40,8,128,1,float16,float16,0,0.010424000024795533
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,32,40,8,128,1,float16,fp8,0,0.010473600029945374
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,32,40,8,128,1,fp8,fp8,0,0.010452800244092942
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,32,40,40,128,1,float16,float16,0,0.01045759990811348
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,32,40,40,128,1,float16,fp8,0,0.010406400263309478
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,32,40,40,128,1,fp8,fp8,0,0.010353600233793258
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,32,40,1,128,1,float16,float16,0,0.009419199824333192
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,32,40,1,128,1,float16,fp8,0,0.01032480001449585
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,32,40,1,128,1,fp8,fp8,0,0.010435199737548828
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,32,40,2,128,1,float16,float16,0,0.010422399640083313
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,32,40,2,128,1,float16,fp8,0,0.010411199927330018
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,32,40,2,128,1,fp8,fp8,0,0.010355199873447418
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,32,40,4,128,1,float16,float16,0,0.009335999935865402
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,32,40,4,128,1,fp8,fp8,0,0.010353600233793258
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,32,40,8,128,1,float16,float16,0,0.00952640026807785
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,32,40,8,128,1,float16,fp8,0,0.010355199873447418
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,32,40,8,128,1,fp8,fp8,0,0.01026879996061325
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,32,40,4,128,1,float16,float16,0,0.016633599996566772
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,16,40,1,128,1,float16,float16,0,0.21833760738372804
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,16,40,1,128,1,float16,fp8,0,0.21024320125579835
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,16,40,1,128,1,fp8,fp8,0,0.20970399379730226
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,16,40,2,128,1,float16,float16,0,0.21786079406738282
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,16,40,2,128,1,float16,fp8,0,0.21078720092773437
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,16,40,2,128,1,fp8,fp8,0,0.21096959114074706
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,32,40,1,128,1,float16,fp8,0,0.234550404548645
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,16,40,4,128,1,float16,float16,0,0.2190335988998413
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,16,40,4,128,1,float16,fp8,0,0.2114543914794922
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,16,40,4,128,1,fp8,fp8,0,0.2101583957672119
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,16,40,8,128,1,float16,float16,0,0.2222223997116089
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,16,40,8,128,1,float16,fp8,0,0.21125760078430175
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,32,40,1,128,1,fp8,fp8,0,0.012539200484752655
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,16,40,40,128,1,float16,float16,0,0.12780159711837769
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,16,40,40,128,1,float16,fp8,0,0.11889599561691284
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,16,40,40,128,1,fp8,fp8,0,0.11774719953536987
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,16,40,8,128,1,fp8,fp8,0,0.21190080642700196
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,16,40,1,128,1,float16,float16,0,0.11555839776992798
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,16,40,1,128,1,float16,fp8,0,0.10969280004501343
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,16,40,1,128,1,fp8,fp8,0,0.10996479988098144
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,16,40,2,128,1,float16,float16,0,0.11450560092926025
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,16,40,2,128,1,float16,fp8,0,0.11016639471054077
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,16,40,2,128,1,fp8,fp8,0,0.10966240167617798
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,16,40,4,128,1,float16,float16,0,0.11561599969863892
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,16,40,4,128,1,float16,fp8,0,0.10990079641342163
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,16,40,4,128,1,fp8,fp8,0,0.10933120250701904
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,16,40,8,128,1,float16,float16,0,0.11715840101242066
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,16,40,40,128,1,float16,float16,0,0.06731680035591125
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,16,40,8,128,1,fp8,fp8,0,0.11001919507980347
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,16,40,8,128,1,float16,fp8,0,0.10967999696731567
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,16,40,40,128,1,fp8,fp8,0,0.06436319947242737
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,16,40,1,128,1,float16,float16,0,0.06364319920539856
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,16,40,1,128,1,float16,fp8,0,0.05970240235328674
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,16,40,1,128,1,fp8,fp8,0,0.05981919765472412
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,16,40,2,128,1,float16,float16,0,0.06422560214996338
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,16,40,2,128,1,float16,fp8,0,0.05992479920387268
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,32,40,1,128,1,float16,float16,0,0.010454399883747101
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,16,40,4,128,1,float16,fp8,0,0.060139197111129764
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,16,40,2,128,1,fp8,fp8,0,0.059736001491546634
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,16,40,4,128,1,fp8,fp8,0,0.059875202178955075
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,16,40,8,128,1,float16,float16,0,0.06446880102157593
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,16,40,8,128,1,float16,fp8,0,0.059628802537918094
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,16,40,40,128,1,float16,float16,0,0.03832319974899292
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,16,40,40,128,1,float16,fp8,0,0.03587839901447296
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,16,40,40,128,1,fp8,fp8,0,0.036078399419784545
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,16,40,1,128,1,float16,float16,0,0.036796799302101134
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,16,40,1,128,1,float16,fp8,0,0.03450239896774292
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,16,40,1,128,1,fp8,fp8,0,0.03392159938812256
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,16,40,2,128,1,float16,float16,0,0.03684639930725098
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,16,40,2,128,1,float16,fp8,0,0.03407039940357208
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,16,40,2,128,1,fp8,fp8,0,0.033723199367523195
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,16,40,4,128,1,float16,float16,0,0.03657279908657074
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,16,40,4,128,1,float16,fp8,0,0.034841600060462954
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,32,40,4,128,1,float16,fp8,0,0.010443200170993806
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,16,40,4,128,1,fp8,fp8,0,0.03360320031642914
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,16,40,8,128,1,float16,float16,0,0.03711360096931458
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,16,40,8,128,1,float16,fp8,0,0.03490560054779053
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,16,40,8,128,1,fp8,fp8,0,0.034852799773216245
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,16,40,40,128,1,float16,float16,0,0.024795199930667877
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,16,40,40,128,1,float16,fp8,0,0.022728000581264497
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,16,40,40,128,1,fp8,fp8,0,0.022787199914455415
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,16,40,1,128,1,float16,float16,0,0.02274399995803833
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,16,40,1,128,1,float16,fp8,0,0.022411200404167175
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,16,40,1,128,1,fp8,fp8,0,0.021987199783325195
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,16,40,2,128,1,float16,float16,0,0.02279199957847595
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,16,40,2,128,1,float16,fp8,0,0.02244960069656372
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,16,40,4,128,1,float16,float16,0,0.02274879962205887
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,16,40,4,128,1,float16,fp8,0,0.022409600019454957
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,16,40,4,128,1,fp8,fp8,0,0.022270399332046508
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,16,40,8,128,1,float16,float16,0,0.02269120067358017
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,16,40,8,128,1,float16,fp8,0,0.022731199860572815
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,16,40,8,128,1,fp8,fp8,0,0.02136960029602051
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,16,40,40,128,1,float16,float16,0,0.01660960018634796
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,16,40,40,128,1,float16,fp8,0,0.01659359931945801
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,16,40,1,128,1,float16,float16,0,0.016470399498939515
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,16,40,1,128,1,float16,fp8,0,0.016102400422096253
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,16,40,1,128,1,fp8,fp8,0,0.01637440025806427
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,16,40,2,128,1,float16,float16,0,0.01653279960155487
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,16,40,2,128,1,float16,fp8,0,0.016451199352741242
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,16,40,40,128,1,fp8,fp8,0,0.01660960018634796
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,16,40,2,128,1,fp8,fp8,0,0.014927999675273895
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,16,40,4,128,1,float16,float16,0,0.01653600037097931
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,16,40,4,128,1,float16,fp8,0,0.01478240042924881
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,16,40,4,128,1,fp8,fp8,0,0.014856000244617463
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,16,40,8,128,1,float16,float16,0,0.016579200327396394
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,16,40,8,128,1,float16,fp8,0,0.01656319946050644
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,16,40,8,128,1,fp8,fp8,0,0.01523679941892624
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16,40,40,128,1,float16,float16,0,0.014550399780273438
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16,40,40,128,1,float16,fp8,0,0.012516799569129943
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16,40,40,128,1,fp8,fp8,0,0.012729600071907043
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16,40,1,128,1,float16,float16,0,0.01255200058221817
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16,40,1,128,1,float16,fp8,0,0.01252640038728714
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16,40,1,128,1,fp8,fp8,0,0.01247360035777092
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16,40,2,128,1,float16,float16,0,0.012452799826860428
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16,40,2,128,1,float16,fp8,0,0.012457600235939026
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16,40,2,128,1,fp8,fp8,0,0.01252799928188324
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16,40,4,128,1,float16,float16,0,0.012567999958992004
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16,40,4,128,1,float16,fp8,0,0.012516799569129943
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16,40,4,128,1,fp8,fp8,0,0.012558400630950928
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16,40,8,128,1,float16,float16,0,0.012511999905109405
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,16,40,4,128,1,float16,float16,0,0.06444960236549377
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16,40,8,128,1,fp8,fp8,0,0.01250240057706833
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16,40,40,128,1,float16,float16,0,0.010489600151777268
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16,40,40,128,1,float16,fp8,0,0.010494399815797806
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16,40,40,128,1,fp8,fp8,0,0.010462400317192078
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,32,40,2,128,1,fp8,fp8,0,0.010440000146627427
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16,40,1,128,1,float16,float16,0,0.010529600083827972
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16,40,1,128,1,float16,fp8,0,0.009494400024414063
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,16,40,8,128,1,fp8,fp8,0,0.05994079709053039
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16,40,2,128,1,float16,float16,0,0.010494399815797806
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16,40,2,128,1,float16,fp8,0,0.009571199864149093
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16,40,4,128,1,float16,float16,0,0.010462400317192078
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16,40,4,128,1,float16,fp8,0,0.009462399780750275
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16,40,8,128,1,float16,float16,0,0.010361599922180175
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16,40,4,128,1,fp8,fp8,0,0.010447999835014344
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16,40,8,128,1,float16,fp8,0,0.009667199850082398
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16,40,8,128,1,fp8,fp8,0,0.010478399693965912
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16,40,40,128,1,float16,float16,0,0.0104592002928257
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16,40,40,128,1,float16,fp8,0,0.010516799986362457
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16,40,40,128,1,fp8,fp8,0,0.008934400230646133
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16,40,1,128,1,float16,float16,0,0.010446400195360184
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16,40,1,128,1,fp8,fp8,0,0.010427200049161912
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16,40,1,128,1,float16,fp8,0,0.009574399888515472
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16,40,2,128,1,float16,fp8,0,0.009913600236177444
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16,40,2,128,1,fp8,fp8,0,0.009886399656534196
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16,40,4,128,1,float16,float16,0,0.0104032002389431
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,16,40,2,128,1,fp8,fp8,0,0.022075200080871583
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16,40,4,128,1,float16,fp8,0,0.008489599823951722
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16,40,4,128,1,fp8,fp8,0,0.010454399883747101
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16,40,8,128,1,float16,fp8,0,0.010358399897813796
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16,40,8,128,1,fp8,fp8,0,0.01048159971833229
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,1,40,1,128,1,float16,float16,0,0.21582560539245604
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,1,40,1,128,1,float16,fp8,0,0.20175518989562988
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,1,40,1,128,1,fp8,fp8,0,0.20184319019317626
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,1,40,2,128,1,float16,float16,0,0.21619839668273927
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,1,40,2,128,1,float16,fp8,0,0.20214879512786865
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,16,40,40,128,1,float16,fp8,0,0.06392319798469544
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,1,40,2,128,1,fp8,fp8,0,0.2020143985748291
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,1,40,4,128,1,float16,float16,0,0.21491520404815673
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16,40,8,128,1,float16,fp8,0,0.012454400211572647
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,1,40,4,128,1,float16,fp8,0,0.20223040580749513
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,1,40,4,128,1,fp8,fp8,0,0.2019200086593628
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,1,40,8,128,1,float16,float16,0,0.21503360271453859
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16,40,1,128,1,fp8,fp8,0,0.010441599786281586
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,1,40,8,128,1,float16,fp8,0,0.20226240158081055
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,1,40,8,128,1,fp8,fp8,0,0.2019184112548828
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,1,40,40,128,1,float16,float16,0,0.11525919437408447
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16,40,2,128,1,fp8,fp8,0,0.010452800244092942
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,1,40,40,128,1,float16,fp8,0,0.10636800527572632
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,1,40,40,128,1,fp8,fp8,0,0.10744800567626953
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,1,40,1,128,1,float16,fp8,0,0.1070207953453064
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,1,40,1,128,1,fp8,fp8,0,0.10587519407272339
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,1,40,2,128,1,float16,float16,0,0.11343040466308593
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,1,40,2,128,1,float16,fp8,0,0.10606399774551392
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,1,40,2,128,1,fp8,fp8,0,0.1069648027420044
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,1,40,4,128,1,float16,float16,0,0.11358879804611206
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,1,40,4,128,1,fp8,fp8,0,0.10624480247497559
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,1,40,4,128,1,float16,fp8,0,0.10635520219802856
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,1,40,8,128,1,float16,fp8,0,0.10644799470901489
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16,40,2,128,1,float16,float16,0,0.010468800365924836
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1,40,40,128,1,float16,float16,0,0.06362400054931641
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,1,40,8,128,1,fp8,fp8,0,0.10642880201339722
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1,40,40,128,1,float16,fp8,0,0.05869600176811218
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1,40,40,128,1,fp8,fp8,0,0.0588703989982605
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1,40,1,128,1,float16,float16,0,0.06298559904098511
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1,40,1,128,1,float16,fp8,0,0.05878239870071411
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1,40,1,128,1,fp8,fp8,0,0.0579695999622345
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1,40,2,128,1,float16,float16,0,0.0624064028263092
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1,40,2,128,1,float16,fp8,0,0.05826560258865356
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1,40,2,128,1,fp8,fp8,0,0.05853279829025269
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1,40,4,128,1,float16,float16,0,0.06238719820976257
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16,40,8,128,1,float16,float16,0,0.008723200112581254
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1,40,4,128,1,fp8,fp8,0,0.0582863986492157
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1,40,8,128,1,float16,float16,0,0.06299039721488953
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1,40,8,128,1,float16,fp8,0,0.058100801706314084
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1,40,40,128,1,float16,float16,0,0.03712800145149231
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1,40,8,128,1,fp8,fp8,0,0.058376002311706546
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1,40,40,128,1,float16,fp8,0,0.03296479880809784
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1,40,1,128,1,float16,float16,0,0.03545759916305542
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1,40,1,128,1,float16,fp8,0,0.03299199938774109
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1,40,2,128,1,float16,float16,0,0.035627201199531555
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1,40,40,128,1,fp8,fp8,0,0.0330128014087677
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1,40,2,128,1,float16,fp8,0,0.0330592006444931
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1,40,2,128,1,fp8,fp8,0,0.03296320140361786
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1,40,4,128,1,float16,float16,0,0.035971200466156004
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1,40,4,128,1,float16,fp8,0,0.033215999603271484
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1,40,4,128,1,fp8,fp8,0,0.03295679986476898
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1,40,8,128,1,float16,float16,0,0.03604640066623688
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1,40,8,128,1,fp8,fp8,0,0.032979199290275575
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1,40,8,128,1,float16,fp8,0,0.033188799023628236
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1,40,40,128,1,float16,float16,0,0.02276639938354492
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1,40,40,128,1,float16,fp8,0,0.020755200088024138
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1,40,40,128,1,fp8,fp8,0,0.02091040015220642
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1,40,1,128,1,float16,float16,0,0.02268799990415573
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1,40,1,128,1,float16,fp8,0,0.020737600326538087
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1,40,1,128,1,fp8,fp8,0,0.020692799985408784
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1,40,2,128,1,float16,float16,0,0.0227743998169899
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1,40,2,128,1,float16,fp8,0,0.02069759964942932
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1,40,2,128,1,fp8,fp8,0,0.020683200657367708
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1,40,4,128,1,float16,float16,0,0.022745600342750548
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1,40,4,128,1,float16,fp8,0,0.020744000375270844
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1,40,8,128,1,float16,float16,0,0.022644799947738648
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1,40,8,128,1,float16,fp8,0,0.020776000618934632
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1,40,8,128,1,fp8,fp8,0,0.02072799950838089
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1,40,40,128,1,float16,float16,0,0.016603200137615202
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1,40,40,128,1,float16,fp8,0,0.015052799880504609
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1,40,40,128,1,fp8,fp8,0,0.014558400213718414
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1,40,1,128,1,float16,float16,0,0.01658399999141693
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1,40,1,128,1,fp8,fp8,0,0.014588800072669984
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1,40,1,128,1,float16,fp8,0,0.014534400403499603
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1,40,2,128,1,fp8,fp8,0,0.01459839940071106
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1,40,4,128,1,float16,float16,0,0.01549919992685318
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1,40,4,128,1,float16,fp8,0,0.014526399970054626
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1,40,4,128,1,fp8,fp8,0,0.014561599493026734
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1,40,8,128,1,float16,float16,0,0.015600000321865082
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1,40,8,128,1,float16,fp8,0,0.014585599303245544
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1,40,8,128,1,fp8,fp8,0,0.014627200365066529
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,1,40,1,128,1,float16,float16,0,0.11344319581985474
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1,40,40,128,1,float16,float16,0,0.014134399592876434
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1,40,40,128,1,float16,fp8,0,0.012511999905109405
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1,40,40,128,1,fp8,fp8,0,0.012486399710178375
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1,40,1,128,1,float16,float16,0,0.012529599666595458
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1,40,1,128,1,float16,fp8,0,0.01250240057706833
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1,40,1,128,1,fp8,fp8,0,0.01244639977812767
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1,40,2,128,1,float16,float16,0,0.012510399520397186
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,1,40,8,128,1,float16,float16,0,0.11431679725646973
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1,40,2,128,1,fp8,fp8,0,0.012479999661445617
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1,40,4,128,1,float16,float16,0,0.012476799637079239
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1,40,4,128,1,float16,fp8,0,0.012831999361515046
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1,40,8,128,1,float16,float16,0,0.012484800070524216
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1,40,8,128,1,float16,fp8,0,0.012521600723266602
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1,40,8,128,1,fp8,fp8,0,0.012513600289821625
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1,40,4,128,1,float16,fp8,0,0.05829120278358459
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1,40,40,128,1,float16,float16,0,0.010446400195360184
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1,40,40,128,1,float16,fp8,0,0.009940800070762635
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1,40,40,128,1,fp8,fp8,0,0.009748800098896027
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1,40,1,128,1,fp8,fp8,0,0.03298400044441223
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1,40,1,128,1,float16,fp8,0,0.010326399654150008
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1,40,1,128,1,fp8,fp8,0,0.010364799946546554
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1,40,2,128,1,float16,float16,0,0.010467199981212616
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1,40,2,128,1,float16,fp8,0,0.010372799634933472
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1,40,2,128,1,fp8,fp8,0,0.010371199995279311
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1,40,4,128,1,float16,float16,0,0.010351999849081039
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1,40,4,128,1,fp8,fp8,0,0.02078399956226349
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1,40,4,128,1,float16,fp8,0,0.01037919968366623
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1,40,4,128,1,fp8,fp8,0,0.01045759990811348
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1,40,8,128,1,float16,float16,0,0.010401599854230881
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1,40,8,128,1,float16,fp8,0,0.010436800122261048
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1,40,8,128,1,fp8,fp8,0,0.010371199995279311
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1,40,40,128,1,float16,float16,0,0.01048159971833229
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1,40,40,128,1,float16,fp8,0,0.010339199751615524
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1,40,2,128,1,float16,float16,0,0.01656000018119812
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1,40,40,128,1,fp8,fp8,0,0.009710399806499482
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1,40,1,128,1,float16,float16,0,0.010390400141477584
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1,40,2,128,1,float16,fp8,0,0.014563199877738953
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1,40,1,128,1,fp8,fp8,0,0.00907519981265068
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1,40,2,128,1,float16,float16,0,0.010344000160694122
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1,40,2,128,1,float16,fp8,0,0.008433599770069123
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1,40,2,128,1,fp8,fp8,0,0.009464000165462495
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1,40,4,128,1,float16,float16,0,0.010358399897813796
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1,40,4,128,1,float16,fp8,0,0.008499199897050858
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1,40,4,128,1,fp8,fp8,0,0.00963200032711029
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1,40,8,128,1,float16,fp8,0,0.00841279998421669
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1,40,8,128,1,fp8,fp8,0,0.009547200053930283
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1,40,2,128,1,float16,fp8,0,0.012491200119256973
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1,40,4,128,1,fp8,fp8,0,0.012467200309038163
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1,40,1,128,1,float16,float16,0,0.010447999835014344
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1,40,1,128,1,float16,fp8,0,0.008430399745702744
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1,40,8,128,1,float16,float16,0,0.010407999902963639
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,16384,32,1,128,1,float16,fp8,0,11.153794860839843
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,16384,32,1,128,1,fp8,fp8,0,11.099222564697266
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,16384,32,2,128,1,float16,fp8,0,11.2672607421875
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,16384,32,1,128,1,float16,float16,0,14.179098510742188
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,16384,32,2,128,1,fp8,fp8,0,11.098697662353516
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,16384,32,2,128,1,float16,float16,0,14.260118103027343
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,16384,32,4,128,1,float16,fp8,0,11.19616470336914
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,16384,32,4,128,1,float16,float16,0,14.268373107910156
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,16384,32,4,128,1,fp8,fp8,0,11.46185760498047
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16384,32,32,128,1,fp8,fp8,0,5.808158493041992
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16384,32,32,128,1,float16,fp8,0,5.778035354614258
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,16384,32,8,128,1,float16,fp8,0,11.727561950683594
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16384,32,1,128,1,float16,float16,0,7.400296020507812
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,16384,32,8,128,1,fp8,fp8,0,11.489473724365235
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,16384,32,8,128,1,float16,float16,0,14.843788146972656
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16384,32,1,128,1,float16,fp8,0,5.637744140625
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16384,32,1,128,1,fp8,fp8,0,5.54126091003418
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16384,32,2,128,1,float16,fp8,0,5.8068992614746096
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16384,32,2,128,1,float16,float16,0,7.1875762939453125
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16384,32,2,128,1,fp8,fp8,0,5.709241485595703
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16384,32,4,128,1,float16,fp8,0,5.6950126647949215
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16384,32,4,128,1,float16,float16,0,7.280790710449219
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16384,32,4,128,1,fp8,fp8,0,5.721902465820312
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16384,32,32,128,1,float16,fp8,0,2.96910400390625
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16384,32,32,128,1,fp8,fp8,0,3.279492950439453
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16384,32,8,128,1,float16,fp8,0,5.803732681274414
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16384,32,8,128,1,float16,float16,0,7.517892456054687
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16384,32,8,128,1,fp8,fp8,0,5.861518478393554
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16384,32,1,128,1,float16,float16,0,3.6130577087402345
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16384,32,1,128,1,float16,fp8,0,2.851911926269531
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16384,32,1,128,1,fp8,fp8,0,2.843764877319336
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16384,32,2,128,1,fp8,fp8,0,2.9073488235473635
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16384,32,2,128,1,float16,fp8,0,3.2301136016845704
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16384,32,2,128,1,float16,float16,0,3.6783313751220703
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16384,32,4,128,1,float16,float16,0,3.6750415802001952
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16384,32,4,128,1,fp8,fp8,0,2.843516731262207
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16384,32,8,128,1,float16,fp8,0,2.821900749206543
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16384,32,8,128,1,float16,float16,0,3.6162113189697265
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16384,32,8,128,1,fp8,fp8,0,3.121561622619629
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16384,32,32,128,1,float16,fp8,0,1.5406959533691407
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16384,32,32,128,1,fp8,fp8,0,1.5603167533874511
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16384,32,1,128,1,float16,fp8,0,1.508664035797119
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16384,32,4,128,1,float16,fp8,0,2.856233596801758
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16384,32,1,128,1,fp8,fp8,0,1.4645440101623535
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16384,32,1,128,1,float16,float16,0,2.018971252441406
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16384,32,2,128,1,float16,float16,0,1.7076992034912108
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16384,32,2,128,1,float16,fp8,0,1.4767855644226073
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16384,32,2,128,1,fp8,fp8,0,1.4621760368347168
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16384,32,4,128,1,float16,float16,0,1.7491600036621093
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16384,32,4,128,1,float16,fp8,0,1.6924848556518555
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16384,32,8,128,1,float16,fp8,0,1.4635711669921876
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16384,32,4,128,1,fp8,fp8,0,1.6410160064697266
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16384,32,8,128,1,float16,float16,0,1.7484207153320312
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16384,32,8,128,1,fp8,fp8,0,1.6585615158081055
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,12288,32,1,128,1,float16,fp8,0,6.4433135986328125
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,12288,32,1,128,1,fp8,fp8,0,6.510736083984375
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,12288,32,1,128,1,float16,float16,0,8.413947296142577
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,12288,32,2,128,1,float16,float16,0,8.473625946044923
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,12288,32,2,128,1,float16,fp8,0,6.604084777832031
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,12288,32,2,128,1,fp8,fp8,0,6.506003570556641
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,12288,32,4,128,1,float16,fp8,0,6.519742584228515
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,12288,32,4,128,1,float16,float16,0,8.510887908935548
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,12288,32,4,128,1,fp8,fp8,0,6.721110534667969
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,12288,32,8,128,1,float16,float16,0,8.296595001220703
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,12288,32,32,128,1,float16,fp8,0,3.4593326568603517
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,12288,32,8,128,1,float16,fp8,0,6.640643310546875
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,12288,32,8,128,1,fp8,fp8,0,6.536547088623047
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,12288,32,32,128,1,fp8,fp8,0,3.4917343139648436
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,12288,32,1,128,1,float16,float16,0,4.1428367614746096
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,12288,32,1,128,1,float16,fp8,0,3.314072036743164
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,12288,32,1,128,1,fp8,fp8,0,3.360340881347656
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,12288,32,2,128,1,float16,fp8,0,3.2018177032470705
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,12288,32,2,128,1,fp8,fp8,0,3.3385246276855467
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,12288,32,2,128,1,float16,float16,0,4.1032463073730465
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,12288,32,4,128,1,float16,float16,0,4.211579132080078
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,12288,32,4,128,1,float16,fp8,0,3.2233104705810547
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,12288,32,4,128,1,fp8,fp8,0,3.233118438720703
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,12288,32,8,128,1,float16,float16,0,4.228348922729492
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,12288,32,8,128,1,fp8,fp8,0,3.2777633666992188
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,12288,32,8,128,1,float16,fp8,0,3.617630386352539
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,12288,32,32,128,1,float16,fp8,0,2.0988304138183596
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,12288,32,32,128,1,fp8,fp8,0,1.7722976684570313
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,12288,32,1,128,1,float16,fp8,0,1.6775136947631837
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,12288,32,1,128,1,float16,float16,0,2.0297679901123047
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,12288,32,1,128,1,fp8,fp8,0,1.6498399734497071
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,12288,32,2,128,1,float16,float16,0,1.9721376419067382
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,12288,32,2,128,1,float16,fp8,0,1.887593650817871
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,12288,32,2,128,1,fp8,fp8,0,1.6413984298706055
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,12288,32,4,128,1,float16,fp8,0,1.7254751205444336
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,12288,32,4,128,1,float16,float16,0,2.053219223022461
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,12288,32,4,128,1,fp8,fp8,0,1.6699312210083008
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,12288,32,8,128,1,float16,float16,0,1.9741424560546874
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,12288,32,8,128,1,float16,fp8,0,1.8917247772216796
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,12288,32,8,128,1,fp8,fp8,0,1.66253604888916
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,12288,32,32,128,1,float16,fp8,0,0.9571887969970703
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,12288,32,32,128,1,fp8,fp8,0,0.9583824157714844
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,12288,32,1,128,1,float16,float16,0,0.9909808158874511
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,12288,32,1,128,1,float16,fp8,0,0.8812848091125488
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,12288,32,1,128,1,fp8,fp8,0,0.9168416023254394
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,12288,32,2,128,1,float16,float16,0,0.9940079689025879
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,12288,32,2,128,1,float16,fp8,0,0.8938176155090332
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,12288,32,2,128,1,fp8,fp8,0,0.9644351959228515
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,12288,32,4,128,1,float16,float16,0,0.9959232330322265
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,12288,32,4,128,1,float16,fp8,0,0.915396785736084
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,12288,32,4,128,1,fp8,fp8,0,0.9397600173950196
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,12288,32,8,128,1,float16,float16,0,1.0061792373657226
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,12288,32,8,128,1,float16,fp8,0,0.8975695610046387
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,12288,32,8,128,1,fp8,fp8,0,0.9118096351623535
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,10240,32,1,128,1,float16,fp8,0,4.685780715942383
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,10240,32,1,128,1,float16,float16,0,5.756579208374023
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,10240,32,1,128,1,fp8,fp8,0,4.700340652465821
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,10240,32,2,128,1,float16,float16,0,5.88322868347168
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,10240,32,2,128,1,float16,fp8,0,4.707777786254883
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,10240,32,2,128,1,fp8,fp8,0,4.677624130249024
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,10240,32,4,128,1,float16,float16,0,6.005883026123047
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,10240,32,4,128,1,float16,fp8,0,4.561419296264648
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,10240,32,4,128,1,fp8,fp8,0,4.75677604675293
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,10240,32,8,128,1,float16,float16,0,5.898619079589844
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,10240,32,8,128,1,float16,fp8,0,4.6465198516845705
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,10240,32,8,128,1,fp8,fp8,0,4.753895950317383
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,10240,32,32,128,1,float16,fp8,0,2.5096559524536133
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,10240,32,32,128,1,fp8,fp8,0,2.481769561767578
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,10240,32,1,128,1,float16,float16,0,2.7611440658569335
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,10240,32,1,128,1,float16,fp8,0,2.5375728607177734
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,10240,32,1,128,1,fp8,fp8,0,2.3241920471191406
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,10240,32,2,128,1,float16,float16,0,2.7726192474365234
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,10240,32,2,128,1,float16,fp8,0,2.532526397705078
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,10240,32,2,128,1,fp8,fp8,0,2.3051008224487304
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,10240,32,4,128,1,float16,float16,0,2.785427284240723
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,10240,32,4,128,1,float16,fp8,0,2.591856002807617
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,10240,32,4,128,1,fp8,fp8,0,2.311510467529297
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,10240,32,8,128,1,float16,float16,0,2.80283203125
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,10240,32,8,128,1,float16,fp8,0,2.5825424194335938
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,10240,32,8,128,1,fp8,fp8,0,2.3144672393798826
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,12288,32,32,128,1,float16,float16,0,1.0618351936340331
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16384,32,32,128,1,float16,float16,0,1.7546207427978515
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,12288,32,32,128,1,float16,float16,0,2.032419204711914
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,10240,32,32,128,1,float16,fp8,0,1.352177619934082
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,12288,32,32,128,1,float16,float16,0,4.1924785614013675
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,10240,32,32,128,1,fp8,fp8,0,1.4054800033569337
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,10240,32,32,128,1,float16,float16,0,1.4512016296386718
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,10240,32,1,128,1,float16,float16,0,1.5067520141601562
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,10240,32,1,128,1,float16,fp8,0,1.2006143569946288
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16384,32,32,128,1,float16,float16,0,3.640292739868164
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,10240,32,2,128,1,float16,fp8,0,1.2189120292663573
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,10240,32,2,128,1,float16,float16,0,1.3345423698425294
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,10240,32,1,128,1,fp8,fp8,0,1.3665072441101074
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,10240,32,2,128,1,fp8,fp8,0,1.3807488441467286
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16384,32,32,128,1,float16,float16,0,7.365019226074219
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,10240,32,4,128,1,float16,float16,0,1.40164155960083
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,10240,32,4,128,1,float16,fp8,0,1.414356803894043
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,10240,32,4,128,1,fp8,fp8,0,1.2095295906066894
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,10240,32,8,128,1,float16,fp8,0,1.2197487831115723
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,10240,32,8,128,1,float16,float16,0,1.457366371154785
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,10240,32,32,128,1,float16,float16,0,0.8295472145080567
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,10240,32,8,128,1,fp8,fp8,0,1.2215696334838868
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,10240,32,32,128,1,float16,fp8,0,0.7762688159942627
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,10240,32,1,128,1,float16,float16,0,0.7047423839569091
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,10240,32,32,128,1,fp8,fp8,0,0.7357391834259033
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,10240,32,1,128,1,float16,fp8,0,0.636359977722168
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,10240,32,1,128,1,fp8,fp8,0,0.7509583950042724
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,10240,32,2,128,1,float16,float16,0,0.7696383953094482
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,10240,32,2,128,1,float16,fp8,0,0.6402976036071777
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,10240,32,2,128,1,fp8,fp8,0,0.665011215209961
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,10240,32,32,128,1,float16,float16,0,2.8934864044189452
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,10240,32,4,128,1,float16,fp8,0,0.6414527893066406
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,10240,32,4,128,1,float16,float16,0,0.7102799892425538
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,10240,32,4,128,1,fp8,fp8,0,0.6366928100585938
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,10240,32,8,128,1,float16,float16,0,0.7375487804412841
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,10240,32,8,128,1,float16,fp8,0,0.6855088233947754
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,10240,32,8,128,1,fp8,fp8,0,0.6415760040283203
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,8192,32,1,128,1,float16,fp8,0,5.925872039794922
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,8192,32,1,128,1,fp8,fp8,0,5.967825698852539
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,8192,32,2,128,1,float16,fp8,0,5.913129425048828
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,8192,32,2,128,1,fp8,fp8,0,5.954412841796875
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,8192,32,1,128,1,float16,float16,0,7.4375053405761715
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,8192,32,2,128,1,float16,float16,0,7.459913635253907
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,8192,32,32,128,1,float16,fp8,0,3.397614288330078
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,8192,32,32,128,1,float16,float16,0,4.089336013793945
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,8192,32,4,128,1,float16,fp8,0,6.042532730102539
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,8192,32,4,128,1,fp8,fp8,0,6.012059020996094
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,8192,32,8,128,1,float16,fp8,0,6.084368133544922
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,8192,32,8,128,1,fp8,fp8,0,6.034513473510742
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,8192,32,8,128,1,float16,float16,0,7.897390747070313
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,8192,32,32,128,1,fp8,fp8,0,3.296852874755859
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,8192,32,1,128,1,float16,fp8,0,3.0235023498535156
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,8192,32,1,128,1,fp8,fp8,0,2.991894340515137
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,8192,32,1,128,1,float16,float16,0,3.8360065460205077
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,8192,32,2,128,1,float16,fp8,0,3.0306447982788085
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,8192,32,2,128,1,float16,float16,0,3.7039230346679686
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,8192,32,2,128,1,fp8,fp8,0,3.194105529785156
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,8192,32,4,128,1,float16,float16,0,3.708812713623047
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,8192,32,4,128,1,fp8,fp8,0,2.9788911819458006
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,8192,32,4,128,1,float16,fp8,0,3.0589839935302736
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,8192,32,8,128,1,float16,fp8,0,3.0833503723144533
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,8192,32,4,128,1,float16,float16,0,7.556977844238281
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,8192,32,32,128,1,float16,float16,0,1.9652175903320312
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,8192,32,8,128,1,float16,float16,0,3.849079895019531
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,8192,32,8,128,1,fp8,fp8,0,3.248302459716797
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,8192,32,32,128,1,fp8,fp8,0,1.63635196685791
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,8192,32,1,128,1,float16,float16,0,1.7291839599609375
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,8192,32,32,128,1,float16,fp8,0,1.8683855056762695
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,8192,32,1,128,1,float16,fp8,0,1.5506575584411622
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,8192,32,1,128,1,fp8,fp8,0,1.5097887992858887
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,8192,32,2,128,1,fp8,fp8,0,1.5485088348388671
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,8192,32,2,128,1,float16,fp8,0,1.7583904266357422
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,8192,32,2,128,1,float16,float16,0,1.9293615341186523
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,8192,32,4,128,1,fp8,fp8,0,1.5630784034729004
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,8192,32,4,128,1,float16,float16,0,1.85655517578125
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,8192,32,4,128,1,float16,fp8,0,1.784649658203125
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,8192,32,8,128,1,float16,fp8,0,1.5106800079345704
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,8192,32,32,128,1,float16,float16,0,0.9816080093383789
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,8192,32,8,128,1,float16,float16,0,1.7967151641845702
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,8192,32,32,128,1,float16,fp8,0,0.9890560150146485
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,8192,32,32,128,1,fp8,fp8,0,0.8618016242980957
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,8192,32,1,128,1,float16,fp8,0,0.8413279533386231
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,8192,32,1,128,1,fp8,fp8,0,0.7903535842895508
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,8192,32,1,128,1,float16,float16,0,1.0684255599975585
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,8192,32,2,128,1,float16,float16,0,0.9034336090087891
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,8192,32,2,128,1,fp8,fp8,0,0.8583663940429688
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,8192,32,2,128,1,float16,fp8,0,1.00830078125
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,8192,32,4,128,1,float16,float16,0,0.8947903633117675
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,8192,32,4,128,1,float16,fp8,0,0.8101967811584473
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,8192,32,4,128,1,fp8,fp8,0,0.7902160167694092
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,8192,32,32,128,1,float16,float16,0,0.560974407196045
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,8192,32,8,128,1,float16,float16,0,1.0552559852600099
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,8192,32,8,128,1,fp8,fp8,0,0.8186112403869629
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,8192,32,8,128,1,float16,fp8,0,0.9269760131835938
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,8192,32,32,128,1,fp8,fp8,0,0.4612127780914307
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,8192,32,32,128,1,float16,fp8,0,0.5191152095794678
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,8192,32,1,128,1,float16,fp8,0,0.4543776035308838
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,8192,32,1,128,1,fp8,fp8,0,0.5077263832092285
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,8192,32,2,128,1,fp8,fp8,0,0.4327824115753174
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,8192,32,2,128,1,float16,fp8,0,0.4706111907958984
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,8192,32,4,128,1,float16,float16,0,0.48551359176635744
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,8192,32,4,128,1,float16,fp8,0,0.45467357635498046
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,8192,32,4,128,1,fp8,fp8,0,0.5061007976531983
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,8192,32,8,128,1,float16,fp8,0,0.4628911972045898
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,8192,32,8,128,1,fp8,fp8,0,0.4601871967315674
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,8192,32,8,128,1,fp8,fp8,0,1.5266431808471679
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,8192,32,1,128,1,float16,float16,0,0.46877760887145997
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,8192,32,2,128,1,float16,float16,0,0.4748879909515381
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,6144,32,1,128,1,float16,fp8,0,3.577500915527344
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,6144,32,1,128,1,fp8,fp8,0,3.513675308227539
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,8192,32,8,128,1,float16,float16,0,0.4823296070098877
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,6144,32,1,128,1,float16,float16,0,4.313590240478516
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,6144,32,2,128,1,float16,float16,0,4.311980819702148
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,6144,32,2,128,1,float16,fp8,0,3.4392623901367188
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,6144,32,2,128,1,fp8,fp8,0,3.4382816314697267
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,6144,32,32,128,1,float16,float16,0,2.310820770263672
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,6144,32,4,128,1,fp8,fp8,0,3.4940399169921874
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,6144,32,4,128,1,float16,fp8,0,3.5872737884521486
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,6144,32,4,128,1,float16,float16,0,4.252972793579102
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,6144,32,8,128,1,float16,fp8,0,3.6308399200439454
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,6144,32,8,128,1,float16,float16,0,4.320294570922852
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,6144,32,32,128,1,float16,fp8,0,1.9273216247558593
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,6144,32,8,128,1,fp8,fp8,0,3.632217788696289
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,6144,32,1,128,1,float16,fp8,0,1.7939247131347655
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,6144,32,32,128,1,fp8,fp8,0,2.1198400497436523
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,6144,32,1,128,1,float16,float16,0,2.0447872161865233
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,6144,32,1,128,1,fp8,fp8,0,1.9969791412353515
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,6144,32,2,128,1,float16,float16,0,2.0158239364624024
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,6144,32,2,128,1,fp8,fp8,0,1.758438491821289
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,6144,32,2,128,1,float16,fp8,0,2.029710388183594
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,6144,32,4,128,1,float16,float16,0,2.132956886291504
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,6144,32,4,128,1,float16,fp8,0,1.7689136505126952
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,6144,32,4,128,1,fp8,fp8,0,1.760513687133789
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,6144,32,8,128,1,float16,fp8,0,1.7745359420776368
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,6144,32,32,128,1,float16,float16,0,1.1408464431762695
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,6144,32,8,128,1,float16,float16,0,2.2631904602050783
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,6144,32,32,128,1,float16,fp8,0,1.2282976150512694
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,6144,32,8,128,1,fp8,fp8,0,2.0247440338134766
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,6144,32,32,128,1,fp8,fp8,0,0.9985967636108398
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,6144,32,1,128,1,float16,float16,0,0.9996208190917969
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,6144,32,1,128,1,float16,fp8,0,0.9080032348632813
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,6144,32,1,128,1,fp8,fp8,0,1.0586175918579102
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,6144,32,2,128,1,float16,fp8,0,0.9465583801269531
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,6144,32,2,128,1,float16,float16,0,1.128206443786621
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,6144,32,2,128,1,fp8,fp8,0,0.936195182800293
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,6144,32,4,128,1,fp8,fp8,0,0.9041983604431152
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,6144,32,4,128,1,float16,fp8,0,1.0028160095214844
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,6144,32,8,128,1,float16,float16,0,1.040940761566162
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,6144,32,8,128,1,float16,fp8,0,0.921348762512207
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,6144,32,32,128,1,float16,fp8,0,0.5177840232849121
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,6144,32,32,128,1,float16,float16,0,0.5907408237457276
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,6144,32,32,128,1,fp8,fp8,0,0.5511216163635254
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,6144,32,8,128,1,fp8,fp8,0,1.0418080329895019
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,6144,32,1,128,1,float16,float16,0,0.5755727767944336
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,6144,32,1,128,1,float16,fp8,0,0.4789696216583252
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,6144,32,1,128,1,fp8,fp8,0,0.4756608009338379
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,6144,32,2,128,1,float16,fp8,0,0.49034719467163085
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,6144,32,2,128,1,float16,float16,0,0.5282671928405762
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,6144,32,2,128,1,fp8,fp8,0,0.5311408042907715
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,6144,32,4,128,1,float16,float16,0,0.5614096164703369
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,6144,32,4,128,1,float16,fp8,0,0.47892961502075193
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,6144,32,4,128,1,fp8,fp8,0,0.4757904052734375
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,6144,32,8,128,1,float16,fp8,0,0.49863362312316895
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,6144,32,8,128,1,fp8,fp8,0,0.4976352214813232
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,6144,32,32,128,1,float16,float16,0,0.3417887926101685
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,6144,32,32,128,1,float16,fp8,0,0.2880000114440918
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,6144,32,1,128,1,float16,float16,0,0.31760640144348146
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,6144,32,1,128,1,fp8,fp8,0,0.2984127998352051
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,6144,32,2,128,1,float16,float16,0,0.2830192089080811
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,6144,32,2,128,1,float16,fp8,0,0.28320798873901365
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,6144,32,2,128,1,fp8,fp8,0,0.2802992105484009
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,6144,32,4,128,1,float16,float16,0,0.2867503881454468
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,6144,32,4,128,1,float16,fp8,0,0.2645695924758911
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,6144,32,4,128,1,fp8,fp8,0,0.28346240520477295
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,6144,32,8,128,1,float16,float16,0,0.3016688108444214
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,6144,32,8,128,1,float16,fp8,0,0.2672399997711182
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,6144,32,8,128,1,fp8,fp8,0,0.26605920791625975
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,6144,32,4,128,1,float16,float16,0,1.0464207649230957
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,6144,32,8,128,1,float16,float16,0,0.5382063865661622
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,6144,32,32,128,1,fp8,fp8,0,0.28711519241333006
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,6144,32,1,128,1,float16,fp8,0,0.2721168041229248
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,4096,32,1,128,1,fp8,fp8,0,3.3009056091308593
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,4096,32,1,128,1,float16,fp8,0,3.367283248901367
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,4096,32,1,128,1,float16,float16,0,4.007281494140625
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,4096,32,2,128,1,float16,fp8,0,3.327276611328125
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,4096,32,2,128,1,float16,float16,0,3.990500640869141
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,4096,32,2,128,1,fp8,fp8,0,3.2999534606933594
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,4096,32,4,128,1,float16,fp8,0,3.298023986816406
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,4096,32,32,128,1,float16,float16,0,2.371321678161621
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,4096,32,4,128,1,fp8,fp8,0,3.3389007568359377
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,4096,32,4,128,1,float16,float16,0,4.018471908569336
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,4096,32,8,128,1,fp8,fp8,0,3.368145751953125
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,4096,32,8,128,1,float16,fp8,0,3.4931392669677734
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,4096,32,8,128,1,float16,float16,0,4.267731094360352
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,4096,32,32,128,1,fp8,fp8,0,1.8915935516357423
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,4096,32,1,128,1,float16,fp8,0,1.6822879791259766
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,4096,32,1,128,1,float16,float16,0,1.947697639465332
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,4096,32,1,128,1,fp8,fp8,0,2.0115407943725585
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,4096,32,2,128,1,float16,fp8,0,1.6911407470703126
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,4096,32,2,128,1,fp8,fp8,0,1.8880064010620117
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,4096,32,4,128,1,float16,float16,0,1.9220224380493165
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,4096,32,4,128,1,float16,fp8,0,1.6633184432983399
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,4096,32,4,128,1,fp8,fp8,0,1.832632064819336
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,4096,32,8,128,1,float16,fp8,0,1.672982406616211
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,4096,32,8,128,1,float16,float16,0,1.942518424987793
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,4096,32,32,128,1,float16,fp8,0,1.8830415725708007
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,4096,32,32,128,1,float16,fp8,0,0.9819952011108398
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,4096,32,32,128,1,float16,float16,0,1.2418383598327636
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,4096,32,8,128,1,fp8,fp8,0,1.6854480743408202
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,4096,32,32,128,1,fp8,fp8,0,0.9785344123840332
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,4096,32,1,128,1,float16,float16,0,0.945041561126709
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,4096,32,1,128,1,float16,fp8,0,1.0014528274536132
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,4096,32,1,128,1,fp8,fp8,0,0.8510959625244141
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,4096,32,2,128,1,float16,float16,0,0.9391440391540528
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,4096,32,2,128,1,fp8,fp8,0,0.8527039527893067
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,4096,32,2,128,1,float16,fp8,0,1.0391488075256348
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,4096,32,4,128,1,float16,float16,0,0.9562000274658203
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,4096,32,2,128,1,float16,float16,0,1.9059423446655273
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,4096,32,4,128,1,float16,fp8,0,0.8513376235961914
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,4096,32,4,128,1,fp8,fp8,0,0.9018704414367675
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,4096,32,32,128,1,float16,float16,0,0.5835855960845947
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,4096,32,8,128,1,float16,fp8,0,0.8533583641052246
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,4096,32,8,128,1,float16,float16,0,0.9653247833251953
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,4096,32,8,128,1,fp8,fp8,0,0.8524191856384278
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,4096,32,32,128,1,fp8,fp8,0,0.5073071956634522
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,4096,32,1,128,1,float16,float16,0,0.499022388458252
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,4096,32,1,128,1,float16,fp8,0,0.46083998680114746
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,4096,32,1,128,1,fp8,fp8,0,0.4848127841949463
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,4096,32,2,128,1,float16,fp8,0,0.44667840003967285
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,4096,32,2,128,1,fp8,fp8,0,0.4457727909088135
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,4096,32,4,128,1,float16,float16,0,0.49823360443115233
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,4096,32,4,128,1,float16,fp8,0,0.46966400146484377
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,4096,32,4,128,1,fp8,fp8,0,0.4524384021759033
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,4096,32,8,128,1,float16,fp8,0,0.44701437950134276
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,4096,32,32,128,1,float16,float16,0,0.306494402885437
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,4096,32,32,128,1,float16,fp8,0,0.27341599464416505
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,4096,32,8,128,1,fp8,fp8,0,0.4457727909088135
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,4096,32,32,128,1,fp8,fp8,0,0.2981312036514282
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,4096,32,1,128,1,float16,float16,0,0.2625024080276489
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,4096,32,1,128,1,float16,fp8,0,0.2430032014846802
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,4096,32,2,128,1,float16,fp8,0,0.2455888032913208
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,4096,32,2,128,1,fp8,fp8,0,0.24303839206695557
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,4096,32,4,128,1,float16,float16,0,0.26315999031066895
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,4096,32,4,128,1,float16,fp8,0,0.24311680793762208
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,4096,32,4,128,1,fp8,fp8,0,0.2461024045944214
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,4096,32,8,128,1,float16,float16,0,0.26823360919952394
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,4096,32,8,128,1,float16,fp8,0,0.2442095994949341
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,4096,32,8,128,1,fp8,fp8,0,0.24377760887145997
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,4096,32,32,128,1,float16,float16,0,0.1691648006439209
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,4096,32,32,128,1,float16,fp8,0,0.15665119886398315
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,4096,32,32,128,1,fp8,fp8,0,0.15600639581680298
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,4096,32,1,128,1,float16,float16,0,0.14865920543670655
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,4096,32,32,128,1,float16,fp8,0,0.501580810546875
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,4096,32,1,128,1,float16,fp8,0,0.14110080003738404
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,4096,32,2,128,1,float16,float16,0,0.15028320550918578
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,4096,32,2,128,1,float16,fp8,0,0.1409775972366333
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,4096,32,2,128,1,fp8,fp8,0,0.1421007990837097
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,4096,32,2,128,1,float16,float16,0,0.5128960132598877
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,4096,32,4,128,1,float16,fp8,0,0.14211039543151854
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,4096,32,4,128,1,fp8,fp8,0,0.14179680347442628
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,4096,32,8,128,1,float16,float16,0,0.1532528042793274
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,4096,32,8,128,1,float16,fp8,0,0.1422271966934204
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,4096,32,8,128,1,float16,float16,0,0.5039696216583252
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,4096,32,8,128,1,fp8,fp8,0,0.14246560335159303
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,4096,32,1,128,1,fp8,fp8,0,0.24403040409088134
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,4096,32,2,128,1,float16,float16,0,0.2660975933074951
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,3072,32,1,128,1,float16,fp8,0,1.9807088851928711
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,3072,32,1,128,1,fp8,fp8,0,1.9827520370483398
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,3072,32,1,128,1,float16,float16,0,2.2715391159057616
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,4096,32,1,128,1,fp8,fp8,0,0.14112160205841065
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,3072,32,2,128,1,float16,float16,0,2.2100223541259765
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,3072,32,2,128,1,float16,fp8,0,1.9864320755004883
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,4096,32,4,128,1,float16,float16,0,0.14890400171279908
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,3072,32,2,128,1,fp8,fp8,0,1.9803983688354492
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,3072,32,4,128,1,float16,float16,0,2.241876792907715
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,3072,32,4,128,1,fp8,fp8,0,1.9825168609619142
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,3072,32,32,128,1,float16,float16,0,1.3656991958618163
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,3072,32,4,128,1,float16,fp8,0,2.19005126953125
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,3072,32,8,128,1,fp8,fp8,0,1.9866207122802735
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,3072,32,8,128,1,float16,float16,0,2.361204719543457
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,3072,32,8,128,1,float16,fp8,0,2.220542335510254
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,3072,32,32,128,1,float16,fp8,0,1.1643168449401855
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,3072,32,1,128,1,float16,float16,0,1.1051983833312988
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,3072,32,1,128,1,fp8,fp8,0,1.0304256439208985
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,3072,32,1,128,1,float16,fp8,0,1.0579263687133789
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,3072,32,32,128,1,fp8,fp8,0,1.3452303886413575
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,3072,32,2,128,1,float16,float16,0,1.096840000152588
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,3072,32,2,128,1,fp8,fp8,0,1.0227968215942382
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,3072,32,2,128,1,float16,fp8,0,1.2767760276794433
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,3072,32,4,128,1,float16,float16,0,1.1425344467163085
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,3072,32,4,128,1,float16,fp8,0,1.097761631011963
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,3072,32,4,128,1,fp8,fp8,0,1.0139391899108887
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,3072,32,8,128,1,float16,float16,0,1.163148784637451
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,3072,32,8,128,1,float16,fp8,0,1.0439567565917969
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,3072,32,32,128,1,float16,float16,0,0.6881807804107666
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,3072,32,32,128,1,float16,fp8,0,0.6444015979766846
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,3072,32,8,128,1,fp8,fp8,0,1.0138256072998046
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,3072,32,1,128,1,float16,fp8,0,0.5228864192962647
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,3072,32,1,128,1,float16,float16,0,0.566534423828125
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,3072,32,1,128,1,fp8,fp8,0,0.5273280143737793
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,3072,32,2,128,1,float16,float16,0,0.5664144039154053
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,3072,32,2,128,1,fp8,fp8,0,0.5293504238128662
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,3072,32,4,128,1,float16,float16,0,0.5771584033966064
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,3072,32,4,128,1,float16,fp8,0,0.5220799922943116
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,3072,32,4,128,1,fp8,fp8,0,0.5363952159881592
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,3072,32,8,128,1,float16,float16,0,0.6076288223266602
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,3072,32,32,128,1,float16,float16,0,0.3583024024963379
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,3072,32,8,128,1,fp8,fp8,0,0.5303311824798584
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,3072,32,32,128,1,float16,fp8,0,0.3189183950424194
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,3072,32,32,128,1,fp8,fp8,0,0.32044799327850343
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,3072,32,1,128,1,float16,float16,0,0.30846400260925294
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,3072,32,1,128,1,float16,fp8,0,0.28934080600738527
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,3072,32,1,128,1,fp8,fp8,0,0.28227839469909666
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,3072,32,2,128,1,float16,fp8,0,0.2786751985549927
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,3072,32,2,128,1,fp8,fp8,0,0.29985120296478274
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,3072,32,4,128,1,float16,float16,0,0.3014672040939331
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,3072,32,4,128,1,float16,fp8,0,0.2805855989456177
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,3072,32,8,128,1,float16,float16,0,0.31108798980712893
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,3072,32,8,128,1,fp8,fp8,0,0.27815680503845214
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,3072,32,32,128,1,float16,float16,0,0.19643039703369142
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,3072,32,32,128,1,float16,fp8,0,0.17628639936447144
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,3072,32,1,128,1,float16,float16,0,0.16162400245666503
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,3072,32,1,128,1,float16,fp8,0,0.154420804977417
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,3072,32,1,128,1,fp8,fp8,0,0.15371040105819703
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,3072,32,2,128,1,float16,float16,0,0.16236000061035155
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,3072,32,32,128,1,fp8,fp8,0,0.7065968036651611
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,3072,32,2,128,1,float16,fp8,0,0.15451200008392335
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,3072,32,2,128,1,float16,fp8,0,0.522982406616211
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,3072,32,4,128,1,float16,float16,0,0.165339195728302
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,3072,32,4,128,1,float16,fp8,0,0.15392160415649414
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,3072,32,4,128,1,fp8,fp8,0,0.15448800325393677
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,3072,32,8,128,1,float16,float16,0,0.16830559968948364
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,3072,32,8,128,1,float16,fp8,0,0.5313839912414551
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,3072,32,8,128,1,float16,fp8,0,0.155622398853302
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,3072,32,32,128,1,float16,float16,0,0.11297279596328735
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,3072,32,2,128,1,float16,float16,0,0.29106879234313965
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,3072,32,32,128,1,float16,fp8,0,0.10405440330505371
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,3072,32,32,128,1,fp8,fp8,0,0.10369759798049927
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,3072,32,1,128,1,float16,float16,0,0.09800639748573303
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,3072,32,4,128,1,fp8,fp8,0,0.2784960031509399
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,3072,32,1,128,1,fp8,fp8,0,0.09320160150527954
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,3072,32,2,128,1,float16,float16,0,0.0982208013534546
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,3072,32,2,128,1,float16,fp8,0,0.09343519806861877
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,3072,32,8,128,1,float16,fp8,0,0.28343360424041747
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,3072,32,2,128,1,fp8,fp8,0,0.09319519996643066
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,3072,32,4,128,1,float16,float16,0,0.09891200065612793
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,3072,32,4,128,1,float16,fp8,0,0.09446240067481995
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,3072,32,4,128,1,fp8,fp8,0,0.09336959719657897
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,3072,32,8,128,1,float16,float16,0,0.10121760368347169
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,3072,32,8,128,1,fp8,fp8,0,0.09340159893035889
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,3072,32,32,128,1,fp8,fp8,0,0.1826464056968689
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,3072,32,2,128,1,fp8,fp8,0,0.15331519842147828
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,2048,32,1,128,1,float16,fp8,0,1.9831727981567382
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,3072,32,8,128,1,fp8,fp8,0,0.15428800582885743
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,2048,32,1,128,1,fp8,fp8,0,1.9832992553710938
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,3072,32,1,128,1,float16,fp8,0,0.09332159757614136
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,2048,32,2,128,1,float16,float16,0,2.161800003051758
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,2048,32,2,128,1,float16,fp8,0,1.9834768295288085
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,2048,32,2,128,1,fp8,fp8,0,1.982708740234375
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,2048,32,4,128,1,float16,float16,0,2.272974395751953
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,2048,32,4,128,1,float16,fp8,0,1.9858207702636719
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,2048,32,1,128,1,float16,float16,0,2.183758354187012
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,2048,32,4,128,1,fp8,fp8,0,2.2387935638427736
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,2048,32,32,128,1,float16,fp8,0,1.218238353729248
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,2048,32,32,128,1,float16,float16,0,1.4152768135070801
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,2048,32,8,128,1,float16,float16,0,2.353196716308594
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,2048,32,8,128,1,fp8,fp8,0,2.1376991271972656
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,2048,32,1,128,1,float16,float16,0,1.0817440032958985
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,3072,32,8,128,1,float16,fp8,0,0.09424319863319397
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,2048,32,32,128,1,fp8,fp8,0,1.2164912223815918
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,2048,32,1,128,1,float16,fp8,0,1.0044400215148925
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,2048,32,1,128,1,fp8,fp8,0,1.071126365661621
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,2048,32,2,128,1,float16,float16,0,1.225553607940674
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,2048,32,2,128,1,float16,fp8,0,1.0189455986022948
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,2048,32,4,128,1,float16,float16,0,1.0893903732299806
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,2048,32,2,128,1,fp8,fp8,0,1.1554944038391113
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,2048,32,4,128,1,fp8,fp8,0,1.0050239562988281
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,2048,32,8,128,1,float16,float16,0,1.1482768058776855
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,2048,32,32,128,1,float16,fp8,0,0.6646624088287354
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,2048,32,8,128,1,float16,fp8,0,1.197201633453369
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,2048,32,32,128,1,float16,float16,0,0.7072864055633545
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,2048,32,8,128,1,fp8,fp8,0,1.0077280044555663
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,2048,32,32,128,1,fp8,fp8,0,0.6219615936279297
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,2048,32,1,128,1,float16,float16,0,0.565393590927124
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,2048,32,1,128,1,float16,fp8,0,0.5246560096740722
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,2048,32,1,128,1,fp8,fp8,0,0.540225601196289
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,2048,32,2,128,1,float16,fp8,0,0.5139039993286133
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,2048,32,8,128,1,float16,fp8,0,1.984035110473633
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,2048,32,2,128,1,float16,float16,0,0.5663648128509522
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,2048,32,4,128,1,float16,float16,0,0.5764143943786622
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,2048,32,4,128,1,float16,fp8,0,0.5237088203430176
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,2048,32,4,128,1,fp8,fp8,0,0.5288735866546631
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,2048,32,32,128,1,float16,float16,0,0.373308801651001
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,2048,32,8,128,1,fp8,fp8,0,0.5412960052490234
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,2048,32,32,128,1,float16,fp8,0,0.3272608041763306
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,2048,32,32,128,1,fp8,fp8,0,0.33019840717315674
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,2048,32,1,128,1,float16,float16,0,0.29057281017303466
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,2048,32,1,128,1,float16,fp8,0,0.2840384006500244
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,2048,32,1,128,1,fp8,fp8,0,0.27088959217071534
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,2048,32,2,128,1,float16,float16,0,0.283355188369751
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,2048,32,2,128,1,float16,fp8,0,0.2871295928955078
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,2048,32,2,128,1,fp8,fp8,0,0.28425920009613037
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,2048,32,4,128,1,float16,float16,0,0.2880160093307495
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,2048,32,4,128,1,float16,fp8,0,0.2771935939788818
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,2048,32,4,128,1,fp8,fp8,0,0.2797919988632202
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,2048,32,8,128,1,float16,float16,0,0.3029263973236084
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,2048,32,4,128,1,float16,fp8,0,1.0038111686706543
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,2048,32,32,128,1,float16,float16,0,0.19526879787445067
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,2048,32,32,128,1,float16,fp8,0,0.17826399803161622
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,2048,32,8,128,1,fp8,fp8,0,0.2764208078384399
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,2048,32,32,128,1,fp8,fp8,0,0.17517119646072388
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,2048,32,1,128,1,float16,float16,0,0.15586719512939454
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,2048,32,1,128,1,fp8,fp8,0,0.14862879514694213
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,2048,32,2,128,1,float16,float16,0,0.153711998462677
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,2048,32,2,128,1,float16,fp8,0,0.1508095979690552
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,2048,32,2,128,1,fp8,fp8,0,0.14812480211257933
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,2048,32,4,128,1,float16,float16,0,0.15671679973602295
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,2048,32,4,128,1,float16,fp8,0,0.1505136013031006
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,2048,32,4,128,1,fp8,fp8,0,0.14839680194854737
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,2048,32,8,128,1,float16,float16,0,0.16193759441375732
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,2048,32,8,128,1,float16,fp8,0,0.1513535976409912
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,2048,32,8,128,1,fp8,fp8,0,0.1492319941520691
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,2048,32,32,128,1,float16,float16,0,0.1097216010093689
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,2048,32,32,128,1,float16,fp8,0,0.10235840082168579
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,2048,32,32,128,1,fp8,fp8,0,0.10085599422454834
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,2048,32,1,128,1,float16,fp8,0,0.08733279705047607
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,2048,32,1,128,1,fp8,fp8,0,0.08583840131759643
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,2048,32,2,128,1,fp8,fp8,0,0.5146768093109131
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,2048,32,2,128,1,float16,float16,0,0.09133759737014771
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,2048,32,2,128,1,float16,fp8,0,0.08640319705009461
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,2048,32,2,128,1,fp8,fp8,0,0.08615520000457763
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,2048,32,4,128,1,float16,fp8,0,0.08646079897880554
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,2048,32,4,128,1,fp8,fp8,0,0.08642399907112122
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,2048,32,8,128,1,float16,fp8,0,0.5141215801239014
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,2048,32,8,128,1,float16,float16,0,0.09444320201873779
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,2048,32,8,128,1,float16,fp8,0,0.08620960116386414
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,2048,32,8,128,1,fp8,fp8,0,0.08646399974822998
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,2048,32,32,128,1,float16,float16,0,0.06797599792480469
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,2048,32,32,128,1,float16,fp8,0,0.061457598209381105
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,2048,32,32,128,1,fp8,fp8,0,0.061686402559280394
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,2048,32,1,128,1,float16,float16,0,0.05799199938774109
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,2048,32,1,128,1,float16,fp8,0,0.055478399991989134
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,2048,32,1,128,1,fp8,fp8,0,0.05499839782714844
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,2048,32,2,128,1,float16,float16,0,0.05858880281448364
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,2048,32,2,128,1,float16,fp8,0,0.05498719811439514
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,2048,32,2,128,1,fp8,fp8,0,0.0554751992225647
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,2048,32,4,128,1,float16,fp8,0,0.05544000267982483
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,2048,32,4,128,1,fp8,fp8,0,0.055516797304153445
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,2048,32,8,128,1,float16,fp8,0,0.2719552040100098
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,2048,32,8,128,1,float16,float16,0,0.059939199686050416
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,2048,32,8,128,1,float16,fp8,0,0.054897600412368776
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,2048,32,8,128,1,fp8,fp8,0,0.055559998750686644
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,2048,32,1,128,1,float16,fp8,0,0.14809119701385498
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,2048,32,1,128,1,float16,float16,0,0.08980159759521485
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1536,32,1,128,1,float16,float16,0,1.3219920158386231
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1536,32,1,128,1,float16,fp8,0,1.2483648300170898
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1536,32,1,128,1,fp8,fp8,0,1.2510064125061036
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,2048,32,4,128,1,float16,float16,0,0.09207680225372314
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,2048,32,8,128,1,float16,float16,0,0.5823311805725098
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1536,32,2,128,1,float16,float16,0,1.3484399795532227
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1536,32,2,128,1,float16,fp8,0,1.3008591651916503
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1536,32,2,128,1,fp8,fp8,0,1.2477616310119628
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1536,32,4,128,1,float16,float16,0,1.3391648292541505
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1536,32,4,128,1,float16,fp8,0,1.2459808349609376
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,2048,32,4,128,1,float16,float16,0,0.059196799993515015
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1536,32,4,128,1,fp8,fp8,0,1.2483183860778808
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1536,32,8,128,1,float16,float16,0,1.4245696067810059
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1536,32,32,128,1,float16,float16,0,0.9139375686645508
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1536,32,32,128,1,float16,fp8,0,0.8731264114379883
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1536,32,1,128,1,float16,fp8,0,0.6344048023223877
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1536,32,8,128,1,float16,fp8,0,1.3357711791992188
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1536,32,8,128,1,fp8,fp8,0,1.2659584045410157
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1536,32,1,128,1,fp8,fp8,0,0.6334735870361328
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1536,32,2,128,1,float16,float16,0,0.6694143772125244
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1536,32,2,128,1,float16,fp8,0,0.6677487850189209
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1536,32,2,128,1,fp8,fp8,0,0.6344592094421386
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1536,32,4,128,1,float16,float16,0,0.6928864002227784
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1536,32,4,128,1,float16,fp8,0,0.6369200229644776
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1536,32,4,128,1,fp8,fp8,0,0.6696063995361328
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1536,32,8,128,1,float16,float16,0,0.7155312061309814
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1536,32,32,128,1,float16,fp8,0,0.40835041999816896
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1536,32,32,128,1,float16,float16,0,0.4661968231201172
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1536,32,8,128,1,float16,fp8,0,0.6417312145233154
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1536,32,8,128,1,fp8,fp8,0,0.6352047920227051
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1536,32,32,128,1,fp8,fp8,0,0.4216752052307129
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1536,32,1,128,1,float16,fp8,0,0.3270960092544556
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1536,32,1,128,1,float16,float16,0,0.36801440715789796
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1536,32,1,128,1,fp8,fp8,0,0.33394880294799806
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1536,32,2,128,1,float16,fp8,0,0.32793760299682617
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1536,32,2,128,1,fp8,fp8,0,0.33887040615081787
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1536,32,4,128,1,float16,fp8,0,0.3272608041763306
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1536,32,4,128,1,float16,float16,0,0.3767519950866699
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1536,32,8,128,1,float16,float16,0,0.36009600162506106
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1536,32,8,128,1,float16,fp8,0,0.3313119888305664
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1536,32,32,128,1,float16,fp8,0,0.21540639400482178
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1536,32,8,128,1,fp8,fp8,0,0.32837278842926027
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1536,32,32,128,1,fp8,fp8,0,0.2162463903427124
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1536,32,1,128,1,float16,float16,0,0.17760640382766724
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1536,32,1,128,1,float16,fp8,0,0.17541600465774537
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1536,32,1,128,1,fp8,fp8,0,0.17575680017471312
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1536,32,2,128,1,float16,float16,0,0.17811199426651
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1536,32,2,128,1,float16,fp8,0,0.17470240592956543
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1536,32,2,128,1,fp8,fp8,0,0.1756160020828247
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1536,32,1,128,1,float16,float16,0,0.6573616027832031
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1536,32,32,128,1,fp8,fp8,0,0.7921472072601319
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1536,32,4,128,1,float16,fp8,0,0.17486239671707154
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1536,32,4,128,1,fp8,fp8,0,0.17596960067749023
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1536,32,8,128,1,float16,float16,0,0.19012800455093384
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1536,32,8,128,1,float16,fp8,0,0.17547359466552734
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1536,32,32,128,1,float16,float16,0,0.13021600246429443
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1536,32,8,128,1,fp8,fp8,0,0.17496479749679567
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1536,32,32,128,1,float16,fp8,0,0.12011040449142456
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1536,32,32,128,1,fp8,fp8,0,0.11898080110549927
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1536,32,1,128,1,float16,float16,0,0.10166399478912354
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1536,32,1,128,1,fp8,fp8,0,0.09774399995803833
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1536,32,2,128,1,float16,float16,0,0.10181119441986083
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1536,32,2,128,1,float16,fp8,0,0.09845600128173829
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1536,32,2,128,1,fp8,fp8,0,0.09781119823455811
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1536,32,4,128,1,float16,float16,0,0.10362080335617066
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1536,32,4,128,1,float16,fp8,0,0.09781439900398255
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1536,32,4,128,1,fp8,fp8,0,0.0982047975063324
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1536,32,8,128,1,float16,float16,0,0.10705440044403076
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1536,32,8,128,1,float16,fp8,0,0.09857439994812012
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1536,32,8,128,1,fp8,fp8,0,0.0985647976398468
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1536,32,32,128,1,float16,fp8,0,0.06862879991531372
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1536,32,32,128,1,fp8,fp8,0,0.06903200149536133
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1536,32,1,128,1,float16,float16,0,0.061536002159118655
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1536,32,1,128,1,float16,fp8,0,0.058905601501464844
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1536,32,1,128,1,fp8,fp8,0,0.058518397808074954
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1536,32,2,128,1,float16,float16,0,0.06114879846572876
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1536,32,2,128,1,float16,fp8,0,0.05859360098838806
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1536,32,2,128,1,fp8,fp8,0,0.05847679972648621
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1536,32,4,128,1,float16,float16,0,0.061921602487564086
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1536,32,2,128,1,float16,float16,0,0.33564159870147703
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1536,32,4,128,1,float16,fp8,0,0.05930240154266357
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1536,32,4,128,1,fp8,fp8,0,0.0586031973361969
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1536,32,8,128,1,float16,float16,0,0.0640608012676239
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1536,32,8,128,1,float16,fp8,0,0.05920799970626831
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1536,32,8,128,1,fp8,fp8,0,0.059115201234817505
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1536,32,4,128,1,fp8,fp8,0,0.33376638889312743
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1536,32,32,128,1,float16,fp8,0,0.04376960098743439
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1536,32,32,128,1,fp8,fp8,0,0.043572801351547244
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1536,32,1,128,1,float16,float16,0,0.04116640090942383
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1536,32,32,128,1,float16,float16,0,0.25329439640045165
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1536,32,1,128,1,fp8,fp8,0,0.03905600011348724
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1536,32,2,128,1,float16,float16,0,0.041177600622177124
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1536,32,2,128,1,float16,fp8,0,0.039110401272773744
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1536,32,2,128,1,fp8,fp8,0,0.039136001467704774
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1536,32,4,128,1,float16,fp8,0,0.03914400041103363
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1536,32,4,128,1,fp8,fp8,0,0.039136001467704774
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1536,32,8,128,1,float16,float16,0,0.04219039976596832
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1536,32,8,128,1,float16,fp8,0,0.03913759887218475
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1536,32,4,128,1,float16,float16,0,0.18201279640197754
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1536,32,8,128,1,fp8,fp8,0,0.039134401082992556
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1536,32,1,128,1,float16,fp8,0,0.09768959879875183
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1536,32,32,128,1,float16,float16,0,0.07564960122108459
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,1024,32,1,128,1,float16,fp8,0,1.330238437652588
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,1024,32,1,128,1,fp8,fp8,0,1.3290271759033203
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1536,32,32,128,1,float16,float16,0,0.045351999998092654
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1536,32,1,128,1,float16,fp8,0,0.039131200313568114
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,1024,32,2,128,1,float16,float16,0,1.3736831665039062
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,1024,32,2,128,1,float16,fp8,0,1.3293951988220214
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1536,32,4,128,1,float16,float16,0,0.041247999668121337
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,1024,32,2,128,1,fp8,fp8,0,1.3282928466796875
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,1024,32,4,128,1,float16,float16,0,1.3907648086547852
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,1024,32,4,128,1,float16,fp8,0,1.3310720443725585
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,1024,32,4,128,1,fp8,fp8,0,1.377609634399414
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,1024,32,8,128,1,float16,fp8,0,1.3308544158935547
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,1024,32,8,128,1,float16,float16,0,1.5001839637756347
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,1024,32,8,128,1,fp8,fp8,0,1.3296527862548828
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1024,32,32,128,1,float16,float16,0,1.0201408386230468
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,1024,32,1,128,1,float16,float16,0,1.3566351890563966
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1024,32,32,128,1,float16,fp8,0,0.885478401184082
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1024,32,32,128,1,fp8,fp8,0,0.8929920196533203
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1024,32,1,128,1,float16,fp8,0,0.6844783782958984
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1024,32,1,128,1,fp8,fp8,0,0.674177598953247
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1024,32,2,128,1,float16,float16,0,0.6930079936981202
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1024,32,2,128,1,fp8,fp8,0,0.6739232063293457
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1024,32,2,128,1,float16,fp8,0,0.7694320201873779
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1024,32,4,128,1,float16,float16,0,0.7218160152435302
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1024,32,4,128,1,float16,fp8,0,0.6738592147827148
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1024,32,4,128,1,fp8,fp8,0,0.6961567878723145
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1024,32,8,128,1,float16,float16,0,0.744432020187378
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1024,32,8,128,1,float16,fp8,0,0.6744416236877442
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1024,32,32,128,1,float16,fp8,0,0.4733712196350098
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1024,32,8,128,1,fp8,fp8,0,0.6742159843444824
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1024,32,1,128,1,float16,float16,0,0.3512768030166626
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1024,32,1,128,1,fp8,fp8,0,0.35144801139831544
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1024,32,2,128,1,float16,float16,0,0.35737760066986085
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1024,32,2,128,1,float16,fp8,0,0.34608480930328367
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1024,32,2,128,1,fp8,fp8,0,0.35174078941345216
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1024,32,4,128,1,float16,float16,0,0.3609488010406494
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1024,32,4,128,1,float16,fp8,0,0.3628848075866699
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1024,32,4,128,1,fp8,fp8,0,0.34531359672546386
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1024,32,8,128,1,float16,float16,0,0.378548789024353
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1024,32,8,128,1,float16,fp8,0,0.34872000217437743
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1024,32,32,128,1,float16,float16,0,0.2605072021484375
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1024,32,8,128,1,fp8,fp8,0,0.36239840984344485
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1024,32,32,128,1,fp8,fp8,0,0.23686718940734863
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1024,32,1,128,1,float16,float16,0,0.18165440559387208
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1024,32,1,128,1,float16,fp8,0,0.18317919969558716
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1024,32,1,128,1,fp8,fp8,0,0.18320000171661377
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1024,32,2,128,1,float16,float16,0,0.1818112015724182
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1024,32,2,128,1,float16,fp8,0,0.18185919523239136
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1024,32,2,128,1,fp8,fp8,0,0.18269120454788207
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1024,32,4,128,1,float16,float16,0,0.1865264058113098
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1024,32,1,128,1,float16,float16,0,0.683073616027832
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1024,32,4,128,1,fp8,fp8,0,0.1819599986076355
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1024,32,8,128,1,float16,float16,0,0.1963520050048828
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1024,32,8,128,1,float16,fp8,0,0.1818320035934448
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1024,32,8,128,1,fp8,fp8,0,0.18223999738693236
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1024,32,32,128,1,float16,float16,0,0.1384992003440857
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1024,32,32,128,1,float16,fp8,0,0.12680959701538086
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1024,32,32,128,1,fp8,fp8,0,0.12681119441986083
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1024,32,1,128,1,float16,float16,0,0.10084799528121949
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1024,32,1,128,1,float16,fp8,0,0.09860799908638
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1024,32,1,128,1,fp8,fp8,0,0.09904639720916748
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1024,32,2,128,1,float16,float16,0,0.10089759826660157
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1024,32,2,128,1,float16,fp8,0,0.09941759705543518
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1024,32,2,128,1,fp8,fp8,0,0.09915360212326049
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1024,32,4,128,1,float16,float16,0,0.10371840000152588
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1024,32,4,128,1,float16,fp8,0,0.09971839785575867
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1024,32,4,128,1,fp8,fp8,0,0.09946399927139282
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1024,32,8,128,1,float16,float16,0,0.10860480070114135
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1024,32,8,128,1,float16,fp8,0,0.1003648042678833
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1024,32,8,128,1,fp8,fp8,0,0.09991040229797363
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1024,32,32,128,1,fp8,fp8,0,0.4509136199951172
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1024,32,32,128,1,float16,float16,0,0.07844480276107788
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1024,32,32,128,1,fp8,fp8,0,0.07190399765968322
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1024,32,32,128,1,float16,fp8,0,0.07219679951667786
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1024,32,1,128,1,float16,fp8,0,0.35787200927734375
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1024,32,1,128,1,float16,float16,0,0.058715200424194335
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1024,32,1,128,1,fp8,fp8,0,0.057467198371887206
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1024,32,1,128,1,float16,fp8,0,0.057631999254226685
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1024,32,2,128,1,float16,float16,0,0.058963197469711306
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1024,32,2,128,1,float16,fp8,0,0.057571202516555786
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1024,32,2,128,1,fp8,fp8,0,0.05758879780769348
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1024,32,4,128,1,float16,float16,0,0.06017919778823853
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1024,32,4,128,1,float16,fp8,0,0.057633602619171144
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1024,32,4,128,1,fp8,fp8,0,0.057467198371887206
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1024,32,8,128,1,float16,float16,0,0.0627232015132904
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1024,32,8,128,1,float16,fp8,0,0.057627201080322266
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1024,32,8,128,1,fp8,fp8,0,0.0577567994594574
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1024,32,32,128,1,float16,float16,0,0.04691999852657318
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1024,32,32,128,1,float16,fp8,0,0.04327679872512817
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1024,32,32,128,1,fp8,fp8,0,0.043224000930786134
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1024,32,1,128,1,float16,float16,0,0.038627201318740846
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1024,32,1,128,1,float16,fp8,0,0.03699840009212494
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1024,32,1,128,1,fp8,fp8,0,0.03711999952793121
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1024,32,2,128,1,float16,fp8,0,0.0370608001947403
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1024,32,4,128,1,float16,float16,0,0.038873600959777835
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1024,32,4,128,1,float16,fp8,0,0.03701440095901489
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1024,32,4,128,1,fp8,fp8,0,0.03698880076408386
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1024,32,8,128,1,float16,float16,0,0.03943839967250824
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1024,32,8,128,1,float16,fp8,0,0.037088000774383546
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1024,32,8,128,1,fp8,fp8,0,0.03707840144634247
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1024,32,32,128,1,float16,float16,0,0.026881599426269533
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1024,32,32,128,1,float16,fp8,0,0.23629119396209716
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1024,32,32,128,1,fp8,fp8,0,0.0268528014421463
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1024,32,32,128,1,float16,fp8,0,0.02685759961605072
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1024,32,1,128,1,float16,float16,0,0.024566400051116943
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1024,32,1,128,1,float16,fp8,0,0.023177599906921385
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1024,32,1,128,1,fp8,fp8,0,0.022731199860572815
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1024,32,2,128,1,float16,fp8,0,0.023254400491714476
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1024,32,2,128,1,fp8,fp8,0,0.022745600342750548
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1024,32,4,128,1,float16,float16,0,0.024740800261497498
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1024,32,4,128,1,float16,fp8,0,0.02279040068387985
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1024,32,8,128,1,float16,float16,0,0.024779200553894043
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1024,32,8,128,1,float16,fp8,0,0.02282239943742752
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1024,32,4,128,1,float16,fp8,0,0.18226879835128784
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1024,32,8,128,1,fp8,fp8,0,0.023684799671173096
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,512,32,1,128,1,float16,float16,0,0.9767120361328125
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,512,32,1,128,1,float16,fp8,0,1.003451156616211
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1024,32,32,128,1,float16,float16,0,0.5448768138885498
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1024,32,2,128,1,float16,float16,0,0.03862879872322082
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,512,32,2,128,1,float16,float16,0,0.9621727943420411
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1024,32,2,128,1,fp8,fp8,0,0.03678399920463562
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,512,32,2,128,1,float16,fp8,0,1.0017727851867675
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1024,32,2,128,1,float16,float16,0,0.024587200582027437
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1024,32,4,128,1,fp8,fp8,0,0.023089599609375
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,512,32,2,128,1,fp8,fp8,0,1.0005200386047364
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,512,32,4,128,1,float16,float16,0,1.076308822631836
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,512,32,4,128,1,fp8,fp8,0,1.001540756225586
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,512,32,8,128,1,float16,float16,0,1.1013952255249024
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,512,32,8,128,1,float16,fp8,0,0.999015998840332
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,512,32,8,128,1,fp8,fp8,0,1.0001168251037598
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,512,32,32,128,1,float16,float16,0,0.8236016273498535
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,512,32,32,128,1,float16,fp8,0,0.7142015933990479
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,512,32,32,128,1,fp8,fp8,0,0.7128464221954346
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,512,32,1,128,1,float16,float16,0,0.4865695953369141
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,512,32,1,128,1,float16,fp8,0,0.5071280002593994
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,512,32,1,128,1,fp8,fp8,0,0.5064688205718995
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,512,32,2,128,1,float16,fp8,0,0.5071263790130616
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,512,32,2,128,1,fp8,fp8,0,0.5068064212799073
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,512,32,4,128,1,float16,float16,0,0.511359977722168
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,512,32,4,128,1,float16,fp8,0,0.5064464092254639
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,512,32,1,128,1,fp8,fp8,0,1.0091520309448243
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,512,32,4,128,1,fp8,fp8,0,0.5069647789001465
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,512,32,8,128,1,float16,float16,0,0.5492800235748291
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,512,32,8,128,1,fp8,fp8,0,0.5210944175720215
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,512,32,32,128,1,float16,float16,0,0.41031999588012696
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,512,32,1,128,1,float16,float16,0,0.25131199359893797
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,512,32,32,128,1,fp8,fp8,0,0.3644927978515625
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,512,32,1,128,1,float16,fp8,0,0.26823360919952394
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,512,32,1,128,1,fp8,fp8,0,0.26056480407714844
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,512,32,2,128,1,float16,float16,0,0.251964807510376
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,512,32,2,128,1,float16,fp8,0,0.2609231948852539
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,512,32,2,128,1,fp8,fp8,0,0.26789920330047606
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,512,32,4,128,1,float16,float16,0,0.2615936040878296
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,512,32,4,128,1,float16,fp8,0,0.9975791931152344
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,512,32,4,128,1,float16,fp8,0,0.2618943929672241
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,512,32,4,128,1,fp8,fp8,0,0.2611952066421509
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,512,32,8,128,1,float16,float16,0,0.28429279327392576
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,512,32,8,128,1,float16,fp8,0,0.26392159461975095
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,512,32,32,128,1,float16,fp8,0,0.18945120573043822
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,512,32,32,128,1,float16,float16,0,0.21231520175933838
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,512,32,32,128,1,fp8,fp8,0,0.19073280096054077
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,512,32,1,128,1,float16,float16,0,0.13556480407714844
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,512,32,1,128,1,float16,fp8,0,0.13786879777908326
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,512,32,1,128,1,fp8,fp8,0,0.1395840048789978
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,512,32,2,128,1,float16,float16,0,0.1346768021583557
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,512,32,2,128,1,float16,fp8,0,0.1391968011856079
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,512,32,2,128,1,fp8,fp8,0,0.1372928023338318
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,512,32,4,128,1,float16,float16,0,0.14001599550247193
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,512,32,4,128,1,float16,fp8,0,0.1379024028778076
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,512,32,4,128,1,fp8,fp8,0,0.13932160139083863
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,512,32,8,128,1,float16,float16,0,0.14875680208206177
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,512,32,8,128,1,float16,fp8,0,0.13893439769744872
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,512,32,32,128,1,float16,float16,0,0.11353919506072999
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,512,32,8,128,1,fp8,fp8,0,0.13778879642486572
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,512,32,32,128,1,float16,fp8,0,0.10271519422531128
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,512,32,32,128,1,fp8,fp8,0,0.10302560329437256
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,512,32,1,128,1,float16,float16,0,0.076801598072052
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,512,32,1,128,1,float16,fp8,0,0.07469599843025207
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,512,32,1,128,1,fp8,fp8,0,0.07560960054397584
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,512,32,2,128,1,float16,float16,0,0.0768992006778717
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,512,32,2,128,1,float16,fp8,0,0.07534080147743225
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,512,32,4,128,1,float16,float16,0,0.078711998462677
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,512,32,4,128,1,float16,fp8,0,0.07582719922065735
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,512,32,4,128,1,fp8,fp8,0,0.07559199929237366
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,512,32,8,128,1,float16,float16,0,0.08276960253715515
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,512,32,8,128,1,float16,fp8,0,0.07592960000038147
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,512,32,8,128,1,fp8,fp8,0,0.07624639868736267
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,512,32,32,128,1,float16,float16,0,0.062431997060775755
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,512,32,32,128,1,float16,fp8,0,0.057548797130584715
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,512,32,1,128,1,float16,float16,0,0.04333280026912689
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,512,32,8,128,1,float16,fp8,0,0.5067808151245117
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,512,32,1,128,1,float16,fp8,0,0.043243199586868286
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,512,32,1,128,1,fp8,fp8,0,0.04311679899692535
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,512,32,32,128,1,float16,fp8,0,0.3636255979537964
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,512,32,2,128,1,float16,fp8,0,0.04326240122318268
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,512,32,2,128,1,fp8,fp8,0,0.04329279959201813
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,512,32,4,128,1,float16,float16,0,0.04415839910507202
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,512,32,4,128,1,float16,fp8,0,0.04317759871482849
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,512,32,4,128,1,fp8,fp8,0,0.04319359958171844
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,512,32,8,128,1,float16,float16,0,0.047393599152565004
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,512,32,8,128,1,float16,fp8,0,0.04321599900722504
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,512,32,8,128,1,fp8,fp8,0,0.043243199586868286
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,512,32,32,128,1,float16,float16,0,0.0370608001947403
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,512,32,32,128,1,float16,fp8,0,0.03420960009098053
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,512,32,32,128,1,fp8,fp8,0,0.03299359977245331
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,512,32,1,128,1,float16,float16,0,0.028678399324417115
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,512,32,1,128,1,float16,fp8,0,0.026910400390625
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,512,32,1,128,1,fp8,fp8,0,0.026881599426269533
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,512,32,2,128,1,float16,float16,0,0.028886398673057555
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,512,32,2,128,1,fp8,fp8,0,0.027182400226593018
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,512,32,4,128,1,float16,float16,0,0.029145601391792297
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,512,32,4,128,1,float16,fp8,0,0.027249601483345032
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,512,32,4,128,1,fp8,fp8,0,0.026876801252365114
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,512,32,8,128,1,float16,float16,0,0.0293503999710083
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,512,32,8,128,1,float16,fp8,0,0.02687999904155731
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,512,32,8,128,1,fp8,fp8,0,0.026848000288009644
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,512,32,32,128,1,float16,float16,0,0.020694400370121
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,512,32,32,128,1,float16,fp8,0,0.020718400180339814
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,512,32,32,128,1,fp8,fp8,0,0.02078240066766739
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,512,32,1,128,1,float16,float16,0,0.018619200587272643
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,512,32,1,128,1,float16,fp8,0,0.018590399622917177
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,512,32,1,128,1,fp8,fp8,0,0.018671999871730804
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,512,32,2,128,1,float16,float16,0,0.4971583843231201
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,512,32,2,128,1,float16,float16,0,0.01865600049495697
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,512,32,2,128,1,float16,fp8,0,0.017323200404644013
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,512,32,2,128,1,fp8,fp8,0,0.018654400110244752
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,512,32,4,128,1,float16,float16,0,0.01868479996919632
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,512,32,4,128,1,float16,fp8,0,0.01839679926633835
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,512,32,4,128,1,fp8,fp8,0,0.018624000251293182
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,512,32,8,128,1,float16,float16,0,0.018667200207710268
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,512,32,8,128,1,float16,fp8,0,0.018681600689888
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,512,32,8,128,1,fp8,fp8,0,0.018464000523090364
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,512,32,32,128,1,float16,fp8,0,0.018593600392341612
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,512,32,32,128,1,float16,float16,0,0.018590399622917177
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,512,32,32,128,1,fp8,fp8,0,0.018607999384403228
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,512,32,1,128,1,float16,float16,0,0.0165583997964859
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,512,32,1,128,1,fp8,fp8,0,0.0166143998503685
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,512,32,2,128,1,float16,float16,0,0.01661760061979294
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,512,32,2,128,1,float16,float16,0,0.043568000197410583
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,512,32,2,128,1,float16,fp8,0,0.01656000018119812
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,512,32,2,128,1,fp8,fp8,0,0.016601599752902985
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,512,32,4,128,1,float16,fp8,0,0.016604800522327424
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,512,32,8,128,1,float16,float16,0,0.01661760061979294
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,512,32,4,128,1,fp8,fp8,0,0.01661120057106018
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,512,32,8,128,1,float16,fp8,0,0.016616000235080718
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,512,32,8,128,1,fp8,fp8,0,0.01664000004529953
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,256,32,1,128,1,float16,float16,0,0.3892672061920166
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,256,32,1,128,1,float16,fp8,0,0.4190832138061523
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,512,32,2,128,1,float16,fp8,0,0.026923200488090514
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,512,32,8,128,1,fp8,fp8,0,0.26064798831939695
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,256,32,1,128,1,fp8,fp8,0,0.4191135883331299
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,256,32,2,128,1,float16,float16,0,0.39166080951690674
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,256,32,2,128,1,float16,fp8,0,0.41901121139526365
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,256,32,2,128,1,fp8,fp8,0,0.4176527976989746
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,256,32,4,128,1,float16,float16,0,0.4121295928955078
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,256,32,4,128,1,float16,fp8,0,0.4187136173248291
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,256,32,4,128,1,fp8,fp8,0,0.41910557746887206
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,256,32,8,128,1,float16,float16,0,0.4524256229400635
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,512,32,2,128,1,fp8,fp8,0,0.07641760110855103
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,256,32,8,128,1,float16,fp8,0,0.4186592102050781
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,256,32,8,128,1,fp8,fp8,0,0.4187664031982422
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,256,32,32,128,1,float16,float16,0,0.3588176012039185
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,256,32,32,128,1,float16,fp8,0,0.3155375957489014
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,256,32,1,128,1,float16,float16,0,0.20233919620513915
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,256,32,32,128,1,fp8,fp8,0,0.31726560592651365
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,256,32,1,128,1,fp8,fp8,0,0.21446559429168702
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,512,32,32,128,1,fp8,fp8,0,0.05749120116233826
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,256,32,2,128,1,float16,float16,0,0.20359840393066406
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,256,32,2,128,1,fp8,fp8,0,0.21509919166564942
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,256,32,4,128,1,float16,float16,0,0.21368000507354737
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,512,32,1,128,1,float16,fp8,0,0.01659359931945801
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,256,32,4,128,1,fp8,fp8,0,0.21476318836212158
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,256,32,8,128,1,float16,float16,0,0.2331167936325073
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,256,32,8,128,1,float16,fp8,0,0.21598880290985106
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,256,32,8,128,1,fp8,fp8,0,0.215500807762146
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,256,32,32,128,1,float16,float16,0,0.184716796875
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,256,32,32,128,1,float16,fp8,0,0.1642624020576477
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,256,32,32,128,1,fp8,fp8,0,0.16425280570983886
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,256,32,1,128,1,float16,fp8,0,0.11282880306243896
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,256,32,1,128,1,fp8,fp8,0,0.11284799575805664
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,256,32,2,128,1,float16,float16,0,0.10882879495620727
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,256,32,2,128,1,float16,fp8,0,0.11316800117492676
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,256,32,2,128,1,fp8,fp8,0,0.11272319555282592
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,256,32,4,128,1,float16,float16,0,0.11344319581985474
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,256,32,4,128,1,float16,fp8,0,0.11252000331878662
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,256,32,4,128,1,fp8,fp8,0,0.1133072018623352
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,256,32,8,128,1,float16,float16,0,0.12236160039901733
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,256,32,8,128,1,float16,fp8,0,0.11344159841537475
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,256,32,8,128,1,fp8,fp8,0,0.1133952021598816
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,256,32,32,128,1,float16,float16,0,0.09883840084075927
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,256,32,32,128,1,float16,fp8,0,0.08678399920463561
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,256,32,1,128,1,float16,float16,0,0.05988320112228394
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,256,32,1,128,1,float16,fp8,0,0.059811198711395265
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,256,32,1,128,1,fp8,fp8,0,0.05975199937820434
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,256,32,2,128,1,float16,float16,0,0.05988479852676391
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,256,32,2,128,1,float16,fp8,0,0.06079040169715881
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,256,32,2,128,1,fp8,fp8,0,0.0603983998298645
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,256,32,4,128,1,float16,float16,0,0.06307680010795594
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,256,32,1,128,1,float16,fp8,0,0.2145103931427002
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,256,32,4,128,1,float16,fp8,0,0.060759997367858885
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,256,32,4,128,1,fp8,fp8,0,0.06039680242538452
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,256,32,8,128,1,float16,float16,0,0.06770880222320556
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,256,32,2,128,1,float16,fp8,0,0.2153088092803955
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,256,32,8,128,1,fp8,fp8,0,0.06171360015869141
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,256,32,32,128,1,float16,float16,0,0.05710399746894836
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,256,32,4,128,1,float16,fp8,0,0.21409280300140382
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,256,32,32,128,1,fp8,fp8,0,0.05108960270881653
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,256,32,1,128,1,float16,float16,0,0.035436800122261046
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,512,32,4,128,1,float16,float16,0,0.016595199704170227
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,256,32,1,128,1,fp8,fp8,0,0.03630400002002716
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,256,32,2,128,1,float16,float16,0,0.03537279963493347
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,256,32,2,128,1,float16,fp8,0,0.036364799737930296
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,256,32,2,128,1,fp8,fp8,0,0.03707680106163025
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,256,32,4,128,1,float16,float16,0,0.037057599425315856
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,256,32,4,128,1,float16,fp8,0,0.036817601323127745
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,256,32,1,128,1,float16,float16,0,0.10835520029067994
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,256,32,4,128,1,fp8,fp8,0,0.036616000533103946
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,256,32,8,128,1,float16,float16,0,0.04043680131435394
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,256,32,8,128,1,float16,fp8,0,0.03650720119476318
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,256,32,8,128,1,fp8,fp8,0,0.03685120046138764
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,256,32,32,128,1,float16,float16,0,0.030888000130653383
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,256,32,32,128,1,float16,fp8,0,0.027399998903274537
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,256,32,32,128,1,fp8,fp8,0,0.028921601176261903
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,256,32,1,128,1,float16,float16,0,0.022276799380779266
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,256,32,1,128,1,float16,fp8,0,0.02069759964942932
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,256,32,1,128,1,fp8,fp8,0,0.022337600588798523
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,256,32,2,128,1,float16,float16,0,0.02171359956264496
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,256,32,2,128,1,float16,fp8,0,0.02085919976234436
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,256,32,4,128,1,float16,float16,0,0.02272160053253174
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,256,32,2,128,1,fp8,fp8,0,0.021143999695777894
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,256,32,4,128,1,float16,fp8,0,0.02136480063199997
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,256,32,4,128,1,fp8,fp8,0,0.022097599506378175
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,256,32,8,128,1,float16,fp8,0,0.0226623997092247
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,256,32,8,128,1,fp8,fp8,0,0.022643199563026427
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,256,32,32,128,1,float16,float16,0,0.01659359931945801
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,256,32,32,128,1,float16,fp8,0,0.01866080015897751
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,256,32,32,128,1,fp8,fp8,0,0.018518400192260743
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,256,32,1,128,1,float16,float16,0,0.01451680064201355
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,256,32,1,128,1,float16,fp8,0,0.014531199634075165
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,256,32,2,128,1,float16,float16,0,0.014569599926471711
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,256,32,2,128,1,float16,fp8,0,0.014483200013637542
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,256,32,32,128,1,fp8,fp8,0,0.0867680013179779
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,256,32,2,128,1,fp8,fp8,0,0.014478400349617004
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,256,32,4,128,1,float16,float16,0,0.014585599303245544
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,256,32,4,128,1,fp8,fp8,0,0.014547200500965118
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,256,32,8,128,1,float16,float16,0,0.01658879965543747
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,256,32,8,128,1,float16,fp8,0,0.014579200744628906
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,256,32,8,128,1,fp8,fp8,0,0.014660799503326416
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,256,32,32,128,1,float16,fp8,0,0.014632000029087067
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,256,32,32,128,1,fp8,fp8,0,0.014561599493026734
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,256,32,1,128,1,float16,float16,0,0.012588800489902496
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,256,32,1,128,1,float16,fp8,0,0.012539200484752655
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,256,32,1,128,1,fp8,fp8,0,0.012585599720478059
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,256,32,2,128,1,float16,float16,0,0.014379200339317322
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,256,32,2,128,1,float16,fp8,0,0.01427839994430542
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,256,32,2,128,1,fp8,fp8,0,0.014177599549293518
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,256,32,8,128,1,float16,fp8,0,0.061559998989105226
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,256,32,4,128,1,float16,float16,0,0.014499199390411378
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,256,32,4,128,1,float16,fp8,0,0.013422399759292603
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,256,32,32,128,1,float16,fp8,0,0.05146719813346863
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,256,32,8,128,1,float16,float16,0,0.014560000598430633
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,256,32,8,128,1,float16,fp8,0,0.014571200311183929
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,256,32,1,128,1,float16,fp8,0,0.0356687992811203
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,256,32,8,128,1,fp8,fp8,0,0.014444799721240997
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,256,32,32,128,1,float16,float16,0,0.014483200013637542
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,256,32,32,128,1,float16,fp8,0,0.014534400403499603
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,256,32,32,128,1,fp8,fp8,0,0.014523200690746307
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,256,32,1,128,1,float16,float16,0,0.012534399330615998
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,256,32,1,128,1,float16,fp8,0,0.012486399710178375
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,256,32,1,128,1,fp8,fp8,0,0.0125231996178627
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,256,32,2,128,1,float16,float16,0,0.01250080019235611
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,256,32,2,128,1,float16,fp8,0,0.012547199428081513
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,256,32,2,128,1,fp8,fp8,0,0.012544000148773193
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,256,32,4,128,1,float16,fp8,0,0.012510399520397186
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,256,32,4,128,1,fp8,fp8,0,0.012503999471664428
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,256,32,8,128,1,float16,float16,0,0.012494400143623352
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,256,32,8,128,1,float16,fp8,0,0.012494400143623352
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,256,32,8,128,1,fp8,fp8,0,0.012652799487113953
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,256,32,8,128,1,float16,float16,0,0.02282239943742752
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,128,32,1,128,1,float16,float16,0,0.2454767942428589
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,128,32,1,128,1,float16,fp8,0,0.26054399013519286
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,256,32,1,128,1,fp8,fp8,0,0.014510400593280792
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,128,32,1,128,1,fp8,fp8,0,0.26113600730895997
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,128,32,2,128,1,float16,float16,0,0.24664640426635742
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,128,32,2,128,1,float16,fp8,0,0.26089279651641845
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,256,32,4,128,1,float16,fp8,0,0.014611199498176575
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,128,32,2,128,1,fp8,fp8,0,0.2609055995941162
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,128,32,4,128,1,float16,float16,0,0.2548640012741089
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,128,32,4,128,1,float16,fp8,0,0.2608911991119385
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,256,32,32,128,1,float16,float16,0,0.01518079936504364
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,128,32,4,128,1,fp8,fp8,0,0.26013119220733644
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,128,32,8,128,1,float16,float16,0,0.2750063896179199
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,128,32,8,128,1,float16,fp8,0,0.2604304075241089
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,128,32,8,128,1,fp8,fp8,0,0.26085920333862306
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,128,32,32,128,1,fp8,fp8,0,0.18691519498825074
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,128,32,1,128,1,float16,float16,0,0.1297808051109314
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,128,32,1,128,1,float16,fp8,0,0.13589440584182738
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,128,32,1,128,1,fp8,fp8,0,0.13584159612655639
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,128,32,2,128,1,float16,float16,0,0.1299456000328064
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,128,32,2,128,1,float16,fp8,0,0.13595839738845825
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,128,32,2,128,1,fp8,fp8,0,0.13577439785003662
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,128,32,4,128,1,float16,float16,0,0.1348191976547241
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,128,32,4,128,1,float16,fp8,0,0.13580479621887206
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,128,32,4,128,1,fp8,fp8,0,0.1357103943824768
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,128,32,8,128,1,float16,float16,0,0.14334880113601683
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,256,32,4,128,1,fp8,fp8,0,0.014315199851989747
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,128,32,8,128,1,float16,fp8,0,0.13573119640350342
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,128,32,8,128,1,fp8,fp8,0,0.13591519594192505
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,128,32,32,128,1,float16,float16,0,0.10487680435180664
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,128,32,32,128,1,float16,fp8,0,0.09819679856300353
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,128,32,32,128,1,fp8,fp8,0,0.09847840070724487
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,128,32,1,128,1,float16,float16,0,0.07039520144462585
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,128,32,1,128,1,float16,fp8,0,0.07179840207099915
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,256,32,4,128,1,float16,float16,0,0.012531200051307678
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,128,32,1,128,1,fp8,fp8,0,0.07189279794692993
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,128,32,2,128,1,float16,fp8,0,0.07189599871635437
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,128,32,2,128,1,fp8,fp8,0,0.0716543972492218
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,128,32,4,128,1,float16,float16,0,0.07312960028648377
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,128,32,4,128,1,fp8,fp8,0,0.072079998254776
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,128,32,4,128,1,float16,fp8,0,0.0724240005016327
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,128,32,8,128,1,float16,fp8,0,0.07231199741363525
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,128,32,8,128,1,fp8,fp8,0,0.07271999716758729
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,128,32,32,128,1,float16,float16,0,0.057038402557373045
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,128,32,32,128,1,float16,fp8,0,0.05353119969367981
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,128,32,32,128,1,fp8,fp8,0,0.05344480276107788
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,128,32,1,128,1,float16,fp8,0,0.03914720118045807
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,128,32,1,128,1,float16,float16,0,0.0390608012676239
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,128,32,1,128,1,fp8,fp8,0,0.03912160098552704
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,128,32,2,128,1,float16,float16,0,0.03916319906711578
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,128,32,2,128,1,float16,fp8,0,0.03916319906711578
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,128,32,2,128,1,fp8,fp8,0,0.03909760117530823
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,128,32,4,128,1,float16,fp8,0,0.039062398672103885
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,128,32,4,128,1,float16,float16,0,0.039447999000549315
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,128,32,4,128,1,fp8,fp8,0,0.0390608012676239
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,128,32,8,128,1,float16,float16,0,0.04326559901237488
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,128,32,32,128,1,float16,float16,0,0.03180800080299377
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,128,32,8,128,1,fp8,fp8,0,0.039155200123786926
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,128,32,32,128,1,fp8,fp8,0,0.030907198786735535
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,128,32,1,128,1,float16,float16,0,0.024792000651359558
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,128,32,1,128,1,float16,fp8,0,0.024784000217914583
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,128,32,1,128,1,fp8,fp8,0,0.024775999784469604
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,128,32,2,128,1,float16,float16,0,0.02470400035381317
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,128,32,2,128,1,float16,fp8,0,0.024804799258708952
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,128,32,2,128,1,fp8,fp8,0,0.024792000651359558
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,128,32,4,128,1,float16,float16,0,0.02478239983320236
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,128,32,4,128,1,float16,fp8,0,0.025064000487327577
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,128,32,8,128,1,float16,float16,0,0.026630398631095887
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,128,32,32,128,1,float16,float16,0,0.2017103910446167
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,128,32,8,128,1,fp8,fp8,0,0.024792000651359558
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,128,32,32,128,1,float16,float16,0,0.01865759938955307
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,128,32,32,128,1,float16,fp8,0,0.018675200641155243
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,128,32,32,128,1,fp8,fp8,0,0.018667200207710268
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,128,32,1,128,1,float16,float16,0,0.017606399953365326
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,128,32,1,128,1,float16,fp8,0,0.01656160056591034
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,128,32,1,128,1,fp8,fp8,0,0.016264000535011293
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,128,32,2,128,1,float16,float16,0,0.01515360027551651
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,128,32,2,128,1,float16,fp8,0,0.016264000535011293
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,128,32,2,128,1,fp8,fp8,0,0.016307200491428375
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,128,32,4,128,1,float16,float16,0,0.01658399999141693
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,128,32,4,128,1,float16,fp8,0,0.016524800658226015
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,128,32,2,128,1,float16,float16,0,0.07074080109596252
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,128,32,4,128,1,fp8,fp8,0,0.016607999801635742
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,128,32,8,128,1,float16,float16,0,0.01658560037612915
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,128,32,8,128,1,float16,fp8,0,0.016604800522327424
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,128,32,8,128,1,float16,float16,0,0.07741439938545228
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,128,32,8,128,1,fp8,fp8,0,0.01660960018634796
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,128,32,32,128,1,float16,float16,0,0.01255359947681427
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,128,32,32,128,1,float16,fp8,0,0.012545600533485413
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,128,32,1,128,1,float16,float16,0,0.010427200049161912
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,128,32,1,128,1,float16,fp8,0,0.01051200032234192
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,128,32,1,128,1,fp8,fp8,0,0.010468800365924836
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,128,32,2,128,1,float16,fp8,0,0.010438399761915207
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,128,32,2,128,1,fp8,fp8,0,0.010443200170993806
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,128,32,4,128,1,float16,float16,0,0.010470400005578995
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,128,32,8,128,1,float16,fp8,0,0.0390639990568161
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,128,32,4,128,1,float16,fp8,0,0.012398400157690049
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,128,32,32,128,1,float16,fp8,0,0.03094240128993988
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,128,32,4,128,1,fp8,fp8,0,0.012439999729394913
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,128,32,8,128,1,float16,float16,0,0.01250080019235611
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,128,32,8,128,1,float16,fp8,0,0.012408000230789185
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,128,32,8,128,1,fp8,fp8,0,0.010531199723482132
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,128,32,32,128,1,float16,float16,0,0.012483199685811996
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,128,32,32,128,1,float16,fp8,0,0.010473600029945374
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,128,32,32,128,1,fp8,fp8,0,0.012457600235939026
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,128,32,1,128,1,float16,float16,0,0.010460799932479859
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,128,32,1,128,1,float16,fp8,0,0.010460799932479859
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,128,32,4,128,1,fp8,fp8,0,0.024758400022983552
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,128,32,1,128,1,fp8,fp8,0,0.01048479974269867
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,128,32,2,128,1,float16,float16,0,0.010393600165843963
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,128,32,2,128,1,float16,fp8,0,0.01043360009789467
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,128,32,4,128,1,float16,fp8,0,0.010441599786281586
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,128,32,4,128,1,fp8,fp8,0,0.010435199737548828
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,128,32,8,128,1,float16,float16,0,0.010467199981212616
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,128,32,32,128,1,float16,fp8,0,0.1852527976036072
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,128,32,8,128,1,float16,fp8,0,0.01045600026845932
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,128,32,8,128,1,fp8,fp8,0,0.010390400141477584
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,128,32,32,128,1,float16,float16,0,0.010883200168609618
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,128,32,32,128,1,float16,fp8,0,0.010414399951696397
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,128,32,32,128,1,fp8,fp8,0,0.010465600341558457
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,128,32,1,128,1,float16,float16,0,0.010428799688816071
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,128,32,1,128,1,float16,fp8,0,0.010440000146627427
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,128,32,1,128,1,fp8,fp8,0,0.010478399693965912
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,128,32,2,128,1,float16,float16,0,0.010451199859380722
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,128,32,2,128,1,float16,fp8,0,0.010367999970912933
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,128,32,2,128,1,fp8,fp8,0,0.010440000146627427
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,128,32,4,128,1,float16,float16,0,0.010436800122261048
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,128,32,4,128,1,float16,fp8,0,0.010475199669599533
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,128,32,4,128,1,fp8,fp8,0,0.010470400005578995
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,128,32,8,128,1,float16,float16,0,0.01048479974269867
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,128,32,8,128,1,float16,fp8,0,0.010406400263309478
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,128,32,8,128,1,fp8,fp8,0,0.0105103999376297
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,64,32,1,128,1,float16,float16,0,0.20711359977722169
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,64,32,1,128,1,float16,fp8,0,0.2107919931411743
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,128,32,32,128,1,fp8,fp8,0,0.012507200241088867
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,64,32,1,128,1,fp8,fp8,0,0.21106240749359131
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,128,32,2,128,1,float16,float16,0,0.011273600161075592
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,64,32,2,128,1,float16,fp8,0,0.21084959506988527
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,64,32,2,128,1,fp8,fp8,0,0.21086719036102294
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,64,32,4,128,1,float16,float16,0,0.21226880550384522
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,64,32,2,128,1,float16,float16,0,0.20754721164703369
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,64,32,4,128,1,float16,fp8,0,0.21135358810424804
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,64,32,4,128,1,fp8,fp8,0,0.21001760959625243
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,64,32,8,128,1,float16,float16,0,0.22024478912353515
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,64,32,8,128,1,float16,fp8,0,0.21123039722442627
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,64,32,8,128,1,fp8,fp8,0,0.21125919818878175
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,64,32,32,128,1,float16,float16,0,0.14274879693984985
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,64,32,32,128,1,float16,fp8,0,0.13622720241546632
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,64,32,32,128,1,fp8,fp8,0,0.13646399974822998
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,64,32,1,128,1,float16,float16,0,0.11049920320510864
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,64,32,1,128,1,float16,fp8,0,0.11023199558258057
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,64,32,1,128,1,fp8,fp8,0,0.11047040224075318
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,64,32,2,128,1,float16,float16,0,0.11085439920425415
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,128,32,4,128,1,float16,float16,0,0.010467199981212616
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,128,32,2,128,1,fp8,fp8,0,0.010443200170993806
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,64,32,2,128,1,float16,fp8,0,0.10957119464874268
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,64,32,2,128,1,fp8,fp8,0,0.11083040237426758
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,64,32,4,128,1,float16,float16,0,0.1122431993484497
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,64,32,4,128,1,float16,fp8,0,0.11019519567489625
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,64,32,4,128,1,fp8,fp8,0,0.11014399528503419
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,64,32,32,128,1,float16,float16,0,0.07644320130348206
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,64,32,8,128,1,float16,float16,0,0.11673920154571533
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,64,32,32,128,1,float16,fp8,0,0.07230880260467529
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,64,32,1,128,1,float16,float16,0,0.05924479961395264
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,64,32,1,128,1,float16,fp8,0,0.05833119750022888
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,64,32,1,128,1,fp8,fp8,0,0.05830079913139343
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,64,32,2,128,1,float16,float16,0,0.0593936026096344
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,64,32,2,128,1,fp8,fp8,0,0.05832639932632446
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,64,32,4,128,1,float16,float16,0,0.061607998609542844
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,64,32,4,128,1,float16,fp8,0,0.05817599892616272
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,64,32,4,128,1,fp8,fp8,0,0.058126401901245114
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,64,32,8,128,1,float16,float16,0,0.06452159881591797
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,64,32,8,128,1,fp8,fp8,0,0.058355200290679934
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,64,32,32,128,1,float16,fp8,0,0.039103999733924866
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,64,32,32,128,1,fp8,fp8,0,0.039110401272773744
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,64,32,1,128,1,float16,float16,0,0.033190399408340454
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,64,32,1,128,1,float16,fp8,0,0.03297759890556336
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,64,32,1,128,1,fp8,fp8,0,0.03300159871578216
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,64,32,2,128,1,float16,float16,0,0.03417119979858398
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,64,32,2,128,1,float16,fp8,0,0.0329584002494812
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,64,32,2,128,1,fp8,fp8,0,0.03298560082912445
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,64,32,4,128,1,float16,float16,0,0.03461920022964478
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,64,32,4,128,1,float16,fp8,0,0.03298079967498779
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,64,32,4,128,1,fp8,fp8,0,0.03291999995708465
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,64,32,8,128,1,float16,float16,0,0.03511520028114319
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,64,32,8,128,1,float16,fp8,0,0.03302879929542542
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,64,32,8,128,1,fp8,fp8,0,0.03293440043926239
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,64,32,32,128,1,float16,float16,0,0.02476000040769577
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,64,32,32,128,1,float16,fp8,0,0.024742400646209715
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,64,32,32,128,1,fp8,fp8,0,0.024806399643421174
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,64,32,1,128,1,float16,float16,0,0.021300800144672394
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,64,32,1,128,1,float16,fp8,0,0.020763200521469117
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,64,32,1,128,1,fp8,fp8,0,0.02065120041370392
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,64,32,2,128,1,float16,float16,0,0.020665599405765532
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,64,32,2,128,1,float16,fp8,0,0.02070080041885376
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,64,32,32,128,1,fp8,fp8,0,0.07263200283050537
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,64,32,8,128,1,fp8,fp8,0,0.1107856035232544
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,128,32,8,128,1,float16,fp8,0,0.02481600046157837
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,64,32,2,128,1,fp8,fp8,0,0.020919999480247496
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,64,32,8,128,1,float16,fp8,0,0.11074240207672119
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,64,32,4,128,1,float16,float16,0,0.0227743998169899
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,64,32,4,128,1,float16,fp8,0,0.02139520049095154
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,64,32,4,128,1,fp8,fp8,0,0.020718400180339814
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,64,32,8,128,1,float16,fp8,0,0.0211776003241539
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,64,32,8,128,1,fp8,fp8,0,0.02112800031900406
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,64,32,32,128,1,float16,fp8,0,0.016633599996566772
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,64,32,32,128,1,fp8,fp8,0,0.01661120057106018
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,64,32,1,128,1,float16,float16,0,0.014563199877738953
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,64,32,1,128,1,float16,fp8,0,0.014552000164985656
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,64,32,1,128,1,fp8,fp8,0,0.014521600306034088
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,64,32,32,128,1,float16,float16,0,0.04333919882774353
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,64,32,2,128,1,float16,float16,0,0.014529600739479065
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,64,32,2,128,1,float16,fp8,0,0.01451359987258911
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,64,32,4,128,1,float16,float16,0,0.014584000408649444
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,64,32,2,128,1,fp8,fp8,0,0.01456640064716339
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,64,32,4,128,1,float16,fp8,0,0.014588800072669984
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,64,32,4,128,1,fp8,fp8,0,0.01459839940071106
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,64,32,8,128,1,float16,float16,0,0.01454399973154068
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,64,32,8,128,1,float16,fp8,0,0.014579200744628906
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,64,32,8,128,1,fp8,fp8,0,0.014550399780273438
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,64,32,32,128,1,float16,float16,0,0.012430399656295776
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,64,32,32,128,1,float16,fp8,0,0.01045759990811348
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,64,32,32,128,1,fp8,fp8,0,0.01082720011472702
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,64,32,1,128,1,float16,float16,0,0.010463999956846238
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,64,32,1,128,1,fp8,fp8,0,0.010487999767065048
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,64,32,2,128,1,float16,float16,0,0.010468800365924836
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,64,32,1,128,1,float16,fp8,0,0.01045600026845932
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,64,32,2,128,1,float16,fp8,0,0.01048479974269867
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,64,32,4,128,1,float16,float16,0,0.010520000010728836
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,64,32,4,128,1,float16,fp8,0,0.010465600341558457
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,64,32,4,128,1,fp8,fp8,0,0.01043360009789467
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,64,32,8,128,1,float16,float16,0,0.010449600219726563
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,64,32,8,128,1,fp8,fp8,0,0.010492800176143647
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,64,32,32,128,1,float16,float16,0,0.010571199655532836
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,64,32,32,128,1,float16,fp8,0,0.010470400005578995
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,64,32,32,128,1,fp8,fp8,0,0.010494399815797806
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,64,32,1,128,1,float16,float16,0,0.010470400005578995
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,64,32,1,128,1,float16,fp8,0,0.010407999902963639
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,64,32,1,128,1,fp8,fp8,0,0.010476800054311753
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,64,32,2,128,1,float16,float16,0,0.010473600029945374
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,64,32,2,128,1,float16,fp8,0,0.010460799932479859
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,64,32,2,128,1,fp8,fp8,0,0.010465600341558457
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,64,32,4,128,1,float16,float16,0,0.010494399815797806
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,64,32,4,128,1,float16,fp8,0,0.0104592002928257
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,64,32,4,128,1,fp8,fp8,0,0.01040000021457672
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,64,32,8,128,1,float16,float16,0,0.010441599786281586
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,64,32,8,128,1,float16,fp8,0,0.010463999956846238
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,64,32,8,128,1,fp8,fp8,0,0.010487999767065048
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,64,32,32,128,1,float16,float16,0,0.010499200224876404
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,64,32,32,128,1,float16,fp8,0,0.010440000146627427
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,64,32,32,128,1,fp8,fp8,0,0.010380800068378448
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,64,32,1,128,1,float16,float16,0,0.010471999645233154
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,64,32,2,128,1,float16,fp8,0,0.05879999995231629
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,64,32,1,128,1,float16,fp8,0,0.010467199981212616
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,64,32,8,128,1,float16,float16,0,0.022737599909305573
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,64,32,1,128,1,fp8,fp8,0,0.010465600341558457
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,64,32,2,128,1,float16,float16,0,0.010412800312042236
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,64,32,32,128,1,float16,float16,0,0.01642560064792633
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,64,32,2,128,1,fp8,fp8,0,0.010478399693965912
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,64,32,8,128,1,float16,fp8,0,0.05892639756202698
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,64,32,4,128,1,fp8,fp8,0,0.010444799810647965
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,64,32,8,128,1,float16,float16,0,0.0104032002389431
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,64,32,8,128,1,fp8,fp8,0,0.010371199995279311
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,32,32,1,128,1,float16,float16,0,0.18430720567703246
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,32,32,1,128,1,float16,fp8,0,0.18339040279388427
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,64,32,2,128,1,fp8,fp8,0,0.010475199669599533
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,32,32,2,128,1,float16,float16,0,0.18358080387115477
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,32,32,2,128,1,float16,fp8,0,0.18280320167541503
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,32,32,2,128,1,fp8,fp8,0,0.18301440477371217
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,32,32,4,128,1,float16,float16,0,0.18524800539016723
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,32,32,4,128,1,float16,fp8,0,0.1832800030708313
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,32,32,4,128,1,fp8,fp8,0,0.18369120359420776
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,32,32,8,128,1,float16,float16,0,0.18948960304260254
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,32,32,8,128,1,float16,fp8,0,0.18316479921340942
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,32,32,8,128,1,fp8,fp8,0,0.18360480070114135
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,32,32,32,128,1,float16,float16,0,0.11414879560470581
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,32,32,32,128,1,float16,fp8,0,0.10946400165557861
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,32,32,32,128,1,fp8,fp8,0,0.11018719673156738
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,32,32,1,128,1,float16,float16,0,0.09727039933204651
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,32,32,1,128,1,float16,fp8,0,0.09626560211181641
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,32,32,1,128,1,fp8,fp8,0,0.09566079974174499
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,32,32,2,128,1,float16,float16,0,0.09728639721870422
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,32,32,2,128,1,float16,fp8,0,0.09584800004959107
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,32,32,2,128,1,fp8,fp8,0,0.09666399955749512
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,32,32,4,128,1,float16,float16,0,0.0986352026462555
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,32,32,4,128,1,float16,fp8,0,0.09631839990615845
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,32,32,4,128,1,fp8,fp8,0,0.09578880071640014
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,64,32,2,128,1,float16,fp8,0,0.010471999645233154
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,64,32,4,128,1,float16,float16,0,0.010398399829864503
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,32,32,8,128,1,float16,float16,0,0.10199520587921143
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,64,32,4,128,1,float16,fp8,0,0.010438399761915207
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,32,32,8,128,1,float16,fp8,0,0.09570879936218261
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,32,32,32,128,1,float16,float16,0,0.062454402446746826
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,32,32,32,128,1,float16,fp8,0,0.058310401439666745
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,32,32,8,128,1,fp8,fp8,0,0.09651520252227783
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,64,32,8,128,1,float16,fp8,0,0.010465600341558457
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,32,32,1,128,1,float16,float16,0,0.053592002391815184
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,32,32,1,128,1,float16,fp8,0,0.05154079794883728
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,32,32,1,128,1,fp8,fp8,0,0.0518127977848053
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,32,32,2,128,1,float16,float16,0,0.0539359986782074
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,32,32,2,128,1,fp8,fp8,0,0.05188959836959839
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,32,32,4,128,1,fp8,fp8,0,0.05206720232963562
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,32,32,8,128,1,float16,float16,0,0.05555840134620667
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,32,32,8,128,1,float16,fp8,0,0.052051198482513425
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,32,32,1,128,1,fp8,fp8,0,0.18334560394287108
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,32,32,8,128,1,fp8,fp8,0,0.051841598749160764
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,32,32,32,128,1,float16,fp8,0,0.033030399680137636
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,32,32,32,128,1,float16,float16,0,0.03359679877758026
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,32,32,32,128,1,fp8,fp8,0,0.03300639986991882
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,64,32,8,128,1,float16,fp8,0,0.010462400317192078
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,32,32,1,128,1,float16,fp8,0,0.02972640097141266
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,32,32,1,128,1,fp8,fp8,0,0.02913439869880676
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,32,32,2,128,1,float16,float16,0,0.03099200129508972
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,32,32,2,128,1,float16,fp8,0,0.02910720109939575
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,32,32,2,128,1,fp8,fp8,0,0.02932640016078949
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,32,32,4,128,1,float16,float16,0,0.031033599376678468
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,32,32,4,128,1,float16,fp8,0,0.02916960120201111
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,32,32,4,128,1,fp8,fp8,0,0.029918399453163148
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,32,32,8,128,1,float16,float16,0,0.03224000036716461
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,32,32,8,128,1,float16,fp8,0,0.030142399668693542
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,32,32,8,128,1,fp8,fp8,0,0.029955199360847472
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,32,32,32,128,1,float16,float16,0,0.02131039947271347
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,32,32,32,128,1,float16,fp8,0,0.020737600326538087
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,32,32,32,128,1,fp8,fp8,0,0.020681600272655486
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,32,32,1,128,1,float16,float16,0,0.0207056000828743
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,32,32,1,128,1,float16,fp8,0,0.018849599361419677
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,32,32,1,128,1,fp8,fp8,0,0.019785599410533906
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,32,32,2,128,1,float16,float16,0,0.020694400370121
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,32,32,2,128,1,fp8,fp8,0,0.01870879977941513
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,32,32,4,128,1,float16,float16,0,0.02073120027780533
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,32,32,4,128,1,float16,fp8,0,0.01980320066213608
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,32,32,4,128,1,fp8,fp8,0,0.020168000459671022
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,32,32,8,128,1,float16,float16,0,0.020684799551963805
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,32,32,8,128,1,float16,fp8,0,0.0203232005238533
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,32,32,8,128,1,fp8,fp8,0,0.020321600139141083
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,32,32,32,128,1,float16,float16,0,0.014539200067520141
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,32,32,32,128,1,float16,fp8,0,0.014563199877738953
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,32,32,32,128,1,fp8,fp8,0,0.014580799639225006
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,32,32,1,128,1,float16,float16,0,0.013519999384880067
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,32,32,4,128,1,float16,float16,0,0.05443040132522583
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,32,32,1,128,1,float16,fp8,0,0.01249760016798973
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,32,32,1,128,1,fp8,fp8,0,0.012521600723266602
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,32,32,4,128,1,float16,fp8,0,0.05177119970321655
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,32,32,2,128,1,float16,fp8,0,0.012476799637079239
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,32,32,2,128,1,fp8,fp8,0,0.012611199915409089
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,32,32,4,128,1,float16,float16,0,0.014151999354362487
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,32,32,4,128,1,float16,fp8,0,0.012654399871826172
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,32,32,4,128,1,fp8,fp8,0,0.012488000094890594
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,32,32,8,128,1,float16,float16,0,0.01449279934167862
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,32,32,8,128,1,float16,fp8,0,0.012598399817943574
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,32,32,8,128,1,fp8,fp8,0,0.012492799758911132
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,32,32,32,128,1,float16,float16,0,0.011614400148391723
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,32,32,32,128,1,float16,fp8,0,0.010412800312042236
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,32,32,32,128,1,fp8,fp8,0,0.010480000078678131
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,32,32,1,128,1,float16,float16,0,0.030934399366378783
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,32,32,1,128,1,float16,float16,0,0.010419200360774993
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,32,32,1,128,1,float16,fp8,0,0.010460799932479859
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,32,32,1,128,1,fp8,fp8,0,0.010452800244092942
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,32,32,2,128,1,float16,float16,0,0.010494399815797806
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,32,32,2,128,1,float16,fp8,0,0.010444799810647965
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,32,32,2,128,1,fp8,fp8,0,0.010443200170993806
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,32,32,4,128,1,float16,float16,0,0.010471999645233154
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,32,32,4,128,1,float16,fp8,0,0.010393600165843963
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,32,32,4,128,1,fp8,fp8,0,0.01045600026845932
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,32,32,8,128,1,float16,float16,0,0.010406400263309478
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,32,32,8,128,1,fp8,fp8,0,0.010422399640083313
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,32,32,8,128,1,float16,fp8,0,0.010360000282526016
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,32,32,32,128,1,float16,fp8,0,0.010467199981212616
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,32,32,32,128,1,fp8,fp8,0,0.010409600287675857
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,32,32,1,128,1,float16,float16,0,0.010360000282526016
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,32,32,1,128,1,float16,fp8,0,0.009467200189828873
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,32,32,1,128,1,fp8,fp8,0,0.010435199737548828
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,32,32,2,128,1,float16,fp8,0,0.01005600020289421
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,32,32,2,128,1,fp8,fp8,0,0.010422399640083313
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,32,32,4,128,1,float16,float16,0,0.010454399883747101
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,32,32,4,128,1,fp8,fp8,0,0.009734400361776353
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,32,32,2,128,1,float16,fp8,0,0.018750399351119995
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,32,32,8,128,1,float16,float16,0,0.010436800122261048
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,32,32,8,128,1,float16,fp8,0,0.010406400263309478
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,32,32,8,128,1,fp8,fp8,0,0.010497599840164185
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,32,32,32,128,1,float16,float16,0,0.010491199791431427
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,32,32,32,128,1,float16,fp8,0,0.010446400195360184
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,32,32,32,128,1,fp8,fp8,0,0.010435199737548828
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,32,32,1,128,1,float16,float16,0,0.010380800068378448
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,32,32,1,128,1,float16,fp8,0,0.00933919996023178
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,32,32,1,128,1,fp8,fp8,0,0.0084927998483181
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,32,32,2,128,1,float16,float16,0,0.010150399804115296
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,32,32,2,128,1,float16,fp8,0,0.008479999750852585
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,32,32,2,128,1,fp8,fp8,0,0.009340800344944
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,32,32,4,128,1,float16,fp8,0,0.00950239971280098
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,32,32,4,128,1,float16,float16,0,0.008774399757385254
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,32,32,4,128,1,fp8,fp8,0,0.010467199981212616
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,32,32,8,128,1,float16,float16,0,0.00886240005493164
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,32,32,8,128,1,float16,fp8,0,0.01040000021457672
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,32,32,8,128,1,fp8,fp8,0,0.00870240032672882
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,16,32,1,128,1,float16,float16,0,0.17705119848251344
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,32,32,32,128,1,fp8,fp8,0,0.05772640109062195
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,16,32,1,128,1,float16,fp8,0,0.17092000246047973
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,32,32,2,128,1,float16,fp8,0,0.051737600564956666
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,16,32,1,128,1,fp8,fp8,0,0.16954400539398193
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,32,32,2,128,1,float16,float16,0,0.013734400272369385
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,16,32,2,128,1,float16,float16,0,0.17773760557174684
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,16,32,2,128,1,fp8,fp8,0,0.17026079893112184
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,16,32,2,128,1,float16,fp8,0,0.17047519683837892
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,16,32,4,128,1,float16,float16,0,0.17895519733428955
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,16,32,4,128,1,float16,fp8,0,0.17113120555877687
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,16,32,4,128,1,fp8,fp8,0,0.17031359672546387
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,16,32,8,128,1,float16,float16,0,0.1813007950782776
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,16,32,8,128,1,fp8,fp8,0,0.1713871955871582
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,16,32,32,128,1,float16,float16,0,0.10402879714965821
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,16,32,8,128,1,float16,fp8,0,0.17099839448928833
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,16,32,32,128,1,float16,fp8,0,0.09550560116767884
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,16,32,32,128,1,fp8,fp8,0,0.09640960097312927
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,16,32,1,128,1,float16,float16,0,0.09447680115699768
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,16,32,1,128,1,float16,fp8,0,0.08993759751319885
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,16,32,1,128,1,fp8,fp8,0,0.08902239799499512
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,16,32,2,128,1,float16,float16,0,0.09407680034637451
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,16,32,2,128,1,float16,fp8,0,0.0897871971130371
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,16,32,4,128,1,float16,float16,0,0.09483360052108765
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,16,32,2,128,1,fp8,fp8,0,0.0896287977695465
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,16,32,4,128,1,float16,fp8,0,0.09020479917526245
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,16,32,4,128,1,fp8,fp8,0,0.08964800238609313
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,16,32,8,128,1,float16,float16,0,0.09555839896202087
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,32,32,32,128,1,float16,float16,0,0.010465600341558457
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,16,32,32,128,1,float16,float16,0,0.05522879958152771
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,16,32,8,128,1,fp8,fp8,0,0.08965920209884644
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,16,32,32,128,1,float16,fp8,0,0.052382397651672366
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,16,32,1,128,1,float16,float16,0,0.052908802032470705
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,16,32,1,128,1,float16,fp8,0,0.04948799908161163
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,32,32,2,128,1,float16,float16,0,0.010425599664449692
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,16,32,1,128,1,fp8,fp8,0,0.049265599250793456
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,16,32,2,128,1,float16,float16,0,0.05294719934463501
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,16,32,2,128,1,float16,fp8,0,0.049291199445724486
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,16,32,2,128,1,fp8,fp8,0,0.04971359968185425
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,16,32,4,128,1,float16,float16,0,0.05278239846229553
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,16,32,4,128,1,float16,fp8,0,0.049527999758720395
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,32,32,4,128,1,float16,fp8,0,0.010443200170993806
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,16,32,8,128,1,float16,float16,0,0.05378400087356568
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,16,32,8,128,1,float16,fp8,0,0.049449598789215087
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,16,32,32,128,1,float16,float16,0,0.032815998792648314
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,16,32,32,128,1,float16,fp8,0,0.03087199926376343
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,16,32,32,128,1,fp8,fp8,0,0.0307343989610672
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,16,32,1,128,1,float16,float16,0,0.030943998694419862
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,16,32,1,128,1,float16,fp8,0,0.028891199827194215
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,16,32,1,128,1,fp8,fp8,0,0.02886880040168762
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,16,32,2,128,1,float16,float16,0,0.031027200818061828
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,16,32,2,128,1,fp8,fp8,0,0.028867200016975403
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,16,32,4,128,1,float16,float16,0,0.0308896005153656
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,16,32,4,128,1,float16,fp8,0,0.02887359857559204
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,16,32,4,128,1,fp8,fp8,0,0.02887200117111206
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,16,32,8,128,1,float16,float16,0,0.030899199843406677
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,16,32,8,128,1,float16,fp8,0,0.02887359857559204
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,16,32,8,128,1,fp8,fp8,0,0.028806400299072266
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,16,32,32,128,1,float16,float16,0,0.0207056000828743
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,16,32,32,128,1,fp8,fp8,0,0.019596800208091736
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,16,32,1,128,1,float16,float16,0,0.020528000593185425
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,16,32,1,128,1,float16,fp8,0,0.018636800348758698
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,16,32,2,128,1,float16,float16,0,0.0194255992770195
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,16,32,2,128,1,float16,fp8,0,0.01866080015897751
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,16,32,2,128,1,fp8,fp8,0,0.018643200397491455
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,16,32,4,128,1,float16,float16,0,0.02069759964942932
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,16,32,4,128,1,float16,fp8,0,0.018619200587272643
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,16,32,4,128,1,fp8,fp8,0,0.018694399297237395
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,16,32,8,128,1,float16,float16,0,0.020708799362182617
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,16,32,8,128,1,float16,fp8,0,0.018638400733470915
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,16,32,8,128,1,fp8,fp8,0,0.018654400110244752
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,16,32,32,128,1,float16,float16,0,0.01483200043439865
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,16,32,32,128,1,float16,fp8,0,0.013235199451446533
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,16,32,32,128,1,fp8,fp8,0,0.012916800379753113
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,16,32,1,128,1,float16,float16,0,0.012715199589729309
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,16,32,1,128,1,float16,fp8,0,0.012558400630950928
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,16,32,1,128,1,fp8,fp8,0,0.012518399953842163
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,16,32,2,128,1,float16,float16,0,0.012705600261688233
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,16,32,2,128,1,float16,fp8,0,0.012491200119256973
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,16,32,32,128,1,fp8,fp8,0,0.05284799933433533
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,16,32,2,128,1,fp8,fp8,0,0.012929600477218629
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,16,32,4,128,1,float16,float16,0,0.014443199336528777
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,16,32,4,128,1,float16,fp8,0,0.012824000418186187
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,16,32,4,128,1,fp8,fp8,0,0.012891200184822083
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,16,32,4,128,1,fp8,fp8,0,0.049262401461601255
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,16,32,8,128,1,float16,fp8,0,0.013123199343681335
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,16,32,8,128,1,fp8,fp8,0,0.012534399330615998
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16,32,32,128,1,float16,float16,0,0.011191999912261963
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16,32,32,128,1,float16,fp8,0,0.010401599854230881
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16,32,32,128,1,fp8,fp8,0,0.010449600219726563
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16,32,1,128,1,float16,float16,0,0.01048320010304451
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16,32,1,128,1,float16,fp8,0,0.01043199971318245
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16,32,1,128,1,fp8,fp8,0,0.010438399761915207
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,16,32,2,128,1,float16,fp8,0,0.02892000079154968
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16,32,2,128,1,float16,float16,0,0.010438399761915207
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16,32,2,128,1,float16,fp8,0,0.01048320010304451
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16,32,2,128,1,fp8,fp8,0,0.010465600341558457
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16,32,4,128,1,float16,fp8,0,0.010526400059461594
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16,32,4,128,1,fp8,fp8,0,0.010409600287675857
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16,32,8,128,1,float16,float16,0,0.010475199669599533
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16,32,8,128,1,float16,fp8,0,0.010424000024795533
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16,32,8,128,1,fp8,fp8,0,0.010369600355625152
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16,32,32,128,1,float16,float16,0,0.010369600355625152
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,16,32,32,128,1,float16,fp8,0,0.01884160041809082
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16,32,32,128,1,float16,fp8,0,0.010342399775981902
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16,32,32,128,1,fp8,fp8,0,0.010356800258159637
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,16,32,1,128,1,fp8,fp8,0,0.0186256006360054
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16,32,1,128,1,float16,float16,0,0.010358399897813796
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16,32,1,128,1,float16,fp8,0,0.010396800190210342
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16,32,1,128,1,fp8,fp8,0,0.010416000336408614
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16,32,2,128,1,float16,float16,0,0.010315199941396713
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16,32,2,128,1,float16,fp8,0,0.010395199805498124
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16,32,2,128,1,fp8,fp8,0,0.009302400052547455
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16,32,4,128,1,float16,fp8,0,0.010356800258159637
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16,32,4,128,1,float16,float16,0,0.010356800258159637
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16,32,4,128,1,fp8,fp8,0,0.009353599697351455
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16,32,8,128,1,float16,float16,0,0.010377600044012069
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16,32,8,128,1,float16,fp8,0,0.010388799756765366
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16,32,8,128,1,fp8,fp8,0,0.009455999732017517
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16,32,32,128,1,float16,fp8,0,0.00933919996023178
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16,32,32,128,1,fp8,fp8,0,0.008382400125265121
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16,32,1,128,1,float16,fp8,0,0.008488000184297562
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16,32,1,128,1,fp8,fp8,0,0.008550400286912918
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16,32,2,128,1,float16,float16,0,0.008708799630403519
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16,32,2,128,1,float16,fp8,0,0.008401600271463394
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16,32,2,128,1,fp8,fp8,0,0.008988799899816513
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16,32,4,128,1,float16,float16,0,0.010419200360774993
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16,32,4,128,1,float16,fp8,0,0.010436800122261048
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16,32,4,128,1,fp8,fp8,0,0.00963200032711029
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16,32,8,128,1,float16,float16,0,0.010435199737548828
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16,32,8,128,1,float16,fp8,0,0.01003040000796318
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16,32,8,128,1,fp8,fp8,0,0.009598399698734283
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,16,32,8,128,1,float16,fp8,0,0.08966079950332642
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,1,32,1,128,1,float16,float16,0,0.17467520236968995
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,1,32,1,128,1,float16,fp8,0,0.1635215997695923
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,16,32,8,128,1,float16,float16,0,0.014563199877738953
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,1,32,1,128,1,fp8,fp8,0,0.16391199827194214
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,1,32,2,128,1,float16,float16,0,0.17490240335464477
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,1,32,4,128,1,float16,float16,0,0.17521599531173707
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,1,32,2,128,1,float16,fp8,0,0.16453440189361573
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,1,32,2,128,1,fp8,fp8,0,0.16371359825134277
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16,32,4,128,1,float16,float16,0,0.01045759990811348
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,1,32,4,128,1,fp8,fp8,0,0.16416159868240357
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,1,32,8,128,1,float16,float16,0,0.1747488021850586
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,1,32,32,128,1,float16,float16,0,0.09472159743309021
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,1,32,8,128,1,fp8,fp8,0,0.16412160396575928
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,1,32,8,128,1,float16,fp8,0,0.16347039937973024
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,1,32,32,128,1,float16,fp8,0,0.08772479891777038
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,1,32,32,128,1,fp8,fp8,0,0.08785279989242553
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,1,32,1,128,1,float16,float16,0,0.09336959719657897
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,1,32,1,128,1,float16,fp8,0,0.08715360164642334
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,1,32,1,128,1,fp8,fp8,0,0.08735039830207825
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,1,32,2,128,1,float16,float16,0,0.09348160028457642
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,1,32,2,128,1,float16,fp8,0,0.08722400069236755
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,1,32,2,128,1,fp8,fp8,0,0.08746560215950012
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,1,32,4,128,1,float16,float16,0,0.09343360066413879
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16,32,32,128,1,float16,float16,0,0.010428799688816071
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,1,32,4,128,1,float16,fp8,0,0.08710399866104127
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,1,32,4,128,1,fp8,fp8,0,0.08754079937934875
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16,32,1,128,1,float16,float16,0,0.010372799634933472
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,1,32,8,128,1,float16,float16,0,0.09356480240821838
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1,32,32,128,1,float16,float16,0,0.05368800163269043
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,1,32,8,128,1,fp8,fp8,0,0.08762879967689514
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1,32,32,128,1,float16,fp8,0,0.04854080080986023
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1,32,32,128,1,fp8,fp8,0,0.04906400144100189
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1,32,1,128,1,float16,fp8,0,0.04864960014820099
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1,32,2,128,1,float16,float16,0,0.052407997846603396
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1,32,2,128,1,float16,fp8,0,0.04868960082530975
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1,32,2,128,1,fp8,fp8,0,0.0483487993478775
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1,32,4,128,1,float16,float16,0,0.05262719988822937
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1,32,4,128,1,float16,fp8,0,0.04823360145092011
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1,32,4,128,1,fp8,fp8,0,0.047993600368499756
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1,32,8,128,1,float16,float16,0,0.052249598503112796
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1,32,8,128,1,float16,fp8,0,0.04780319929122925
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1,32,8,128,1,fp8,fp8,0,0.0479200005531311
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1,32,32,128,1,float16,float16,0,0.030888000130653383
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1,32,32,128,1,float16,fp8,0,0.02804960012435913
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1,32,32,128,1,fp8,fp8,0,0.028068798780441283
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1,32,1,128,1,float16,float16,0,0.030868801474571227
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1,32,1,128,1,float16,fp8,0,0.02864319980144501
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1,32,1,128,1,fp8,fp8,0,0.028460800647735596
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1,32,2,128,1,float16,fp8,0,0.028569599986076354
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1,32,2,128,1,float16,float16,0,0.030830401182174682
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1,32,2,128,1,fp8,fp8,0,0.028569599986076354
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1,32,4,128,1,float16,fp8,0,0.028825598955154418
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,16,32,8,128,1,fp8,fp8,0,0.04941760003566742
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1,32,4,128,1,fp8,fp8,0,0.028756800293922424
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1,32,8,128,1,float16,float16,0,0.03012320101261139
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1,32,8,128,1,fp8,fp8,0,0.028604799509048463
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1,32,32,128,1,float16,float16,0,0.02056960016489029
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1,32,32,128,1,float16,fp8,0,0.018624000251293182
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1,32,32,128,1,fp8,fp8,0,0.01865759938955307
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1,32,1,128,1,float16,float16,0,0.018680000305175783
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1,32,1,128,1,float16,fp8,0,0.018668800592422485
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,1,32,4,128,1,float16,fp8,0,0.1642624020576477
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1,32,1,128,1,fp8,fp8,0,0.018671999871730804
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1,32,2,128,1,float16,float16,0,0.01873439997434616
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1,32,2,128,1,float16,fp8,0,0.01865279972553253
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1,32,4,128,1,float16,float16,0,0.018671999871730804
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1,32,4,128,1,float16,fp8,0,0.01865759938955307
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1,32,4,128,1,fp8,fp8,0,0.018651199340820313
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1,32,8,128,1,float16,float16,0,0.019091199338436126
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1,32,8,128,1,float16,fp8,0,0.01863040030002594
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1,32,8,128,1,fp8,fp8,0,0.018662400543689728
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1,32,32,128,1,float16,float16,0,0.014558400213718414
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1,32,32,128,1,float16,fp8,0,0.012625600397586822
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1,32,32,128,1,fp8,fp8,0,0.012508800625801087
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1,32,1,128,1,float16,float16,0,0.012620800733566284
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1,32,1,128,1,float16,fp8,0,0.0125231996178627
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,1,32,8,128,1,float16,fp8,0,0.08680639863014221
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1,32,1,128,1,fp8,fp8,0,0.012539200484752655
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1,32,2,128,1,float16,float16,0,0.014433600008487701
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1,32,2,128,1,float16,fp8,0,0.012529599666595458
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1,32,2,128,1,fp8,fp8,0,0.012484800070524216
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1,32,4,128,1,float16,float16,0,0.014511999487876893
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1,32,4,128,1,float16,fp8,0,0.01250240057706833
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1,32,4,128,1,fp8,fp8,0,0.012510399520397186
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1,32,8,128,1,float16,float16,0,0.013203200697898865
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1,32,8,128,1,float16,fp8,0,0.01250240057706833
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1,32,8,128,1,fp8,fp8,0,0.012454400211572647
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1,32,32,128,1,float16,fp8,0,0.01043360009789467
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1,32,32,128,1,fp8,fp8,0,0.010441599786281586
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1,32,1,128,1,float16,float16,0,0.010385599732398988
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1,32,1,128,1,float16,fp8,0,0.010388799756765366
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1,32,1,128,1,fp8,fp8,0,0.010451199859380722
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1,32,2,128,1,float16,float16,0,0.010449600219726563
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1,32,2,128,1,float16,fp8,0,0.010356800258159637
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1,32,2,128,1,fp8,fp8,0,0.010344000160694122
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1,32,4,128,1,float16,float16,0,0.010463999956846238
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1,32,4,128,1,float16,float16,0,0.03089439868927002
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1,32,4,128,1,float16,fp8,0,0.010390400141477584
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1,32,4,128,1,fp8,fp8,0,0.010438399761915207
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1,32,8,128,1,float16,float16,0,0.010451199859380722
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1,32,8,128,1,float16,fp8,0,0.010416000336408614
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1,32,32,128,1,float16,float16,0,0.010460799932479859
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1,32,8,128,1,float16,fp8,0,0.02791999876499176
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1,32,32,128,1,float16,fp8,0,0.010449600219726563
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1,32,32,128,1,fp8,fp8,0,0.010447999835014344
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1,32,1,128,1,float16,float16,0,0.01037919968366623
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1,32,1,128,1,float16,fp8,0,0.009673599898815156
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1,32,1,128,1,fp8,fp8,0,0.008457600325345992
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1,32,2,128,1,float16,float16,0,0.010476800054311753
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1,32,2,128,1,float16,fp8,0,0.008500800281763077
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1,32,2,128,1,fp8,fp8,0,0.010440000146627427
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1,32,4,128,1,float16,float16,0,0.00971359983086586
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1,32,2,128,1,fp8,fp8,0,0.018587200343608855
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1,32,4,128,1,float16,fp8,0,0.01048479974269867
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1,32,4,128,1,fp8,fp8,0,0.008401600271463394
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1,32,8,128,1,float16,float16,0,0.010441599786281586
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1,32,8,128,1,float16,fp8,0,0.010385599732398988
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1,32,8,128,1,fp8,fp8,0,0.008574400097131729
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1,32,32,128,1,float16,float16,0,0.01043360009789467
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1,32,32,128,1,float16,fp8,0,0.010480000078678131
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1,32,32,128,1,fp8,fp8,0,0.00840959995985031
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1,32,1,128,1,float16,float16,0,0.008483199775218964
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1,32,1,128,1,float16,fp8,0,0.010371199995279311
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1,32,1,128,1,fp8,fp8,0,0.008408000320196151
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1,32,2,128,1,float16,float16,0,0.008448000252246856
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1,32,2,128,1,float16,fp8,0,0.010401599854230881
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1,32,2,128,1,fp8,fp8,0,0.00841120034456253
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1,32,4,128,1,float16,float16,0,0.008396799862384795
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1,32,4,128,1,float16,fp8,0,0.010473600029945374
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1,32,4,128,1,fp8,fp8,0,0.00849440023303032
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1,32,8,128,1,float16,float16,0,0.010070399940013885
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1,32,8,128,1,float16,fp8,0,0.008777599781751633
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1,32,8,128,1,fp8,fp8,0,0.008761599659919739
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1,32,1,128,1,float16,float16,0,0.05295519828796387
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1,32,32,128,1,float16,float16,0,0.011603199690580369
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1,32,8,128,1,fp8,fp8,0,0.010361599922180175
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1,32,1,128,1,fp8,fp8,0,0.04814079999923706
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,16384,24,1,128,1,float16,fp8,0,8.3883056640625
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,16384,24,1,128,1,fp8,fp8,0,8.427582550048829
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,16384,24,2,128,1,float16,fp8,0,8.472846221923827
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,16384,24,2,128,1,fp8,fp8,0,8.431368255615235
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,16384,24,1,128,1,float16,float16,0,10.6859375
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,16384,24,2,128,1,float16,float16,0,10.880628967285157
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,16384,24,4,128,1,float16,float16,0,10.606385803222656
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,16384,24,4,128,1,float16,fp8,0,8.465789031982421
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,16384,24,4,128,1,fp8,fp8,0,8.526947021484375
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16384,24,1,128,1,float16,float16,0,5.34368782043457
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,16384,24,8,128,1,float16,fp8,0,8.65411376953125
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16384,24,24,128,1,float16,fp8,0,4.443510437011719
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,16384,24,8,128,1,fp8,fp8,0,8.62234878540039
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,16384,24,8,128,1,float16,float16,0,11.239215850830078
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16384,24,24,128,1,fp8,fp8,0,4.4150737762451175
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16384,24,1,128,1,float16,fp8,0,4.386640167236328
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16384,24,1,128,1,fp8,fp8,0,4.504415893554688
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16384,24,2,128,1,float16,fp8,0,4.345329666137696
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16384,24,2,128,1,fp8,fp8,0,4.406086349487305
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16384,24,2,128,1,float16,float16,0,5.545072174072265
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16384,24,4,128,1,float16,fp8,0,4.302212905883789
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16384,24,4,128,1,float16,float16,0,5.589734268188477
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16384,24,4,128,1,fp8,fp8,0,4.3613136291503904
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16384,24,24,128,1,float16,fp8,0,2.267608070373535
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16384,24,8,128,1,float16,fp8,0,4.345435333251953
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16384,24,8,128,1,fp8,fp8,0,4.398396682739258
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16384,24,24,128,1,fp8,fp8,0,2.440020751953125
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16384,24,8,128,1,float16,float16,0,5.587852859497071
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16384,24,1,128,1,float16,float16,0,2.702862358093262
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16384,24,1,128,1,float16,fp8,0,2.170039939880371
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16384,24,1,128,1,fp8,fp8,0,2.230793571472168
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16384,24,2,128,1,float16,float16,0,2.6862768173217773
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16384,24,2,128,1,float16,fp8,0,2.188732719421387
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16384,24,2,128,1,fp8,fp8,0,2.5281984329223635
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16384,24,4,128,1,float16,float16,0,2.7699615478515627
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16384,24,4,128,1,float16,fp8,0,2.259012794494629
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16384,24,4,128,1,fp8,fp8,0,2.43363037109375
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16384,24,8,128,1,float16,float16,0,2.6446096420288088
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16384,24,8,128,1,float16,fp8,0,2.194700813293457
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16384,24,8,128,1,fp8,fp8,0,2.4409391403198244
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16384,24,24,128,1,float16,fp8,0,1.207817554473877
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16384,24,24,128,1,fp8,fp8,0,1.4821311950683593
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16384,24,1,128,1,float16,float16,0,1.3029855728149413
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16384,24,1,128,1,float16,fp8,0,1.1740079879760743
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16384,24,1,128,1,fp8,fp8,0,1.1563039779663087
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16384,24,2,128,1,float16,fp8,0,1.2141167640686035
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16384,24,2,128,1,float16,float16,0,1.297216033935547
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16384,24,4,128,1,float16,fp8,0,1.1532943725585938
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16384,24,4,128,1,float16,float16,0,1.2983807563781737
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16384,24,4,128,1,fp8,fp8,0,1.1628432273864746
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16384,24,8,128,1,float16,float16,0,1.3824432373046875
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16384,24,8,128,1,float16,fp8,0,1.1531824111938476
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16384,24,8,128,1,fp8,fp8,0,1.3057583808898925
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16384,24,2,128,1,fp8,fp8,0,1.2114959716796876
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,12288,24,1,128,1,float16,fp8,0,5.00819206237793
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,12288,24,1,128,1,fp8,fp8,0,4.934228897094727
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,12288,24,1,128,1,float16,float16,0,6.318219375610352
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,12288,24,2,128,1,float16,float16,0,6.322547149658203
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,12288,24,2,128,1,float16,fp8,0,4.888702392578125
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,12288,24,2,128,1,fp8,fp8,0,4.957932662963867
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,12288,24,4,128,1,float16,fp8,0,5.1056640625
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,12288,24,4,128,1,float16,float16,0,6.340241622924805
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,12288,24,4,128,1,fp8,fp8,0,5.019588851928711
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,12288,24,8,128,1,float16,float16,0,6.246635055541992
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,12288,24,24,128,1,float16,fp8,0,2.6017248153686525
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,12288,24,8,128,1,float16,fp8,0,4.98645133972168
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,12288,24,8,128,1,fp8,fp8,0,5.038119888305664
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,12288,24,24,128,1,fp8,fp8,0,2.7835599899291994
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,12288,24,1,128,1,float16,fp8,0,2.5343391418457033
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,12288,24,1,128,1,float16,float16,0,3.0477615356445313
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,12288,24,1,128,1,fp8,fp8,0,2.547603225708008
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,12288,24,2,128,1,float16,float16,0,3.1353647232055666
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,12288,24,2,128,1,float16,fp8,0,2.5141040802001955
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,12288,24,2,128,1,fp8,fp8,0,2.709511947631836
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,12288,24,4,128,1,float16,float16,0,3.004435157775879
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,12288,24,4,128,1,float16,fp8,0,2.7636896133422852
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,12288,24,4,128,1,fp8,fp8,0,2.5069440841674804
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,12288,24,8,128,1,float16,float16,0,3.0813135147094726
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,12288,24,8,128,1,float16,fp8,0,2.7964799880981444
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,12288,24,8,128,1,fp8,fp8,0,2.514004707336426
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,12288,24,24,128,1,float16,fp8,0,1.6050464630126953
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,12288,24,24,128,1,fp8,fp8,0,1.371504020690918
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,12288,24,1,128,1,float16,float16,0,1.4573280334472656
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,12288,24,1,128,1,float16,fp8,0,1.3522303581237793
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,12288,24,1,128,1,fp8,fp8,0,1.2887999534606933
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,12288,24,2,128,1,float16,float16,0,1.5461888313293457
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,12288,24,2,128,1,float16,fp8,0,1.2938096046447753
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,12288,24,2,128,1,fp8,fp8,0,1.2914768218994142
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,12288,24,4,128,1,float16,float16,0,1.5933504104614258
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,12288,24,4,128,1,float16,fp8,0,1.2915599822998047
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,12288,24,4,128,1,fp8,fp8,0,1.2874719619750976
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,12288,24,8,128,1,float16,fp8,0,1.2919407844543458
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,12288,24,8,128,1,float16,float16,0,1.5681455612182618
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,12288,24,8,128,1,fp8,fp8,0,1.2886159896850586
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,12288,24,24,128,1,float16,fp8,0,0.8647263526916504
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,12288,24,24,128,1,fp8,fp8,0,0.7215904235839844
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,12288,24,1,128,1,float16,float16,0,0.8080384254455566
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,12288,24,1,128,1,float16,fp8,0,0.6914912223815918
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,12288,24,1,128,1,fp8,fp8,0,0.7201119899749756
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,12288,24,2,128,1,float16,float16,0,0.7670527935028076
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,12288,24,2,128,1,float16,fp8,0,0.7094863891601563
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,12288,24,2,128,1,fp8,fp8,0,0.6994688034057617
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,12288,24,4,128,1,float16,float16,0,0.7748367786407471
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,12288,24,4,128,1,float16,fp8,0,0.7006832122802734
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,12288,24,4,128,1,fp8,fp8,0,0.7273808002471924
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,12288,24,8,128,1,float16,float16,0,0.7840511798858643
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,12288,24,8,128,1,float16,fp8,0,0.7252048015594482
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,12288,24,8,128,1,fp8,fp8,0,0.7060192108154297
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,10240,24,1,128,1,float16,fp8,0,3.5373775482177736
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,10240,24,1,128,1,float16,float16,0,4.37031364440918
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,10240,24,1,128,1,fp8,fp8,0,3.503985595703125
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,10240,24,2,128,1,float16,float16,0,4.33060302734375
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,10240,24,2,128,1,float16,fp8,0,3.540964889526367
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,10240,24,2,128,1,fp8,fp8,0,3.8169582366943358
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,10240,24,4,128,1,float16,float16,0,4.45184326171875
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,10240,24,4,128,1,float16,fp8,0,3.5418895721435546
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,10240,24,4,128,1,fp8,fp8,0,3.7278064727783202
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,10240,24,8,128,1,float16,float16,0,4.4834129333496096
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,10240,24,8,128,1,float16,fp8,0,3.5977249145507812
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,10240,24,8,128,1,fp8,fp8,0,3.740963363647461
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,10240,24,24,128,1,float16,fp8,0,1.8821151733398438
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,10240,24,24,128,1,fp8,fp8,0,1.8793487548828125
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,10240,24,1,128,1,float16,float16,0,2.1038991928100588
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,10240,24,1,128,1,float16,fp8,0,2.0639888763427736
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,10240,24,1,128,1,fp8,fp8,0,1.8106191635131836
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,10240,24,2,128,1,float16,float16,0,2.100387191772461
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,10240,24,2,128,1,float16,fp8,0,2.0592432022094727
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,10240,24,2,128,1,fp8,fp8,0,1.8042160034179688
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,10240,24,4,128,1,float16,float16,0,2.138684844970703
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,10240,24,4,128,1,float16,fp8,0,2.022625541687012
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,10240,24,4,128,1,fp8,fp8,0,1.7749488830566407
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,10240,24,8,128,1,float16,float16,0,2.066308784484863
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,10240,24,8,128,1,float16,fp8,0,1.9295711517333984
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,10240,24,8,128,1,fp8,fp8,0,1.8126848220825196
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,12288,24,24,128,1,float16,float16,0,1.5448575973510743
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,12288,24,24,128,1,float16,float16,0,0.8057231903076172
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,10240,24,24,128,1,float16,fp8,0,1.059291172027588
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16384,24,24,128,1,float16,float16,0,1.3761568069458008
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,10240,24,24,128,1,float16,float16,0,1.117244815826416
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16384,24,24,128,1,float16,float16,0,2.6529407501220703
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,10240,24,1,128,1,float16,float16,0,1.0434144020080567
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,10240,24,1,128,1,float16,fp8,0,0.9487504005432129
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,10240,24,1,128,1,fp8,fp8,0,0.9296175956726074
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16384,24,24,128,1,float16,float16,0,5.564740753173828
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,10240,24,2,128,1,float16,float16,0,1.160108757019043
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,10240,24,2,128,1,float16,fp8,0,1.019260787963867
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,10240,24,4,128,1,float16,float16,0,1.0697471618652343
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,10240,24,4,128,1,float16,fp8,0,1.1355648040771484
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,10240,24,8,128,1,float16,float16,0,1.0698991775512696
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,10240,24,24,128,1,fp8,fp8,0,0.9784208297729492
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,10240,24,8,128,1,float16,fp8,0,1.0142576217651367
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,10240,24,8,128,1,fp8,fp8,0,0.9789152145385742
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,10240,24,24,128,1,float16,float16,0,0.5959167957305909
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,10240,24,24,128,1,float16,fp8,0,0.5317359924316406
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,10240,24,24,128,1,fp8,fp8,0,0.5505055904388427
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,10240,24,1,128,1,float16,float16,0,0.5579567909240722
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,10240,24,1,128,1,fp8,fp8,0,0.5065055847167969
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,10240,24,2,128,1,float16,float16,0,0.5615344047546387
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,10240,24,2,128,1,float16,fp8,0,0.5149775981903076
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,10240,24,2,128,1,fp8,fp8,0,0.5079535961151123
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,10240,24,4,128,1,float16,float16,0,0.5629104137420654
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,10240,24,4,128,1,float16,fp8,0,0.5134255886077881
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,10240,24,2,128,1,fp8,fp8,0,0.9477791786193848
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,10240,24,4,128,1,fp8,fp8,0,0.5082176208496094
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,10240,24,8,128,1,float16,float16,0,0.5661759853363038
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,10240,24,8,128,1,fp8,fp8,0,0.5072368144989013
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,10240,24,4,128,1,fp8,fp8,0,1.0707440376281738
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,10240,24,24,128,1,float16,float16,0,2.2060304641723634
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,12288,24,24,128,1,float16,float16,0,3.094401550292969
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,10240,24,1,128,1,float16,fp8,0,0.5491583824157715
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,10240,24,8,128,1,float16,fp8,0,0.5100880146026612
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,8192,24,1,128,1,float16,fp8,0,4.574825668334961
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,8192,24,1,128,1,fp8,fp8,0,4.594692611694336
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,8192,24,1,128,1,float16,float16,0,5.558860778808594
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,8192,24,2,128,1,float16,fp8,0,4.709228897094727
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,8192,24,2,128,1,float16,float16,0,5.625609588623047
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,8192,24,2,128,1,fp8,fp8,0,4.566500854492188
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,8192,24,4,128,1,float16,float16,0,5.63641128540039
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,8192,24,4,128,1,float16,fp8,0,4.5537151336669925
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,8192,24,24,128,1,float16,fp8,0,2.4637424468994142
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,8192,24,24,128,1,float16,float16,0,3.0641151428222657
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,8192,24,4,128,1,fp8,fp8,0,4.6063697814941404
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,8192,24,8,128,1,fp8,fp8,0,4.60563850402832
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,8192,24,8,128,1,float16,fp8,0,4.699604797363281
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,8192,24,24,128,1,fp8,fp8,0,2.648855972290039
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,8192,24,8,128,1,float16,float16,0,5.8592384338378904
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,8192,24,1,128,1,float16,fp8,0,2.3164239883422852
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,8192,24,1,128,1,float16,float16,0,2.7539695739746093
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,8192,24,1,128,1,fp8,fp8,0,2.5013359069824217
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,8192,24,2,128,1,float16,float16,0,2.7411968231201174
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,8192,24,2,128,1,float16,fp8,0,2.335758399963379
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,8192,24,2,128,1,fp8,fp8,0,2.320631980895996
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,8192,24,4,128,1,float16,float16,0,2.795680046081543
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,8192,24,4,128,1,float16,fp8,0,2.551033592224121
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,8192,24,4,128,1,fp8,fp8,0,2.3095264434814453
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,8192,24,24,128,1,float16,fp8,0,1.2699215888977051
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,8192,24,24,128,1,float16,float16,0,1.4518367767333984
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,8192,24,8,128,1,float16,float16,0,2.8924224853515623
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,8192,24,1,128,1,float16,float16,0,1.3740032196044922
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,8192,24,1,128,1,float16,fp8,0,1.1905743598937988
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,8192,24,24,128,1,fp8,fp8,0,1.47805757522583
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,8192,24,8,128,1,fp8,fp8,0,2.6037696838378905
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,8192,24,1,128,1,fp8,fp8,0,1.1916144371032715
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,8192,24,2,128,1,float16,fp8,0,1.3649616241455078
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,8192,24,2,128,1,float16,float16,0,1.489739227294922
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,8192,24,2,128,1,fp8,fp8,0,1.350876808166504
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,8192,24,4,128,1,float16,fp8,0,1.231264019012451
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,8192,24,4,128,1,fp8,fp8,0,1.227500820159912
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,8192,24,8,128,1,float16,float16,0,1.348031997680664
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,8192,24,24,128,1,float16,fp8,0,0.6744624137878418
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,8192,24,8,128,1,float16,fp8,0,1.228559970855713
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,8192,24,24,128,1,fp8,fp8,0,0.6705808162689209
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,8192,24,8,128,1,fp8,fp8,0,1.3977184295654297
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,8192,24,1,128,1,float16,float16,0,0.7955760002136231
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,8192,24,1,128,1,float16,fp8,0,0.6361775875091553
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,8192,24,1,128,1,fp8,fp8,0,0.6383776187896728
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,8192,24,2,128,1,float16,float16,0,0.693171215057373
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,8192,24,8,128,1,float16,fp8,0,2.3168384552001955
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,8192,24,2,128,1,float16,fp8,0,0.6918416023254395
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,8192,24,4,128,1,float16,fp8,0,0.6331727981567383
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,8192,24,4,128,1,fp8,fp8,0,0.6387919902801513
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,8192,24,2,128,1,fp8,fp8,0,0.804252815246582
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,8192,24,8,128,1,float16,float16,0,0.7055200099945068
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,8192,24,8,128,1,float16,fp8,0,0.6540448188781738
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,8192,24,24,128,1,float16,float16,0,0.40762882232666015
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,8192,24,24,128,1,float16,fp8,0,0.41873440742492674
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,8192,24,8,128,1,fp8,fp8,0,0.6320720195770264
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,8192,24,1,128,1,float16,float16,0,0.37607359886169434
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,8192,24,24,128,1,fp8,fp8,0,0.3696768045425415
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,8192,24,1,128,1,float16,fp8,0,0.35550079345703123
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,8192,24,1,128,1,fp8,fp8,0,0.3650576114654541
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,8192,24,2,128,1,float16,float16,0,0.37456479072570803
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,8192,24,2,128,1,fp8,fp8,0,0.34921441078186033
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,8192,24,4,128,1,float16,float16,0,1.3310272216796875
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,8192,24,4,128,1,float16,float16,0,0.38651199340820314
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,8192,24,4,128,1,float16,fp8,0,0.3509088039398193
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,8192,24,4,128,1,fp8,fp8,0,0.3492799997329712
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,8192,24,8,128,1,float16,float16,0,0.38184161186218263
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,8192,24,8,128,1,float16,fp8,0,0.3567503929138184
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,8192,24,8,128,1,fp8,fp8,0,0.3547823905944824
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,8192,24,24,128,1,float16,float16,0,0.7728079795837403
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,8192,24,4,128,1,float16,float16,0,0.6980303764343262
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,6144,24,1,128,1,float16,fp8,0,2.7064672470092774
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,6144,24,1,128,1,float16,float16,0,3.0983407974243162
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,6144,24,1,128,1,fp8,fp8,0,2.711350440979004
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,8192,24,2,128,1,float16,fp8,0,0.3575727939605713
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,6144,24,2,128,1,float16,fp8,0,2.703691291809082
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,6144,24,2,128,1,float16,float16,0,3.1519168853759765
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,6144,24,2,128,1,fp8,fp8,0,2.6985504150390627
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,6144,24,24,128,1,float16,float16,0,1.9161344528198243
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,6144,24,4,128,1,fp8,fp8,0,2.7111568450927734
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,6144,24,8,128,1,float16,fp8,0,2.7057504653930664
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,6144,24,4,128,1,float16,float16,0,3.2221168518066405
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,6144,24,4,128,1,float16,fp8,0,3.007459259033203
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,6144,24,24,128,1,float16,fp8,0,1.4839983940124513
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,6144,24,8,128,1,float16,float16,0,3.185339164733887
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,6144,24,24,128,1,fp8,fp8,0,1.6476991653442383
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,6144,24,1,128,1,float16,fp8,0,1.385927963256836
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,6144,24,1,128,1,float16,float16,0,1.529304027557373
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,6144,24,2,128,1,float16,fp8,0,1.3775279998779297
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,6144,24,2,128,1,float16,float16,0,1.5300127983093261
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,6144,24,1,128,1,fp8,fp8,0,1.5778800010681153
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,6144,24,2,128,1,fp8,fp8,0,1.5756095886230468
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,6144,24,4,128,1,float16,fp8,0,1.3814160346984863
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,6144,24,4,128,1,float16,float16,0,1.549846363067627
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,6144,24,24,128,1,float16,float16,0,0.8821536064147949
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,6144,24,4,128,1,fp8,fp8,0,1.711859130859375
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,6144,24,8,128,1,fp8,fp8,0,1.410747241973877
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,6144,24,8,128,1,float16,float16,0,1.579030418395996
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,6144,24,8,128,1,float16,fp8,0,1.6285152435302734
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,6144,24,24,128,1,float16,fp8,0,0.7732975959777832
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,6144,24,8,128,1,fp8,fp8,0,2.748958396911621
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,6144,24,1,128,1,float16,float16,0,0.8111151695251465
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,6144,24,1,128,1,float16,fp8,0,0.7909311771392822
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,6144,24,1,128,1,fp8,fp8,0,0.7232880115509033
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,6144,24,2,128,1,float16,float16,0,0.8342384338378906
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,6144,24,2,128,1,float16,fp8,0,0.7506239891052247
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,6144,24,2,128,1,fp8,fp8,0,0.7191792011260987
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,6144,24,4,128,1,float16,float16,0,0.8010687828063965
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,6144,24,4,128,1,float16,fp8,0,0.7905200004577637
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,6144,24,4,128,1,fp8,fp8,0,0.7297135829925537
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,6144,24,8,128,1,float16,float16,0,0.8033103942871094
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,6144,24,24,128,1,float16,float16,0,0.4578559875488281
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,6144,24,8,128,1,fp8,fp8,0,0.7519423961639404
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,6144,24,8,128,1,float16,fp8,0,0.8182864189147949
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,6144,24,24,128,1,float16,fp8,0,0.4161680221557617
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,6144,24,24,128,1,fp8,fp8,0,0.43422560691833495
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,6144,24,1,128,1,float16,float16,0,0.41045441627502444
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,6144,24,1,128,1,float16,fp8,0,0.38858718872070314
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,6144,24,2,128,1,float16,fp8,0,0.39279999732971194
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,6144,24,2,128,1,float16,float16,0,0.4463647842407227
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,6144,24,4,128,1,float16,float16,0,0.4224527835845947
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,6144,24,4,128,1,float16,fp8,0,0.3922015905380249
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,6144,24,4,128,1,fp8,fp8,0,0.3884943962097168
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,6144,24,8,128,1,float16,float16,0,0.42719039916992185
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,6144,24,8,128,1,float16,fp8,0,0.4031199932098389
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,6144,24,24,128,1,float16,float16,0,0.2549439907073975
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,6144,24,8,128,1,fp8,fp8,0,0.3988912105560303
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,6144,24,24,128,1,float16,fp8,0,0.25532801151275636
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,6144,24,24,128,1,fp8,fp8,0,0.23877758979797364
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,6144,24,1,128,1,fp8,fp8,0,0.2278208017349243
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,6144,24,2,128,1,float16,float16,0,0.24130239486694335
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,6144,24,2,128,1,float16,fp8,0,0.22183520793914796
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,6144,24,2,128,1,fp8,fp8,0,0.22673120498657226
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,6144,24,4,128,1,float16,float16,0,0.24120159149169923
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,6144,24,4,128,1,float16,fp8,0,0.22104160785675048
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,6144,24,4,128,1,fp8,fp8,0,0.22534561157226562
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,6144,24,8,128,1,float16,float16,0,0.24630239009857177
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,6144,24,24,128,1,fp8,fp8,0,0.8953519821166992
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,6144,24,8,128,1,fp8,fp8,0,0.2225775957107544
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,6144,24,1,128,1,fp8,fp8,0,0.3896928071975708
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,6144,24,2,128,1,fp8,fp8,0,0.3877871990203857
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,6144,24,1,128,1,float16,float16,0,0.236411190032959
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,4096,24,1,128,1,fp8,fp8,0,2.6241792678833007
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,4096,24,1,128,1,float16,float16,0,2.9641504287719727
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,6144,24,8,128,1,float16,fp8,0,0.221943998336792
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,4096,24,2,128,1,float16,fp8,0,2.621295928955078
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,4096,24,2,128,1,float16,float16,0,3.0026416778564453
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,4096,24,2,128,1,fp8,fp8,0,2.616164779663086
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,4096,24,4,128,1,float16,float16,0,2.981889533996582
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,4096,24,4,128,1,float16,fp8,0,2.9770063400268554
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,6144,24,1,128,1,float16,fp8,0,0.222870397567749
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,4096,24,24,128,1,float16,fp8,0,1.490875244140625
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,4096,24,8,128,1,float16,fp8,0,2.8823471069335938
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,4096,24,8,128,1,float16,float16,0,3.216846466064453
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,4096,24,24,128,1,float16,float16,0,1.9024351119995118
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,4096,24,24,128,1,fp8,fp8,0,1.477291202545166
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,4096,24,8,128,1,fp8,fp8,0,2.617697525024414
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,4096,24,1,128,1,float16,fp8,0,1.3416000366210938
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,4096,24,2,128,1,float16,fp8,0,1.3337231636047364
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,4096,24,2,128,1,float16,float16,0,1.557759952545166
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,4096,24,2,128,1,fp8,fp8,0,1.335108757019043
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,4096,24,1,128,1,float16,fp8,0,2.6217840194702147
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,4096,24,4,128,1,float16,float16,0,1.4610015869140625
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,4096,24,4,128,1,fp8,fp8,0,1.3292223930358886
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,4096,24,4,128,1,float16,fp8,0,1.501251220703125
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,4096,24,8,128,1,float16,float16,0,1.5083855628967284
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,4096,24,24,128,1,float16,float16,0,0.8599120140075683
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,4096,24,8,128,1,float16,fp8,0,1.4641712188720704
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,4096,24,8,128,1,fp8,fp8,0,1.3295071601867676
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,4096,24,24,128,1,float16,fp8,0,0.8987855911254883
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,4096,24,1,128,1,float16,float16,0,1.4365856170654296
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,4096,24,24,128,1,fp8,fp8,0,0.7825119972229004
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,4096,24,1,128,1,float16,float16,0,0.7321680068969727
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,4096,24,1,128,1,fp8,fp8,0,1.5910832405090332
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,4096,24,1,128,1,float16,fp8,0,0.6853040218353271
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,4096,24,1,128,1,fp8,fp8,0,0.7742959976196289
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,4096,24,2,128,1,float16,float16,0,0.7279071807861328
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,4096,24,2,128,1,float16,fp8,0,0.6993135929107666
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,4096,24,2,128,1,fp8,fp8,0,0.7053679943084716
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,4096,24,4,128,1,float16,fp8,0,0.6858223915100098
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,4096,24,4,128,1,float16,float16,0,0.7527679920196533
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,4096,24,4,128,1,fp8,fp8,0,0.7081151962280273
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,4096,24,24,128,1,float16,float16,0,0.4407487869262695
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,4096,24,24,128,1,float16,fp8,0,0.4017280101776123
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,4096,24,8,128,1,float16,float16,0,0.7606336116790772
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,4096,24,8,128,1,fp8,fp8,0,0.6909471988677979
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,4096,24,24,128,1,fp8,fp8,0,0.40405921936035155
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,4096,24,1,128,1,float16,fp8,0,0.3646464109420776
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,4096,24,1,128,1,float16,float16,0,0.38369600772857665
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,4096,24,1,128,1,fp8,fp8,0,0.3855135917663574
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,4096,24,2,128,1,float16,float16,0,0.3794464111328125
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,4096,24,2,128,1,float16,fp8,0,0.36632800102233887
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,4096,24,2,128,1,fp8,fp8,0,0.3649503946304321
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,4096,24,4,128,1,float16,float16,0,0.3903183937072754
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,4096,24,4,128,1,float16,fp8,0,0.38285439014434813
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,4096,24,4,128,1,fp8,fp8,0,0.3679759979248047
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,4096,24,8,128,1,float16,float16,0,0.3981663942337036
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,4096,24,8,128,1,float16,fp8,0,0.3648432016372681
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,4096,24,24,128,1,float16,fp8,0,0.22573440074920653
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,4096,24,8,128,1,fp8,fp8,0,0.37007200717926025
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,4096,24,1,128,1,float16,float16,0,0.2092144012451172
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,4096,24,2,128,1,float16,float16,0,0.2128864049911499
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,4096,24,1,128,1,fp8,fp8,0,0.20162079334259034
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,4096,24,2,128,1,float16,fp8,0,0.20250558853149414
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,4096,24,2,128,1,fp8,fp8,0,0.2024847984313965
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,4096,24,4,128,1,float16,float16,0,0.21226398944854735
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,4096,24,4,128,1,float16,fp8,0,0.20254719257354736
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,4096,24,4,128,1,fp8,fp8,0,0.2022768020629883
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,4096,24,8,128,1,float16,float16,0,0.21785759925842285
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,4096,24,8,128,1,float16,fp8,0,0.20413761138916015
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,4096,24,8,128,1,fp8,fp8,0,0.20358879566192628
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,4096,24,24,128,1,float16,float16,0,0.13985600471496581
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,4096,24,24,128,1,float16,fp8,0,0.12978240251541137
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,4096,24,24,128,1,fp8,fp8,0,0.12920639514923096
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,4096,24,4,128,1,fp8,fp8,0,2.6185407638549805
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,4096,24,1,128,1,float16,fp8,0,0.12162560224533081
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,4096,24,1,128,1,fp8,fp8,0,0.12020479440689087
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,4096,24,2,128,1,float16,fp8,0,0.12121599912643433
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,4096,24,2,128,1,fp8,fp8,0,0.12039680480957031
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,4096,24,2,128,1,float16,float16,0,0.12665920257568358
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,4096,24,4,128,1,float16,float16,0,0.12690240144729614
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,4096,24,4,128,1,float16,fp8,0,0.12055200338363647
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,4096,24,4,128,1,fp8,fp8,0,0.12025120258331298
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,4096,24,8,128,1,float16,float16,0,0.12977759838104247
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,4096,24,8,128,1,float16,fp8,0,0.6953248023986817
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,4096,24,24,128,1,float16,float16,0,0.24632799625396729
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,4096,24,24,128,1,fp8,fp8,0,0.22345919609069825
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,3072,24,1,128,1,float16,fp8,0,1.609217643737793
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,3072,24,1,128,1,float16,float16,0,1.7246944427490234
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,4096,24,1,128,1,float16,float16,0,0.12652000188827514
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,3072,24,1,128,1,fp8,fp8,0,1.6115968704223633
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,3072,24,2,128,1,float16,fp8,0,1.6088863372802735
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,3072,24,2,128,1,float16,float16,0,1.708705520629883
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,3072,24,2,128,1,fp8,fp8,0,1.7320863723754882
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,4096,24,8,128,1,float16,fp8,0,0.12113440036773682
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,4096,24,8,128,1,fp8,fp8,0,0.12085599899291992
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,3072,24,4,128,1,float16,float16,0,1.7202768325805664
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,3072,24,4,128,1,float16,fp8,0,1.6116640090942382
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,3072,24,24,128,1,float16,float16,0,1.0443120002746582
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,3072,24,4,128,1,fp8,fp8,0,1.6093711853027344
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,3072,24,8,128,1,float16,fp8,0,1.6189535140991211
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,3072,24,8,128,1,fp8,fp8,0,1.6074640274047851
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,3072,24,8,128,1,float16,float16,0,1.8938735961914062
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,3072,24,24,128,1,float16,fp8,0,0.9709664344787597
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,3072,24,24,128,1,fp8,fp8,0,0.9293503761291504
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,3072,24,1,128,1,float16,fp8,0,0.8213968276977539
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,3072,24,1,128,1,fp8,fp8,0,0.8224032402038575
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,3072,24,2,128,1,float16,float16,0,0.8643664360046387
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,3072,24,2,128,1,float16,fp8,0,0.9714447975158691
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,3072,24,2,128,1,fp8,fp8,0,0.8500864028930664
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,3072,24,4,128,1,float16,float16,0,0.873635196685791
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,3072,24,4,128,1,float16,fp8,0,0.8216480255126953
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,3072,24,4,128,1,fp8,fp8,0,0.8223679542541504
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,3072,24,8,128,1,float16,float16,0,0.9149279594421387
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,3072,24,24,128,1,float16,float16,0,0.5307439804077149
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,3072,24,8,128,1,float16,fp8,0,0.8591728210449219
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,3072,24,24,128,1,float16,fp8,0,0.48113441467285156
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,3072,24,8,128,1,fp8,fp8,0,0.829435157775879
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,3072,24,24,128,1,fp8,fp8,0,0.4815199851989746
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,3072,24,1,128,1,float16,float16,0,0.49235520362854
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,3072,24,1,128,1,float16,fp8,0,0.43946399688720705
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,3072,24,1,128,1,fp8,fp8,0,0.4341263771057129
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,3072,24,2,128,1,float16,float16,0,0.44312639236450196
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,3072,24,2,128,1,float16,fp8,0,0.43052000999450685
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,3072,24,2,128,1,fp8,fp8,0,0.49233279228210447
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,3072,24,4,128,1,float16,float16,0,0.449780797958374
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,3072,24,4,128,1,float16,fp8,0,0.43105278015136717
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,3072,24,4,128,1,fp8,fp8,0,0.4288832187652588
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,3072,24,8,128,1,float16,float16,0,0.4625120162963867
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,4096,24,1,128,1,float16,fp8,0,0.20238399505615234
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,3072,24,24,128,1,float16,float16,0,0.28032479286193845
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,3072,24,24,128,1,float16,fp8,0,0.259004807472229
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,3072,24,24,128,1,fp8,fp8,0,0.2603343963623047
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,3072,24,1,128,1,float16,float16,0,0.2348720073699951
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,3072,24,1,128,1,float16,fp8,0,0.23209118843078613
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,3072,24,1,128,1,fp8,fp8,0,0.23096959590911864
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,3072,24,1,128,1,float16,float16,0,0.862337589263916
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,3072,24,2,128,1,float16,float16,0,0.23542399406433107
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,3072,24,2,128,1,float16,fp8,0,0.231660795211792
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,3072,24,2,128,1,fp8,fp8,0,0.23248639106750488
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,3072,24,4,128,1,float16,float16,0,0.23958079814910888
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,3072,24,4,128,1,float16,fp8,0,0.23134880065917968
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,3072,24,4,128,1,fp8,fp8,0,0.23250079154968262
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,3072,24,8,128,1,float16,float16,0,0.24645600318908692
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,3072,24,8,128,1,float16,fp8,0,0.23293759822845458
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,3072,24,8,128,1,fp8,fp8,0,0.23238720893859863
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,3072,24,24,128,1,float16,fp8,0,0.14704159498214722
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,3072,24,24,128,1,fp8,fp8,0,0.1475200057029724
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,3072,24,1,128,1,float16,fp8,0,0.13176319599151612
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,3072,24,1,128,1,fp8,fp8,0,0.13190399408340453
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,3072,24,2,128,1,float16,float16,0,0.13492640256881713
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,3072,24,2,128,1,float16,fp8,0,0.13179359436035157
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,3072,24,2,128,1,fp8,fp8,0,0.1318992018699646
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,3072,24,4,128,1,float16,float16,0,0.13782559633255004
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,3072,24,4,128,1,float16,fp8,0,0.1319823980331421
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,3072,24,4,128,1,fp8,fp8,0,0.1323583960533142
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,3072,24,8,128,1,float16,float16,0,0.14222400188446044
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,3072,24,8,128,1,float16,fp8,0,0.1318112015724182
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,3072,24,24,128,1,float16,float16,0,0.0968720018863678
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,3072,24,8,128,1,fp8,fp8,0,0.1321023941040039
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,3072,24,24,128,1,float16,fp8,0,0.0899999976158142
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,3072,24,24,128,1,fp8,fp8,0,0.08997439742088317
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,3072,24,1,128,1,float16,float16,0,0.08687679767608643
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,3072,24,1,128,1,float16,fp8,0,0.08399519920349122
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,3072,24,1,128,1,fp8,fp8,0,0.0840287983417511
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,3072,24,2,128,1,float16,float16,0,0.0871775984764099
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,3072,24,2,128,1,float16,fp8,0,0.08325440287590027
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,3072,24,2,128,1,fp8,fp8,0,0.0832431972026825
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,3072,24,4,128,1,float16,float16,0,0.08816800117492676
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,3072,24,4,128,1,float16,fp8,0,0.08371999859809875
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,3072,24,4,128,1,fp8,fp8,0,0.0831167995929718
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,3072,24,8,128,1,float16,fp8,0,0.08357759714126586
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,3072,24,8,128,1,fp8,fp8,0,0.08273119926452636
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,3072,24,8,128,1,fp8,fp8,0,0.43679518699645997
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,2048,24,1,128,1,float16,fp8,0,1.645510482788086
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,3072,24,24,128,1,float16,float16,0,0.15545279979705812
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,2048,24,1,128,1,float16,float16,0,1.6814304351806642
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,3072,24,1,128,1,float16,float16,0,0.13566559553146362
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,2048,24,1,128,1,fp8,fp8,0,1.6477216720581054
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,2048,24,2,128,1,float16,float16,0,1.7091360092163086
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,2048,24,2,128,1,float16,fp8,0,1.6446304321289062
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,2048,24,2,128,1,fp8,fp8,0,1.6432416915893555
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,3072,24,8,128,1,float16,float16,0,0.08942559957504273
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,2048,24,4,128,1,float16,float16,0,1.8496400833129882
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,2048,24,4,128,1,float16,fp8,0,1.6450624465942383
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,2048,24,4,128,1,fp8,fp8,0,1.6466960906982422
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,3072,24,8,128,1,float16,fp8,0,0.45833601951599123
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,2048,24,24,128,1,float16,float16,0,1.0852527618408203
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,2048,24,8,128,1,float16,float16,0,1.9197023391723633
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,2048,24,24,128,1,fp8,fp8,0,0.975648021697998
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,2048,24,24,128,1,float16,fp8,0,1.101897621154785
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,2048,24,1,128,1,float16,float16,0,0.8375328063964844
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,2048,24,8,128,1,float16,fp8,0,1.6458223342895508
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,2048,24,8,128,1,fp8,fp8,0,1.643824005126953
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,2048,24,1,128,1,fp8,fp8,0,0.8755632400512695
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,2048,24,2,128,1,float16,fp8,0,0.8351344108581543
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,2048,24,2,128,1,float16,float16,0,0.922708797454834
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,2048,24,2,128,1,fp8,fp8,0,0.8698335647583008
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,2048,24,4,128,1,float16,fp8,0,0.8368736267089844
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,2048,24,4,128,1,float16,float16,0,0.9248703956604004
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,2048,24,4,128,1,fp8,fp8,0,0.8348943710327148
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,2048,24,24,128,1,float16,float16,0,0.5570576190948486
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,2048,24,8,128,1,float16,float16,0,0.9080287933349609
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,2048,24,24,128,1,float16,fp8,0,0.5026944160461426
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,2048,24,24,128,1,fp8,fp8,0,0.5545728206634521
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,2048,24,8,128,1,fp8,fp8,0,0.9497216224670411
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,2048,24,1,128,1,float16,fp8,0,0.43329920768737795
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,2048,24,1,128,1,fp8,fp8,0,0.48063039779663086
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,2048,24,2,128,1,float16,fp8,0,0.45073280334472654
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,2048,24,2,128,1,fp8,fp8,0,0.46379518508911133
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,2048,24,4,128,1,float16,float16,0,0.44364638328552247
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,2048,24,4,128,1,float16,fp8,0,0.4804512023925781
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,2048,24,8,128,1,float16,float16,0,0.47517762184143064
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,2048,24,8,128,1,float16,fp8,0,0.43315839767456055
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,2048,24,8,128,1,fp8,fp8,0,0.4533775806427002
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,2048,24,24,128,1,float16,float16,0,0.2900304079055786
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,2048,24,24,128,1,float16,fp8,0,0.26521759033203124
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,2048,24,1,128,1,float16,float16,0,0.2314512014389038
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,2048,24,1,128,1,float16,fp8,0,0.22941598892211915
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,2048,24,1,128,1,fp8,fp8,0,0.23206400871276855
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,2048,24,2,128,1,float16,float16,0,0.2301151990890503
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,2048,24,2,128,1,float16,fp8,0,0.23021600246429444
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,2048,24,2,128,1,fp8,fp8,0,0.22986080646514892
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,2048,24,8,128,1,float16,fp8,0,0.8360671997070312
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,2048,24,4,128,1,float16,float16,0,0.23392000198364257
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,2048,24,4,128,1,float16,fp8,0,0.22876479625701904
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,2048,24,4,128,1,fp8,fp8,0,0.22961280345916749
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,2048,24,2,128,1,float16,float16,0,0.43021278381347655
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,2048,24,8,128,1,float16,float16,0,0.24281280040740966
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,2048,24,8,128,1,float16,fp8,0,0.23003358840942384
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,2048,24,8,128,1,fp8,fp8,0,0.22928481101989745
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,2048,24,24,128,1,float16,float16,0,0.15589920282363892
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,2048,24,24,128,1,float16,fp8,0,0.14610400199890136
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,2048,24,1,128,1,float16,float16,0,0.12909120321273804
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,2048,24,4,128,1,fp8,fp8,0,0.4427184104919434
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,2048,24,1,128,1,float16,fp8,0,0.1273952007293701
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,2048,24,1,128,1,fp8,fp8,0,0.1275760054588318
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,2048,24,2,128,1,float16,float16,0,0.12966079711914064
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,2048,24,2,128,1,float16,fp8,0,0.1273311972618103
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,2048,24,2,128,1,fp8,fp8,0,0.1275439977645874
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,2048,24,4,128,1,float16,float16,0,0.13234080076217652
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,2048,24,4,128,1,float16,fp8,0,0.12747360467910768
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,2048,24,4,128,1,fp8,fp8,0,0.12753440141677858
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,2048,24,8,128,1,float16,float16,0,0.1374079942703247
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,2048,24,8,128,1,float16,fp8,0,0.12866560220718384
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,2048,24,8,128,1,fp8,fp8,0,0.12797759771347045
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,2048,24,24,128,1,float16,float16,0,0.09221280217170716
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,2048,24,24,128,1,float16,fp8,0,0.08552160263061523
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,2048,24,1,128,1,float16,fp8,0,0.8337408065795898
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,2048,24,1,128,1,float16,float16,0,0.07847840189933777
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,2048,24,1,128,1,float16,fp8,0,0.07630079984664917
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,2048,24,1,128,1,fp8,fp8,0,0.07570719718933105
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,2048,24,2,128,1,float16,float16,0,0.07832319736480713
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,2048,24,2,128,1,float16,fp8,0,0.07687360048294067
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,2048,24,2,128,1,fp8,fp8,0,0.07569119930267335
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,2048,24,4,128,1,float16,float16,0,0.07938719987869262
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,2048,24,4,128,1,fp8,fp8,0,0.07599840164184571
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,2048,24,4,128,1,float16,fp8,0,0.07657279968261718
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,2048,24,8,128,1,float16,fp8,0,0.07695519924163818
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,2048,24,8,128,1,float16,float16,0,0.08123360276222229
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,2048,24,24,128,1,float16,float16,0,0.05645279884338379
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,2048,24,24,128,1,float16,fp8,0,0.055687999725341795
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,2048,24,24,128,1,fp8,fp8,0,0.05562880039215088
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,2048,24,1,128,1,float16,float16,0,0.05276479721069336
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,2048,24,1,128,1,float16,fp8,0,0.05166879892349243
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,2048,24,2,128,1,float16,float16,0,0.05263360142707825
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,2048,24,2,128,1,float16,fp8,0,0.05152159929275513
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,2048,24,2,128,1,fp8,fp8,0,0.051020801067352295
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,2048,24,4,128,1,float16,float16,0,0.0536624014377594
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,2048,24,4,128,1,float16,fp8,0,0.0514240026473999
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,2048,24,4,128,1,fp8,fp8,0,0.05143679976463318
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,2048,24,1,128,1,float16,float16,0,0.42859840393066406
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,2048,24,8,128,1,float16,float16,0,0.05416479706764221
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,2048,24,8,128,1,fp8,fp8,0,0.0514303982257843
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,2048,24,24,128,1,fp8,fp8,0,0.1461583971977234
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1536,24,1,128,1,float16,float16,0,1.037881565093994
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1536,24,1,128,1,float16,fp8,0,1.0540528297424316
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,2048,24,24,128,1,fp8,fp8,0,0.27062559127807617
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1536,24,1,128,1,fp8,fp8,0,1.054923152923584
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1536,24,2,128,1,float16,float16,0,1.0467776298522948
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1536,24,2,128,1,float16,fp8,0,1.05338716506958
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,2048,24,8,128,1,fp8,fp8,0,0.07600319981575013
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,2048,24,1,128,1,fp8,fp8,0,0.05142880082130432
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1536,24,2,128,1,fp8,fp8,0,1.0560064315795898
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1536,24,4,128,1,float16,float16,0,1.0714655876159669
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1536,24,4,128,1,float16,fp8,0,1.173523235321045
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1536,24,4,128,1,fp8,fp8,0,1.0537424087524414
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1536,24,8,128,1,float16,float16,0,1.142193603515625
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1536,24,24,128,1,float16,float16,0,0.7086415767669678
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1536,24,8,128,1,float16,fp8,0,1.0544639587402345
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1536,24,24,128,1,float16,fp8,0,0.6608352184295654
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1536,24,24,128,1,fp8,fp8,0,0.6431424140930175
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1536,24,8,128,1,fp8,fp8,0,1.0565376281738281
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1536,24,1,128,1,float16,float16,0,0.5298927783966064
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1536,24,1,128,1,float16,fp8,0,0.5708367824554443
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1536,24,1,128,1,fp8,fp8,0,0.538105583190918
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1536,24,2,128,1,float16,fp8,0,0.5373136043548584
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1536,24,4,128,1,float16,float16,0,0.5481616020202636
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1536,24,4,128,1,float16,fp8,0,0.5569712162017822
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1536,24,4,128,1,fp8,fp8,0,0.538478422164917
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,2048,24,24,128,1,fp8,fp8,0,0.08483359813690186
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1536,24,8,128,1,float16,float16,0,0.5747695922851562
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1536,24,8,128,1,float16,fp8,0,0.5487423896789551
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1536,24,24,128,1,float16,float16,0,0.3638367891311646
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1536,24,1,128,1,float16,float16,0,0.2759056091308594
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1536,24,24,128,1,fp8,fp8,0,0.3328991889953613
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1536,24,1,128,1,float16,fp8,0,0.2807071924209595
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1536,24,1,128,1,fp8,fp8,0,0.2906960010528564
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1536,24,2,128,1,float16,float16,0,0.27256801128387453
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1536,24,2,128,1,float16,fp8,0,0.2808271884918213
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1536,24,2,128,1,fp8,fp8,0,0.28192160129547117
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1536,24,4,128,1,float16,float16,0,0.2808271884918213
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,2048,24,8,128,1,float16,fp8,0,0.05143359899520874
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1536,24,4,128,1,float16,fp8,0,0.28095359802246095
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1536,24,4,128,1,fp8,fp8,0,0.2819375991821289
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1536,24,8,128,1,float16,float16,0,0.2960639953613281
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1536,24,8,128,1,float16,fp8,0,0.28122079372406006
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1536,24,8,128,1,fp8,fp8,0,0.2816704034805298
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1536,24,24,128,1,float16,float16,0,0.19143199920654297
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1536,24,24,128,1,float16,fp8,0,0.17919039726257324
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1536,24,24,128,1,fp8,fp8,0,0.17972960472106933
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1536,24,1,128,1,float16,fp8,0,0.15143359899520875
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1536,24,1,128,1,fp8,fp8,0,0.1518239974975586
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1536,24,2,128,1,float16,float16,0,0.149726402759552
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1536,24,2,128,1,float16,fp8,0,0.15230400562286378
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1536,24,2,128,1,fp8,fp8,0,0.15186560153961182
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1536,24,4,128,1,float16,float16,0,0.15368640422821045
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1536,24,4,128,1,float16,fp8,0,0.15200480222702026
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1536,24,2,128,1,fp8,fp8,0,0.5390431880950928
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1536,24,4,128,1,fp8,fp8,0,0.15293920040130615
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1536,24,8,128,1,float16,float16,0,0.15973119735717772
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1536,24,8,128,1,float16,fp8,0,0.1524448037147522
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1536,24,8,128,1,fp8,fp8,0,0.15285279750823974
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1536,24,24,128,1,float16,float16,0,0.10631200075149536
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1536,24,24,128,1,float16,fp8,0,0.1015663981437683
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1536,24,24,128,1,fp8,fp8,0,0.10119999647140503
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1536,24,1,128,1,float16,float16,0,0.08596320152282715
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1536,24,1,128,1,float16,fp8,0,0.08697919845581055
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1536,24,1,128,1,fp8,fp8,0,0.08647040128707886
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1536,24,2,128,1,float16,float16,0,0.08608959913253784
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1536,24,2,128,1,float16,fp8,0,0.08614879846572876
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1536,24,2,128,1,fp8,fp8,0,0.08655200004577637
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1536,24,4,128,1,float16,float16,0,0.08796479701995849
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1536,24,24,128,1,float16,fp8,0,0.3341072082519531
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1536,24,4,128,1,float16,fp8,0,0.0864575982093811
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1536,24,4,128,1,fp8,fp8,0,0.08662239909172058
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1536,24,8,128,1,fp8,fp8,0,0.5460415840148926
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1536,24,8,128,1,float16,float16,0,0.09334880113601685
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1536,24,8,128,1,float16,fp8,0,0.08630560040473938
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1536,24,24,128,1,float16,float16,0,0.0640720009803772
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1536,24,24,128,1,float16,fp8,0,0.059772801399230954
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1536,24,1,128,1,float16,float16,0,0.05480319857597351
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1536,24,1,128,1,float16,fp8,0,0.05379679799079895
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1536,24,1,128,1,fp8,fp8,0,0.053416001796722415
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1536,24,2,128,1,float16,float16,0,0.054958397150039674
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1536,24,2,128,1,float16,fp8,0,0.05385439991950989
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1536,24,2,128,1,fp8,fp8,0,0.053478401899337766
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1536,24,4,128,1,float16,fp8,0,0.05421440005302429
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1536,24,4,128,1,fp8,fp8,0,0.0534608006477356
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1536,24,8,128,1,float16,float16,0,0.05745919942855835
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1536,24,8,128,1,float16,fp8,0,0.05364959836006165
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1536,24,8,128,1,fp8,fp8,0,0.053904002904891966
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1536,24,24,128,1,float16,float16,0,0.03641279935836792
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1536,24,24,128,1,float16,fp8,0,0.03511039912700653
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1536,24,24,128,1,fp8,fp8,0,0.035124799609184264
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1536,24,1,128,1,float16,float16,0,0.03302719891071319
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1536,24,1,128,1,float16,fp8,0,0.03290880024433136
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1536,24,1,128,1,fp8,fp8,0,0.03277119994163513
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1536,24,2,128,1,float16,fp8,0,0.03304480016231537
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1536,24,2,128,1,fp8,fp8,0,0.0329584002494812
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1536,24,1,128,1,float16,float16,0,0.14991999864578248
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1536,24,4,128,1,float16,fp8,0,0.03300639986991882
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1536,24,4,128,1,fp8,fp8,0,0.033022400736808774
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1536,24,8,128,1,float16,float16,0,0.03506560027599335
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1536,24,8,128,1,float16,fp8,0,0.03298879861831665
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1536,24,8,128,1,fp8,fp8,0,0.03287039995193482
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1536,24,2,128,1,float16,float16,0,0.533353614807129
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,1024,24,1,128,1,float16,float16,0,1.0971360206604004
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1536,24,8,128,1,fp8,fp8,0,0.08648480176925659
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,1024,24,1,128,1,float16,fp8,0,1.153553581237793
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1536,24,24,128,1,fp8,fp8,0,0.05990880131721497
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,1024,24,1,128,1,fp8,fp8,0,1.1504351615905761
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1536,24,4,128,1,float16,float16,0,0.05562400221824646
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,1024,24,2,128,1,float16,float16,0,1.1352527618408204
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1536,24,2,128,1,float16,float16,0,0.032979199290275575
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,1024,24,2,128,1,fp8,fp8,0,1.151857566833496
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,1024,24,4,128,1,float16,float16,0,1.1343215942382812
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,1024,24,4,128,1,float16,fp8,0,1.149177646636963
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1536,24,4,128,1,float16,float16,0,0.03332479894161224
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,1024,24,4,128,1,fp8,fp8,0,1.1538895606994628
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1024,24,24,128,1,float16,float16,0,0.8027008056640625
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1024,24,1,128,1,float16,float16,0,0.5496479988098144
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1024,24,24,128,1,float16,fp8,0,0.7232927799224853
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1024,24,24,128,1,fp8,fp8,0,0.7266143798828125
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,1024,24,8,128,1,float16,fp8,0,1.1538640022277833
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,1024,24,8,128,1,float16,float16,0,1.2725071907043457
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,1024,24,8,128,1,fp8,fp8,0,1.152344036102295
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1024,24,2,128,1,float16,float16,0,0.5523856163024903
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1024,24,1,128,1,fp8,fp8,0,0.5857279777526856
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1024,24,2,128,1,fp8,fp8,0,0.585374402999878
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1024,24,4,128,1,float16,float16,0,0.6327904224395752
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1024,24,4,128,1,float16,fp8,0,0.5862703800201416
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1024,24,4,128,1,fp8,fp8,0,0.584876823425293
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1024,24,8,128,1,float16,float16,0,0.6165567874908447
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1024,24,24,128,1,float16,float16,0,0.4042719841003418
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1024,24,8,128,1,float16,fp8,0,0.5859519958496093
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1024,24,8,128,1,fp8,fp8,0,0.6176383972167969
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1024,24,24,128,1,float16,fp8,0,0.3718127965927124
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1024,24,1,128,1,float16,fp8,0,0.30260319709777833
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1024,24,24,128,1,fp8,fp8,0,0.3728048086166382
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1024,24,2,128,1,float16,float16,0,0.28621439933776854
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1024,24,2,128,1,float16,fp8,0,0.30212318897247314
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1024,24,2,128,1,fp8,fp8,0,0.3025631904602051
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,1024,24,2,128,1,float16,fp8,0,1.1541152000427246
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1024,24,4,128,1,float16,float16,0,0.29564800262451174
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1024,24,4,128,1,float16,fp8,0,0.30229599475860597
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1024,24,4,128,1,fp8,fp8,0,0.3028255939483643
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1024,24,8,128,1,float16,float16,0,0.31472320556640626
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1024,24,8,128,1,float16,fp8,0,0.3028287887573242
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1024,24,24,128,1,float16,float16,0,0.21026880741119386
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1024,24,8,128,1,fp8,fp8,0,0.3026992082595825
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1024,24,24,128,1,float16,fp8,0,0.19723039865493774
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1024,24,1,128,1,float16,float16,0,0.1544592022895813
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1024,24,24,128,1,fp8,fp8,0,0.19731839895248413
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1024,24,1,128,1,float16,fp8,0,0.16111199855804442
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1024,24,2,128,1,float16,float16,0,0.1544816017150879
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1024,24,2,128,1,float16,fp8,0,0.16068480014801026
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1024,24,2,128,1,fp8,fp8,0,0.16138399839401246
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1024,24,4,128,1,float16,float16,0,0.1600640058517456
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1024,24,4,128,1,float16,fp8,0,0.1614416003227234
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1024,24,4,128,1,fp8,fp8,0,0.16189440488815307
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1024,24,2,128,1,float16,fp8,0,0.5863344192504882
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1024,24,8,128,1,float16,float16,0,0.16883679628372192
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1024,24,8,128,1,float16,fp8,0,0.1622496008872986
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1024,24,24,128,1,float16,float16,0,0.11414239406585694
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1024,24,8,128,1,fp8,fp8,0,0.16140320301055908
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1024,24,24,128,1,float16,fp8,0,0.10877280235290528
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1024,24,24,128,1,fp8,fp8,0,0.10848799943923951
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1024,24,1,128,1,float16,fp8,0,0.08878880143165588
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1024,24,1,128,1,fp8,fp8,0,0.08886240124702453
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1024,24,2,128,1,float16,float16,0,0.08827999830245972
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1024,24,2,128,1,float16,fp8,0,0.08909119963645935
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1024,24,2,128,1,fp8,fp8,0,0.08891199827194214
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1024,24,4,128,1,float16,float16,0,0.09070559740066528
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1024,24,4,128,1,float16,fp8,0,0.08952640295028687
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1024,24,1,128,1,fp8,fp8,0,0.31743679046630857
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1024,24,4,128,1,fp8,fp8,0,0.08919039964675904
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1024,24,8,128,1,float16,float16,0,0.0957423985004425
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1024,24,8,128,1,float16,fp8,0,0.08992639780044556
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1024,24,24,128,1,float16,float16,0,0.06669120192527771
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1024,24,8,128,1,fp8,fp8,0,0.09000639915466309
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1024,24,24,128,1,float16,fp8,0,0.0623088002204895
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1024,24,1,128,1,float16,float16,0,0.05271040201187134
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1024,24,1,128,1,float16,fp8,0,0.05325279831886291
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1024,24,1,128,1,fp8,fp8,0,0.053427201509475705
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1024,24,2,128,1,float16,float16,0,0.05319039821624756
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1024,24,2,128,1,fp8,fp8,0,0.053350400924682614
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1024,24,4,128,1,float16,float16,0,0.05349439978599548
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1024,24,4,128,1,float16,fp8,0,0.05351200103759766
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1024,24,4,128,1,fp8,fp8,0,0.05313119888305664
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1024,24,8,128,1,float16,float16,0,0.05631840229034424
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1024,24,8,128,1,float16,fp8,0,0.053572797775268556
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1024,24,8,128,1,fp8,fp8,0,0.053483200073242185
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1024,24,24,128,1,float16,float16,0,0.040880000591278075
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1024,24,24,128,1,float16,fp8,0,0.04115999937057495
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1024,24,24,128,1,fp8,fp8,0,0.040561598539352414
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1024,24,1,128,1,float16,float16,0,0.03713920116424561
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1024,24,1,128,1,float16,fp8,0,0.03687680065631867
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1024,24,1,128,1,fp8,fp8,0,0.0367792010307312
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1024,24,2,128,1,float16,float16,0,0.03698880076408386
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1024,24,2,128,1,float16,fp8,0,0.03703519999980927
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1024,24,2,128,1,fp8,fp8,0,0.03699359893798828
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1024,24,4,128,1,float16,float16,0,0.03747679889202118
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1024,24,4,128,1,float16,fp8,0,0.036852800846099855
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1024,24,4,128,1,fp8,fp8,0,0.03683519959449768
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1024,24,1,128,1,fp8,fp8,0,0.160697603225708
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1024,24,8,128,1,float16,float16,0,0.03819519877433777
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1024,24,8,128,1,float16,fp8,0,0.03687520027160644
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1024,24,8,128,1,fp8,fp8,0,0.03707520067691803
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1024,24,24,128,1,float16,float16,0,0.026830399036407472
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1024,24,24,128,1,float16,fp8,0,0.026855999231338502
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1024,24,1,128,1,float16,fp8,0,0.5958784103393555
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1024,24,24,128,1,fp8,fp8,0,0.02683199942111969
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1024,24,1,128,1,float16,fp8,0,0.024803200364112855
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1024,24,2,128,1,float16,float16,0,0.024827200174331664
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1024,24,1,128,1,fp8,fp8,0,0.024747200310230255
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1024,24,2,128,1,float16,fp8,0,0.02480800002813339
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1024,24,2,128,1,fp8,fp8,0,0.02476000040769577
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1024,24,4,128,1,float16,float16,0,0.0247871994972229
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1024,24,4,128,1,float16,fp8,0,0.024795199930667877
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1024,24,8,128,1,float16,float16,0,0.027158400416374205
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1024,24,8,128,1,float16,fp8,0,0.024796800315380098
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1024,24,1,128,1,float16,float16,0,0.0880895972251892
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1024,24,1,128,1,float16,float16,0,0.2853087902069092
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,512,24,1,128,1,float16,float16,0,0.8130656242370605
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,512,24,1,128,1,float16,fp8,0,0.9053071975708008
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1024,24,24,128,1,fp8,fp8,0,0.06254720091819763
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1024,24,2,128,1,float16,fp8,0,0.05315679907798767
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,512,24,1,128,1,fp8,fp8,0,0.9040847778320312
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,512,24,2,128,1,float16,float16,0,0.8172512054443359
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,512,24,2,128,1,float16,fp8,0,0.9035344123840332
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,512,24,2,128,1,fp8,fp8,0,0.9050959587097168
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,512,24,4,128,1,float16,float16,0,0.856230354309082
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,512,24,4,128,1,float16,fp8,0,0.9017248153686523
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,512,24,4,128,1,fp8,fp8,0,0.9522720336914062
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,512,24,8,128,1,float16,fp8,0,0.9023887634277343
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1024,24,1,128,1,float16,float16,0,0.0247856006026268
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1024,24,4,128,1,fp8,fp8,0,0.024753600358963013
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,512,24,8,128,1,fp8,fp8,0,0.9022975921630859
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1024,24,8,128,1,fp8,fp8,0,0.024843199551105498
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,512,24,24,128,1,float16,fp8,0,0.6053103923797607
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,512,24,1,128,1,float16,float16,0,0.4144144058227539
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,512,24,1,128,1,float16,fp8,0,0.4591519832611084
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,512,24,24,128,1,fp8,fp8,0,0.5962048053741456
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,512,24,1,128,1,fp8,fp8,0,0.4593008041381836
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,512,24,2,128,1,float16,float16,0,0.4164224147796631
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,512,24,2,128,1,float16,fp8,0,0.4620336055755615
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,512,24,2,128,1,fp8,fp8,0,0.45757761001586916
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,512,24,4,128,1,float16,float16,0,0.4361392021179199
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,512,24,4,128,1,float16,fp8,0,0.4596208095550537
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,512,24,4,128,1,fp8,fp8,0,0.4585616111755371
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,512,24,8,128,1,float16,float16,0,0.4755727767944336
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,512,24,8,128,1,float16,fp8,0,0.4587967872619629
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,512,24,8,128,1,fp8,fp8,0,0.45775041580200193
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,512,24,24,128,1,float16,float16,0,0.33005599975585936
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,512,24,24,128,1,fp8,fp8,0,0.305401611328125
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,512,24,1,128,1,float16,float16,0,0.21612319946289063
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,512,24,1,128,1,float16,fp8,0,0.237174391746521
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,512,24,1,128,1,fp8,fp8,0,0.23823680877685546
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,512,24,2,128,1,float16,fp8,0,0.23720960617065429
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,512,24,2,128,1,fp8,fp8,0,0.2365664005279541
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,512,24,4,128,1,float16,float16,0,0.22634239196777345
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,512,24,4,128,1,fp8,fp8,0,0.23714239597320558
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,512,24,8,128,1,float16,float16,0,0.24707200527191162
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,512,24,8,128,1,float16,fp8,0,0.23684799671173096
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,512,24,8,128,1,fp8,fp8,0,0.23775041103363037
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,512,24,24,128,1,float16,float16,0,0.1718384027481079
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,512,24,24,128,1,float16,fp8,0,0.1619279980659485
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,512,24,8,128,1,float16,float16,0,0.9387151718139648
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,512,24,24,128,1,fp8,fp8,0,0.16044000387191773
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,512,24,1,128,1,float16,float16,0,0.11839200258255005
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,512,24,24,128,1,float16,float16,0,0.6460783958435059
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,512,24,1,128,1,fp8,fp8,0,0.12591199874877929
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,512,24,2,128,1,float16,float16,0,0.11889280080795288
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,512,24,2,128,1,float16,fp8,0,0.12594239711761473
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,512,24,2,128,1,fp8,fp8,0,0.12603039741516114
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,512,24,4,128,1,float16,float16,0,0.12368639707565307
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,512,24,4,128,1,float16,fp8,0,0.12586400508880616
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,512,24,4,128,1,fp8,fp8,0,0.1259376049041748
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,512,24,8,128,1,float16,float16,0,0.13220640420913696
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,512,24,8,128,1,float16,fp8,0,0.12738879919052123
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,512,24,24,128,1,float16,float16,0,0.09448320269584656
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,512,24,8,128,1,fp8,fp8,0,0.1265519976615906
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,512,24,24,128,1,float16,fp8,0,0.08887839913368226
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,512,24,24,128,1,fp8,fp8,0,0.08903840184211731
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,512,24,1,128,1,float16,float16,0,0.06729599833488464
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,512,24,1,128,1,float16,fp8,0,0.06995199918746949
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,512,24,2,128,1,float16,fp8,0,0.07022079825401306
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,512,24,1,128,1,fp8,fp8,0,0.07007039785385132
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,512,24,2,128,1,fp8,fp8,0,0.06997119784355163
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,512,24,4,128,1,float16,float16,0,0.07053279876708984
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,512,24,4,128,1,fp8,fp8,0,0.07017599940299987
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,512,24,24,128,1,float16,fp8,0,0.30576000213623045
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,512,24,8,128,1,float16,float16,0,0.07569760084152222
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,512,24,8,128,1,float16,fp8,0,0.07172319889068604
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,512,24,8,128,1,fp8,fp8,0,0.07120640277862549
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,512,24,24,128,1,float16,float16,0,0.05380799770355225
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,512,24,2,128,1,float16,float16,0,0.2174448013305664
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,512,24,24,128,1,float16,fp8,0,0.0497871994972229
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,512,24,24,128,1,fp8,fp8,0,0.05086560249328613
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,512,24,1,128,1,float16,float16,0,0.04019039869308472
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,512,24,1,128,1,float16,fp8,0,0.041203200817108154
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,512,24,1,128,1,fp8,fp8,0,0.04119040071964264
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,512,24,2,128,1,float16,float16,0,0.04023680090904236
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,512,24,2,128,1,float16,fp8,0,0.0411871999502182
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,512,24,4,128,1,float16,float16,0,0.041212800145149234
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,512,24,4,128,1,float16,fp8,0,0.041193601489067075
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,512,24,2,128,1,fp8,fp8,0,0.041252800822258
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,512,24,4,128,1,fp8,fp8,0,0.04120480120182037
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,512,24,8,128,1,float16,float16,0,0.04343520104885101
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,512,24,8,128,1,float16,fp8,0,0.041254401206970215
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,512,24,8,128,1,fp8,fp8,0,0.04123679995536804
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,512,24,24,128,1,float16,fp8,0,0.03298400044441223
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,512,24,24,128,1,fp8,fp8,0,0.03280960023403168
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,512,24,1,128,1,float16,float16,0,0.02725439965724945
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,512,24,1,128,1,float16,fp8,0,0.028515198826789857
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,512,24,1,128,1,fp8,fp8,0,0.02866399884223938
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,512,24,2,128,1,float16,float16,0,0.028198400139808656
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,512,24,2,128,1,float16,fp8,0,0.028758400678634645
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,512,24,2,128,1,fp8,fp8,0,0.028939199447631837
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,512,24,4,128,1,float16,float16,0,0.028856000304222106
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,512,24,4,128,1,float16,fp8,0,0.028948798775672913
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,512,24,4,128,1,fp8,fp8,0,0.028892800211906433
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,512,24,8,128,1,float16,float16,0,0.02908959984779358
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,512,24,8,128,1,float16,fp8,0,0.028806400299072266
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,512,24,8,128,1,fp8,fp8,0,0.028951999545097352
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,512,24,24,128,1,float16,float16,0,0.02070080041885376
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,512,24,24,128,1,float16,fp8,0,0.020692799985408784
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,512,24,24,128,1,fp8,fp8,0,0.021639999747276307
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,512,24,1,128,1,float16,float16,0,0.01868640035390854
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,512,24,1,128,1,float16,fp8,0,0.12715519666671754
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,512,24,1,128,1,fp8,fp8,0,0.01866399943828583
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,512,24,1,128,1,float16,fp8,0,0.018742400407791137
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,512,24,2,128,1,float16,float16,0,0.01870719939470291
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,512,24,2,128,1,float16,fp8,0,0.018673600256443025
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,512,24,2,128,1,fp8,fp8,0,0.018825599551200868
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,512,24,4,128,1,float16,float16,0,0.01871519982814789
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,512,24,4,128,1,float16,fp8,0,0.019489599764347075
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,512,24,4,128,1,fp8,fp8,0,0.018723200261592864
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,512,24,8,128,1,float16,float16,0,0.020587199926376344
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,512,24,8,128,1,float16,fp8,0,0.01871040016412735
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,512,24,8,128,1,fp8,fp8,0,0.01934240013360977
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,512,24,24,128,1,float16,float16,0,0.01874080002307892
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,512,24,24,128,1,float16,fp8,0,0.018742400407791137
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,512,24,1,128,1,float16,float16,0,0.018624000251293182
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,512,24,24,128,1,fp8,fp8,0,0.018721599876880646
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,512,24,1,128,1,fp8,fp8,0,0.018648000061511995
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,512,24,2,128,1,float16,float16,0,0.01865279972553253
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,512,24,2,128,1,float16,fp8,0,0.018611200153827667
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,512,24,4,128,1,float16,float16,0,0.018667200207710268
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,512,24,2,128,1,fp8,fp8,0,0.018670399487018586
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,512,24,4,128,1,float16,fp8,0,0.01866399943828583
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,512,24,4,128,1,fp8,fp8,0,0.018646399676799773
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,512,24,8,128,1,float16,float16,0,0.018636800348758698
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,512,24,8,128,1,float16,fp8,0,0.018646399676799773
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,512,24,8,128,1,fp8,fp8,0,0.018654400110244752
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,512,24,2,128,1,float16,float16,0,0.06797119975090027
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,512,24,4,128,1,float16,fp8,0,0.06992480158805847
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,256,24,1,128,1,float16,float16,0,0.3422960042953491
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,256,24,1,128,1,float16,fp8,0,0.39130239486694335
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,256,24,1,128,1,fp8,fp8,0,0.3897887945175171
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,256,24,2,128,1,float16,float16,0,0.3429215908050537
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,256,24,2,128,1,float16,fp8,0,0.391811203956604
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,256,24,2,128,1,fp8,fp8,0,0.39127039909362793
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,256,24,4,128,1,float16,float16,0,0.36230719089508057
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,256,24,4,128,1,float16,fp8,0,0.39060800075531005
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,256,24,4,128,1,fp8,fp8,0,0.3913887977600098
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,512,24,24,128,1,float16,float16,0,0.03167040050029755
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,256,24,8,128,1,float16,float16,0,0.402894401550293
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,256,24,8,128,1,float16,fp8,0,0.3900000095367432
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,256,24,24,128,1,float16,float16,0,0.2891200065612793
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,256,24,24,128,1,float16,fp8,0,0.26885600090026857
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,256,24,1,128,1,float16,float16,0,0.17787840366363525
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,256,24,24,128,1,fp8,fp8,0,0.2693135976791382
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,256,24,1,128,1,float16,fp8,0,0.2013808012008667
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,256,24,1,128,1,fp8,fp8,0,0.20130879878997804
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,256,24,2,128,1,float16,fp8,0,0.2013200044631958
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,256,24,4,128,1,float16,float16,0,0.18771519660949706
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,256,24,4,128,1,float16,fp8,0,0.20166559219360353
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,256,24,4,128,1,fp8,fp8,0,0.20154879093170167
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,256,24,8,128,1,float16,float16,0,0.2079456090927124
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,512,24,1,128,1,float16,fp8,0,0.018644799292087556
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,256,24,8,128,1,float16,fp8,0,0.20145120620727539
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,256,24,24,128,1,float16,float16,0,0.15243519544601442
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,256,24,8,128,1,fp8,fp8,0,0.20211520195007324
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,256,24,24,128,1,float16,fp8,0,0.14226239919662476
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,256,24,24,128,1,fp8,fp8,0,0.14275200366973878
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,256,24,1,128,1,float16,float16,0,0.09859200119972229
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,256,24,1,128,1,float16,fp8,0,0.10915679931640625
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,256,24,1,128,1,fp8,fp8,0,0.10804640054702759
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,256,24,2,128,1,float16,float16,0,0.09841920137405395
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,256,24,2,128,1,float16,fp8,0,0.1090127944946289
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,256,24,2,128,1,fp8,fp8,0,0.1086400032043457
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,256,24,4,128,1,float16,float16,0,0.1031391978263855
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,256,24,4,128,1,float16,fp8,0,0.10874240398406983
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,256,24,4,128,1,fp8,fp8,0,0.10885119438171387
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,256,24,8,128,1,float16,float16,0,0.11239999532699585
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,256,24,8,128,1,float16,fp8,0,0.10900640487670898
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,256,24,8,128,1,fp8,fp8,0,0.10891679525375367
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,256,24,24,128,1,float16,float16,0,0.08452320098876953
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,256,24,24,128,1,float16,fp8,0,0.08011680245399475
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,256,24,1,128,1,float16,float16,0,0.05725439786911011
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,256,24,24,128,1,fp8,fp8,0,0.08002560138702393
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,256,24,1,128,1,fp8,fp8,0,0.06064000129699707
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,256,24,2,128,1,float16,float16,0,0.05798400044441223
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,256,24,2,128,1,float16,fp8,0,0.06030719876289368
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,256,24,2,128,1,fp8,fp8,0,0.060603201389312744
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,256,24,4,128,1,float16,fp8,0,0.061300802230834964
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,256,24,4,128,1,fp8,fp8,0,0.061715197563171384
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,256,24,8,128,1,float16,float16,0,0.0650223970413208
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,256,24,8,128,1,float16,fp8,0,0.06183040142059326
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,256,24,8,128,1,fp8,fp8,0,0.0616752028465271
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,512,24,4,128,1,float16,fp8,0,0.23760480880737306
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,256,24,24,128,1,float16,fp8,0,0.04323199987411499
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,256,24,1,128,1,float16,float16,0,0.03297280073165894
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,256,24,24,128,1,fp8,fp8,0,0.04320000112056732
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,256,24,1,128,1,float16,fp8,0,0.03486399948596954
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,256,24,1,128,1,fp8,fp8,0,0.035076799988746646
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,256,24,2,128,1,float16,float16,0,0.03300319910049439
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,256,24,2,128,1,float16,fp8,0,0.03503040075302124
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,256,24,2,128,1,fp8,fp8,0,0.03505280017852783
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,256,24,4,128,1,float16,float16,0,0.034467199444770814
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,256,24,4,128,1,float16,fp8,0,0.034959998726844785
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,256,24,4,128,1,fp8,fp8,0,0.03511039912700653
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,256,24,8,128,1,float16,float16,0,0.03613759875297547
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,256,24,8,128,1,float16,fp8,0,0.03508319854736328
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,256,24,2,128,1,float16,float16,0,0.1790671944618225
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,256,24,8,128,1,fp8,fp8,0,0.03507040143013
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,256,24,24,128,1,float16,float16,0,0.028923198580741882
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,256,24,24,128,1,float16,fp8,0,0.02886880040168762
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,256,24,1,128,1,float16,float16,0,0.023792000114917757
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,256,24,1,128,1,float16,fp8,0,0.024801599979400634
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,256,24,2,128,1,fp8,fp8,0,0.2013983964920044
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,256,24,2,128,1,float16,float16,0,0.02343679964542389
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,256,24,2,128,1,float16,fp8,0,0.024753600358963013
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,256,24,2,128,1,fp8,fp8,0,0.0247871994972229
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,256,24,4,128,1,float16,float16,0,0.02479359954595566
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,256,24,4,128,1,float16,fp8,0,0.024715200066566467
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,256,24,4,128,1,fp8,fp8,0,0.024748800694942473
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,256,24,8,128,1,float16,float16,0,0.024795199930667877
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,256,24,8,128,1,float16,fp8,0,0.024796800315380098
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,256,24,24,128,1,float16,float16,0,0.018454399704933167
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,256,24,24,128,1,float16,fp8,0,0.01873279958963394
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,256,24,24,128,1,fp8,fp8,0,0.01865279972553253
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,256,24,1,128,1,float16,float16,0,0.016475200653076172
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,256,24,1,128,1,float16,fp8,0,0.016607999801635742
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,256,24,1,128,1,fp8,fp8,0,0.01658560037612915
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,256,24,2,128,1,float16,float16,0,0.01661760061979294
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,256,24,2,128,1,float16,fp8,0,0.016620799899101257
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,256,24,1,128,1,float16,fp8,0,0.06155679821968078
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,256,24,4,128,1,float16,float16,0,0.06058719754219055
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,256,24,4,128,1,float16,float16,0,0.016598400473594666
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,256,24,4,128,1,float16,fp8,0,0.01663679927587509
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,256,24,4,128,1,fp8,fp8,0,0.016622400283813475
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,256,24,24,128,1,float16,float16,0,0.04710400104522705
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,256,24,8,128,1,float16,fp8,0,0.01656319946050644
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,256,24,8,128,1,fp8,fp8,0,0.016596800088882445
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,256,24,24,128,1,float16,float16,0,0.01660960018634796
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,256,24,24,128,1,float16,fp8,0,0.016771200299263
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,256,24,8,128,1,fp8,fp8,0,0.3906559944152832
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,256,24,24,128,1,fp8,fp8,0,0.016627199947834015
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,256,24,1,128,1,float16,float16,0,0.014584000408649444
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,256,24,1,128,1,float16,fp8,0,0.01658560037612915
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,256,24,1,128,1,fp8,fp8,0,0.014575999975204468
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,256,24,2,128,1,float16,float16,0,0.015662400424480437
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,256,24,2,128,1,float16,fp8,0,0.014545600116252898
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,256,24,2,128,1,fp8,fp8,0,0.01544640064239502
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,256,24,4,128,1,float16,float16,0,0.015203200280666351
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,256,24,4,128,1,float16,fp8,0,0.015526400506496429
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,256,24,4,128,1,fp8,fp8,0,0.016414399445056915
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,256,24,8,128,1,float16,float16,0,0.016612799465656282
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,256,24,8,128,1,float16,fp8,0,0.016390399634838106
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,256,24,1,128,1,fp8,fp8,0,0.024822400510311128
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,256,24,8,128,1,fp8,fp8,0,0.015707199275493623
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,256,24,24,128,1,float16,float16,0,0.01653439998626709
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,256,24,24,128,1,float16,fp8,0,0.014591999351978302
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,256,24,24,128,1,fp8,fp8,0,0.01610720008611679
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,256,24,1,128,1,float16,fp8,0,0.014577600359916686
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,256,24,1,128,1,fp8,fp8,0,0.014558400213718414
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,256,24,2,128,1,float16,float16,0,0.014587199687957764
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,256,24,2,128,1,float16,fp8,0,0.014699199795722961
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,256,24,2,128,1,fp8,fp8,0,0.014526399970054626
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,256,24,4,128,1,float16,float16,0,0.014504000544548035
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,256,24,8,128,1,fp8,fp8,0,0.024723200500011443
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,256,24,4,128,1,float16,fp8,0,0.014545600116252898
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,256,24,4,128,1,fp8,fp8,0,0.014588800072669984
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,256,24,8,128,1,float16,float16,0,0.014481599628925323
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,256,24,8,128,1,float16,fp8,0,0.014603200554847717
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,256,24,8,128,1,fp8,fp8,0,0.014574399590492249
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,128,24,1,128,1,float16,float16,0,0.21160640716552734
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,128,24,1,128,1,fp8,fp8,0,0.23426399230957032
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,128,24,2,128,1,float16,float16,0,0.21181120872497558
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,128,24,2,128,1,float16,fp8,0,0.23431520462036132
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,128,24,2,128,1,fp8,fp8,0,0.23394560813903809
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,128,24,4,128,1,float16,float16,0,0.22086720466613768
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,256,24,2,128,1,fp8,fp8,0,0.016622400283813475
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,128,24,4,128,1,float16,fp8,0,0.23386719226837158
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,128,24,4,128,1,fp8,fp8,0,0.23436479568481444
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,128,24,8,128,1,float16,fp8,0,0.2339103937149048
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,128,24,8,128,1,float16,float16,0,0.23961760997772216
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,128,24,8,128,1,fp8,fp8,0,0.23424320220947265
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,128,24,24,128,1,float16,float16,0,0.1634400010108948
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,128,24,24,128,1,float16,fp8,0,0.15693440437316894
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,128,24,24,128,1,fp8,fp8,0,0.15644160509109498
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,128,24,1,128,1,float16,float16,0,0.11182240247726441
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,128,24,1,128,1,float16,fp8,0,0.12311040163040161
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,128,24,1,128,1,fp8,fp8,0,0.12200640439987183
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,128,24,2,128,1,float16,float16,0,0.11226079463958741
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,128,24,2,128,1,float16,fp8,0,0.12219040393829346
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,128,24,2,128,1,fp8,fp8,0,0.12236640453338624
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,128,24,4,128,1,float16,float16,0,0.11661920547485352
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,128,24,4,128,1,float16,fp8,0,0.12213120460510254
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,128,24,4,128,1,fp8,fp8,0,0.1223647952079773
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,128,24,8,128,1,float16,float16,0,0.12541600465774536
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,128,24,8,128,1,float16,fp8,0,0.12292319536209106
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,128,24,8,128,1,fp8,fp8,0,0.12222559452056884
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,128,24,24,128,1,float16,float16,0,0.08688480257987977
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,128,24,24,128,1,float16,fp8,0,0.08417919874191285
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,256,24,1,128,1,float16,float16,0,0.014542399346828461
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,128,24,1,128,1,float16,float16,0,0.06295040249824524
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,128,24,24,128,1,fp8,fp8,0,0.0844752013683319
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,128,24,1,128,1,fp8,fp8,0,0.06575999855995178
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,128,24,2,128,1,float16,float16,0,0.06225280165672302
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,128,24,2,128,1,float16,fp8,0,0.06635680198669433
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,128,24,4,128,1,float16,float16,0,0.06562880277633668
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,128,24,4,128,1,float16,fp8,0,0.06575359702110291
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,128,24,4,128,1,fp8,fp8,0,0.06714720129966736
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,128,24,8,128,1,float16,float16,0,0.06979680061340332
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,128,24,8,128,1,float16,fp8,0,0.06771680116653442
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,128,24,24,128,1,float16,float16,0,0.04954079985618591
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,128,24,8,128,1,fp8,fp8,0,0.06731680035591125
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,128,24,24,128,1,fp8,fp8,0,0.04732480049133301
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,128,24,1,128,1,float16,float16,0,0.036668801307678224
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,128,24,1,128,1,float16,fp8,0,0.23418240547180175
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,128,24,1,128,1,fp8,fp8,0,0.039017599821090695
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,128,24,1,128,1,float16,fp8,0,0.039124798774719236
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,128,24,2,128,1,float16,float16,0,0.03709760010242462
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,128,24,2,128,1,fp8,fp8,0,0.039131200313568114
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,128,24,4,128,1,float16,float16,0,0.03711999952793121
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,128,24,4,128,1,float16,fp8,0,0.03881120085716248
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,128,24,4,128,1,fp8,fp8,0,0.03912160098552704
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,128,24,8,128,1,float16,float16,0,0.03912799954414368
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,128,24,8,128,1,float16,fp8,0,0.039113599061965945
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,128,24,8,128,1,fp8,fp8,0,0.0389488011598587
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,128,24,24,128,1,float16,float16,0,0.026686400175094604
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,128,24,24,128,1,float16,fp8,0,0.02677600085735321
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,128,24,24,128,1,fp8,fp8,0,0.026897600293159483
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,128,24,1,128,1,float16,float16,0,0.022191999852657317
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,128,24,1,128,1,float16,fp8,0,0.02271520048379898
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,128,24,1,128,1,fp8,fp8,0,0.02274399995803833
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,128,24,2,128,1,float16,float16,0,0.022515200078487396
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,128,24,2,128,1,float16,fp8,0,0.02268799990415573
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,128,24,2,128,1,fp8,fp8,0,0.022752000391483305
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,256,24,8,128,1,float16,float16,0,0.01668799966573715
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,128,24,4,128,1,float16,float16,0,0.022678400576114654
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,128,24,4,128,1,float16,fp8,0,0.02274720072746277
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,128,24,8,128,1,float16,float16,0,0.023255999386310577
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,128,24,8,128,1,float16,fp8,0,0.023100799322128295
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,128,24,8,128,1,fp8,fp8,0,0.02332960069179535
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,128,24,24,128,1,float16,float16,0,0.018676799535751343
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,128,24,24,128,1,float16,fp8,0,0.019750399887561797
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,128,24,24,128,1,fp8,fp8,0,0.019790400564670563
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,128,24,1,128,1,float16,float16,0,0.016603200137615202
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,256,24,24,128,1,fp8,fp8,0,0.02889919877052307
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,128,24,1,128,1,float16,fp8,0,0.017339199781417847
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,128,24,1,128,1,fp8,fp8,0,0.017041599750518797
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,128,24,2,128,1,float16,float16,0,0.016631999611854555
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,128,24,2,128,1,float16,fp8,0,0.017323200404644013
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,128,24,2,128,1,fp8,fp8,0,0.016625599563121797
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,128,24,4,128,1,float16,float16,0,0.016622400283813475
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,128,24,1,128,1,float16,fp8,0,0.06646879911422729
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,128,24,4,128,1,fp8,fp8,0,0.016590400040149687
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,128,24,8,128,1,float16,float16,0,0.01720480024814606
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,128,24,2,128,1,fp8,fp8,0,0.06580640077590942
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,128,24,8,128,1,fp8,fp8,0,0.017521600425243377
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,128,24,24,128,1,float16,float16,0,0.012492799758911132
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,128,24,24,128,1,float16,fp8,0,0.014049600064754485
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,128,24,24,128,1,fp8,fp8,0,0.012678399682044983
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,128,24,1,128,1,float16,float16,0,0.012548799812793731
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,128,24,1,128,1,float16,fp8,0,0.012468799948692322
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,128,24,24,128,1,float16,fp8,0,0.047512000799179076
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,128,24,2,128,1,float16,float16,0,0.012529599666595458
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,128,24,2,128,1,float16,fp8,0,0.012444800138473511
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,128,24,2,128,1,fp8,fp8,0,0.012534399330615998
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,128,24,4,128,1,float16,float16,0,0.012449599802494049
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,128,24,4,128,1,float16,fp8,0,0.012515200674533844
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,128,24,4,128,1,fp8,fp8,0,0.012460800260305405
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,128,24,8,128,1,float16,float16,0,0.012486399710178375
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,128,24,2,128,1,float16,fp8,0,0.038991999626159665
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,128,24,8,128,1,float16,fp8,0,0.012439999729394913
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,128,24,24,128,1,float16,float16,0,0.012508800625801087
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,128,24,24,128,1,fp8,fp8,0,0.01247519999742508
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,128,24,1,128,1,float16,float16,0,0.011052799969911575
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,128,24,1,128,1,float16,fp8,0,0.012489599734544754
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,128,24,1,128,1,fp8,fp8,0,0.010465600341558457
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,128,24,2,128,1,float16,float16,0,0.010547199845314026
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,128,24,2,128,1,float16,fp8,0,0.010780800133943558
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,128,24,2,128,1,fp8,fp8,0,0.010876800119876861
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,128,24,4,128,1,float16,float16,0,0.010520000010728836
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,128,24,4,128,1,float16,fp8,0,0.011051200330257416
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,128,24,4,128,1,fp8,fp8,0,0.022759999334812164
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,128,24,8,128,1,float16,float16,0,0.012444800138473511
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,128,24,8,128,1,float16,fp8,0,0.012401600182056428
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,128,24,8,128,1,fp8,fp8,0,0.012462399899959564
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,128,24,24,128,1,float16,float16,0,0.012404800206422806
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,128,24,24,128,1,float16,fp8,0,0.012433599680662155
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,128,24,24,128,1,fp8,fp8,0,0.011400000005960465
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,128,24,1,128,1,float16,float16,0,0.0104592002928257
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,128,24,1,128,1,float16,fp8,0,0.010422399640083313
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,128,24,1,128,1,fp8,fp8,0,0.010480000078678131
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,128,24,2,128,1,float16,float16,0,0.01045600026845932
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,128,24,4,128,1,float16,fp8,0,0.016646400094032288
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,128,24,2,128,1,fp8,fp8,0,0.010468800365924836
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,128,24,8,128,1,float16,fp8,0,0.01664000004529953
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,128,24,4,128,1,float16,float16,0,0.0105103999376297
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,128,24,4,128,1,float16,fp8,0,0.01051200032234192
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,128,24,4,128,1,fp8,fp8,0,0.010527999699115753
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,128,24,8,128,1,float16,float16,0,0.010566399991512298
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,128,24,8,128,1,float16,fp8,0,0.010462400317192078
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,128,24,8,128,1,fp8,fp8,0,0.011400000005960465
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,128,24,1,128,1,fp8,fp8,0,0.012467200309038163
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,64,24,1,128,1,float16,float16,0,0.16495840549468993
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,64,24,1,128,1,fp8,fp8,0,0.17479360103607178
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,64,24,2,128,1,float16,float16,0,0.16558879613876343
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,64,24,2,128,1,float16,fp8,0,0.1750383973121643
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,64,24,2,128,1,fp8,fp8,0,0.175107204914093
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,64,24,4,128,1,float16,float16,0,0.17039840221405028
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,128,24,8,128,1,fp8,fp8,0,0.012486399710178375
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,64,24,4,128,1,float16,fp8,0,0.17510559558868408
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,64,24,4,128,1,fp8,fp8,0,0.17529759407043458
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,64,24,8,128,1,float16,fp8,0,0.1749519944190979
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,64,24,24,128,1,float16,float16,0,0.11136800050735474
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,64,24,8,128,1,fp8,fp8,0,0.17519359588623046
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,64,24,24,128,1,float16,fp8,0,0.11023520231246949
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,64,24,24,128,1,fp8,fp8,0,0.10904799699783325
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,64,24,1,128,1,float16,float16,0,0.08929280042648316
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,64,24,1,128,1,float16,fp8,0,0.09223679900169372
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,64,24,1,128,1,fp8,fp8,0,0.09162399768829346
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,64,24,2,128,1,float16,fp8,0,0.09204480051994324
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,64,24,2,128,1,fp8,fp8,0,0.09148319959640502
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,64,24,4,128,1,float16,float16,0,0.09149760007858276
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,128,24,4,128,1,fp8,fp8,0,0.010952000319957734
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,64,24,4,128,1,float16,fp8,0,0.09166240096092224
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,64,24,4,128,1,fp8,fp8,0,0.0923520028591156
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,64,24,8,128,1,float16,fp8,0,0.0926751971244812
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,64,24,8,128,1,float16,float16,0,0.09526079893112183
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,64,24,8,128,1,fp8,fp8,0,0.09292640089988709
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,64,24,24,128,1,float16,float16,0,0.06151360273361206
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,64,24,24,128,1,float16,fp8,0,0.05973920226097107
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,64,24,1,128,1,float16,float16,0,0.04855999946594238
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,64,24,1,128,1,float16,fp8,0,0.05036960244178772
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,64,24,1,128,1,fp8,fp8,0,0.050628799200057986
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,64,24,2,128,1,float16,float16,0,0.04864799976348877
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,64,24,2,128,1,float16,fp8,0,0.051235198974609375
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,128,24,2,128,1,float16,fp8,0,0.011484800279140473
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,64,24,2,128,1,fp8,fp8,0,0.050993597507476805
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,64,24,4,128,1,float16,float16,0,0.05009120106697083
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,64,24,4,128,1,float16,fp8,0,0.05148000121116638
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,64,24,8,128,1,float16,float16,0,0.05311200022697449
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,64,24,8,128,1,float16,fp8,0,0.051283198595047
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,64,24,8,128,1,fp8,fp8,0,0.05117599964141846
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,64,24,24,128,1,float16,float16,0,0.03309760093688965
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,64,24,24,128,1,float16,fp8,0,0.03507040143013
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,64,24,24,128,1,fp8,fp8,0,0.03498240113258362
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,64,24,1,128,1,float16,float16,0,0.02898559868335724
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,128,24,24,128,1,float16,fp8,0,0.012387199699878693
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,64,24,1,128,1,float16,fp8,0,0.030876800417900085
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,64,24,1,128,1,fp8,fp8,0,0.030943998694419862
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,64,24,2,128,1,float16,float16,0,0.02892799973487854
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,64,24,8,128,1,float16,float16,0,0.1787984013557434
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,64,24,2,128,1,float16,fp8,0,0.030844798684120177
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,64,24,2,128,1,fp8,fp8,0,0.03102239966392517
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,64,24,4,128,1,float16,float16,0,0.030635198950767516
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,64,24,4,128,1,float16,fp8,0,0.030939200520515443
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,64,24,8,128,1,float16,float16,0,0.030995199084281923
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,64,24,8,128,1,float16,fp8,0,0.03087199926376343
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,64,24,8,128,1,fp8,fp8,0,0.03097440004348755
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,64,24,24,128,1,float16,float16,0,0.02069920003414154
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,64,24,24,128,1,float16,fp8,0,0.020812800526618956
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,64,24,24,128,1,fp8,fp8,0,0.020683200657367708
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,64,24,1,128,1,float16,float16,0,0.018651199340820313
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,64,24,1,128,1,float16,fp8,0,0.018681600689888
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,64,24,2,128,1,float16,float16,0,0.08974559903144837
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,64,24,1,128,1,fp8,fp8,0,0.018644799292087556
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,64,24,2,128,1,float16,fp8,0,0.018824000656604768
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,64,24,2,128,1,fp8,fp8,0,0.01865759938955307
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,64,24,4,128,1,float16,float16,0,0.018723200261592864
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,64,24,4,128,1,float16,fp8,0,0.01865919977426529
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,64,24,4,128,1,fp8,fp8,0,0.01900160014629364
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,64,24,8,128,1,float16,float16,0,0.018756799399852753
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,64,24,8,128,1,float16,fp8,0,0.01865600049495697
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,64,24,8,128,1,fp8,fp8,0,0.018622399866580965
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,64,24,24,128,1,float16,float16,0,0.014779199659824372
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,64,24,24,128,1,float16,fp8,0,0.016531200706958772
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,64,24,1,128,1,float16,float16,0,0.014975999295711518
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,64,24,1,128,1,float16,fp8,0,0.014521600306034088
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,64,24,24,128,1,fp8,fp8,0,0.060227197408676145
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,64,24,1,128,1,fp8,fp8,0,0.014528000354766845
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,64,24,2,128,1,float16,float16,0,0.014534400403499603
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,64,24,2,128,1,float16,fp8,0,0.014552000164985656
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,64,24,2,128,1,fp8,fp8,0,0.014556799829006196
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,64,24,4,128,1,float16,float16,0,0.014505599439144135
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,64,24,4,128,1,float16,fp8,0,0.014548799395561219
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,64,24,4,128,1,fp8,fp8,0,0.014537599682807923
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,64,24,8,128,1,float16,float16,0,0.014531199634075165
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,64,24,8,128,1,float16,fp8,0,0.01451520025730133
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,64,24,8,128,1,fp8,fp8,0,0.014547200500965118
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,64,24,24,128,1,float16,float16,0,0.012203200161457062
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,64,24,24,128,1,float16,fp8,0,0.010527999699115753
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,64,24,24,128,1,fp8,fp8,0,0.010679999738931656
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,64,24,1,128,1,float16,float16,0,0.010552000254392624
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,64,24,1,128,1,float16,fp8,0,0.010524799674749374
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,64,24,1,128,1,fp8,fp8,0,0.010468800365924836
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,64,24,2,128,1,float16,fp8,0,0.010463999956846238
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,64,24,2,128,1,fp8,fp8,0,0.010444799810647965
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,64,24,4,128,1,float16,float16,0,0.010515200346708298
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,64,24,4,128,1,float16,fp8,0,0.010476800054311753
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,64,24,8,128,1,float16,float16,0,0.010441599786281586
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,64,24,4,128,1,fp8,fp8,0,0.01043360009789467
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,64,24,4,128,1,fp8,fp8,0,0.05101119875907898
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,64,24,8,128,1,float16,fp8,0,0.01043360009789467
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,64,24,8,128,1,fp8,fp8,0,0.010422399640083313
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,64,24,24,128,1,float16,float16,0,0.010391999781131745
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,64,24,24,128,1,float16,fp8,0,0.010462400317192078
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,64,24,1,128,1,float16,float16,0,0.010447999835014344
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,64,24,1,128,1,float16,fp8,0,0.010443200170993806
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,64,24,1,128,1,fp8,fp8,0,0.010420800000429154
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,64,24,2,128,1,float16,float16,0,0.010471999645233154
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,64,24,1,128,1,float16,fp8,0,0.17458399534225463
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,64,24,2,128,1,float16,fp8,0,0.010444799810647965
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,64,24,2,128,1,fp8,fp8,0,0.010452800244092942
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,64,24,4,128,1,float16,fp8,0,0.010446400195360184
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,64,24,4,128,1,float16,float16,0,0.010460799932479859
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,64,24,4,128,1,fp8,fp8,0,0.01043040007352829
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,64,24,8,128,1,float16,float16,0,0.01043199971318245
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,64,24,8,128,1,float16,fp8,0,0.010436800122261048
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,64,24,24,128,1,float16,fp8,0,0.010489600151777268
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,64,24,8,128,1,fp8,fp8,0,0.010443200170993806
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,64,24,24,128,1,float16,float16,0,0.010473600029945374
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,64,24,24,128,1,fp8,fp8,0,0.010487999767065048
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,64,24,1,128,1,float16,float16,0,0.010480000078678131
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,64,24,1,128,1,float16,fp8,0,0.01043360009789467
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,64,24,1,128,1,fp8,fp8,0,0.010451199859380722
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,64,24,2,128,1,float16,float16,0,0.010468800365924836
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,64,24,2,128,1,float16,fp8,0,0.010369600355625152
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,64,24,2,128,1,fp8,fp8,0,0.010441599786281586
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,64,24,4,128,1,float16,float16,0,0.010473600029945374
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,64,24,4,128,1,float16,fp8,0,0.01043360009789467
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,64,24,4,128,1,fp8,fp8,0,0.0104592002928257
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,64,24,4,128,1,fp8,fp8,0,0.030961599946022034
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,64,24,8,128,1,float16,float16,0,0.010463999956846238
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,64,24,8,128,1,fp8,fp8,0,0.010473600029945374
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,32,24,1,128,1,float16,float16,0,0.1428704023361206
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,32,24,1,128,1,float16,fp8,0,0.14597599506378173
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,32,24,1,128,1,fp8,fp8,0,0.14670720100402831
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,32,24,2,128,1,float16,float16,0,0.1427680015563965
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,32,24,2,128,1,float16,fp8,0,0.1464192032814026
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,32,24,2,128,1,fp8,fp8,0,0.14635839462280273
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,32,24,4,128,1,float16,float16,0,0.1456447958946228
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,32,24,4,128,1,float16,fp8,0,0.1458415985107422
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,32,24,4,128,1,fp8,fp8,0,0.14692480564117433
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,32,24,8,128,1,float16,float16,0,0.14868320226669313
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,32,24,8,128,1,float16,fp8,0,0.14668480157852173
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,32,24,8,128,1,fp8,fp8,0,0.146288001537323
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,32,24,24,128,1,float16,float16,0,0.08821120262145996
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,32,24,24,128,1,float16,fp8,0,0.08565919995307922
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,64,24,2,128,1,float16,float16,0,0.010452800244092942
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,32,24,24,128,1,fp8,fp8,0,0.08580639958381653
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,32,24,1,128,1,float16,float16,0,0.07604960203170777
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,32,24,1,128,1,float16,fp8,0,0.07671999931335449
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,32,24,2,128,1,float16,float16,0,0.07669919729232788
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,32,24,2,128,1,fp8,fp8,0,0.07725279927253723
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,32,24,2,128,1,float16,fp8,0,0.07714239954948425
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,32,24,4,128,1,float16,float16,0,0.0776416003704071
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,64,24,24,128,1,fp8,fp8,0,0.010460799932479859
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,32,24,4,128,1,fp8,fp8,0,0.07742559909820557
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,32,24,8,128,1,float16,float16,0,0.08066400289535522
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,32,24,8,128,1,float16,fp8,0,0.07707039713859558
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,32,24,8,128,1,fp8,fp8,0,0.07747359871864319
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,32,24,24,128,1,float16,float16,0,0.04731520116329193
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,32,24,24,128,1,float16,fp8,0,0.04759039878845215
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,32,24,24,128,1,fp8,fp8,0,0.04734239876270294
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,32,24,1,128,1,float16,float16,0,0.043356800079345705
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,64,24,8,128,1,float16,fp8,0,0.010364799946546554
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,32,24,1,128,1,fp8,fp8,0,0.043505600094795226
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,32,24,2,128,1,float16,float16,0,0.043331199884414674
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,32,24,1,128,1,float16,fp8,0,0.04326080083847046
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,32,24,2,128,1,float16,fp8,0,0.043398401141166686
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,32,24,2,128,1,fp8,fp8,0,0.04325760006904602
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,32,24,4,128,1,float16,float16,0,0.04342719912528992
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,64,24,2,128,1,float16,float16,0,0.01863359957933426
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,32,24,4,128,1,float16,fp8,0,0.04324640035629272
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,32,24,4,128,1,fp8,fp8,0,0.04316799938678741
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,32,24,8,128,1,float16,float16,0,0.04466240108013153
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,32,24,8,128,1,float16,fp8,0,0.04345279932022095
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,32,24,8,128,1,fp8,fp8,0,0.04332480132579804
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,32,24,24,128,1,float16,float16,0,0.02879199981689453
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,32,24,24,128,1,float16,fp8,0,0.028896000981330872
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,32,24,24,128,1,fp8,fp8,0,0.02884800136089325
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,32,24,1,128,1,float16,float16,0,0.026822400093078614
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,32,24,1,128,1,float16,fp8,0,0.026833599805831908
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,32,24,1,128,1,fp8,fp8,0,0.0268528014421463
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,32,24,2,128,1,float16,float16,0,0.026833599805831908
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,32,24,4,128,1,float16,float16,0,0.026862400770187377
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,32,24,2,128,1,float16,fp8,0,0.026841598749160766
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,32,24,4,128,1,float16,fp8,0,0.026787200570106508
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,64,24,24,128,1,fp8,fp8,0,0.016201600432395935
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,32,24,8,128,1,float16,fp8,0,0.026756799221038817
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,32,24,8,128,1,fp8,fp8,0,0.026836800575256347
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,32,24,24,128,1,float16,float16,0,0.018580800294876097
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,32,24,24,128,1,float16,fp8,0,0.018764799833297728
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,32,24,24,128,1,fp8,fp8,0,0.018566399812698364
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,32,24,1,128,1,float16,float16,0,0.016652800142765045
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,32,24,1,128,1,float16,fp8,0,0.01656160056591034
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,32,24,1,128,1,fp8,fp8,0,0.016612799465656282
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,32,24,2,128,1,float16,float16,0,0.016599999368190767
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,32,24,2,128,1,float16,fp8,0,0.016924799978733064
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,32,24,2,128,1,fp8,fp8,0,0.016595199704170227
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,32,24,4,128,1,float16,float16,0,0.017126399278640746
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,32,24,4,128,1,float16,fp8,0,0.017233599722385407
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,32,24,4,128,1,fp8,fp8,0,0.016616000235080718
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,32,24,8,128,1,float16,float16,0,0.0182559996843338
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,32,24,8,128,1,float16,fp8,0,0.01711679995059967
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,32,24,8,128,1,fp8,fp8,0,0.0171984001994133
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,32,24,1,128,1,fp8,fp8,0,0.0773248016834259
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,32,24,24,128,1,float16,float16,0,0.014561599493026734
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,32,24,24,128,1,float16,fp8,0,0.014484800398349762
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,32,24,24,128,1,fp8,fp8,0,0.014601600170135499
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,32,24,1,128,1,float16,float16,0,0.013121600449085235
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,32,24,1,128,1,fp8,fp8,0,0.014108799397945404
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,32,24,2,128,1,float16,float16,0,0.01321759968996048
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,32,24,2,128,1,fp8,fp8,0,0.013967999815940857
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,32,24,4,128,1,float16,fp8,0,0.014076800644397735
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,32,24,4,128,1,float16,fp8,0,0.07690240144729614
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,32,24,4,128,1,float16,float16,0,0.013504000008106231
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,32,24,4,128,1,fp8,fp8,0,0.01406240016222
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,32,24,8,128,1,float16,float16,0,0.012534399330615998
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,32,24,8,128,1,float16,fp8,0,0.014446400105953217
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,32,24,24,128,1,float16,float16,0,0.01043199971318245
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,32,24,24,128,1,float16,fp8,0,0.010503999888896942
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,32,24,24,128,1,fp8,fp8,0,0.010486400127410889
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,32,24,1,128,1,float16,float16,0,0.010494399815797806
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,32,24,1,128,1,float16,fp8,0,0.010443200170993806
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,32,24,1,128,1,fp8,fp8,0,0.0104592002928257
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,32,24,2,128,1,float16,float16,0,0.010468800365924836
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,32,24,2,128,1,float16,fp8,0,0.010449600219726563
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,32,24,2,128,1,fp8,fp8,0,0.010425599664449692
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,32,24,4,128,1,float16,float16,0,0.010462400317192078
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,32,24,4,128,1,float16,fp8,0,0.010500799864530563
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,32,24,4,128,1,fp8,fp8,0,0.010480000078678131
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,32,24,8,128,1,float16,float16,0,0.010492800176143647
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,32,24,8,128,1,float16,fp8,0,0.010476800054311753
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,32,24,8,128,1,fp8,fp8,0,0.010443200170993806
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,32,24,24,128,1,float16,float16,0,0.010436800122261048
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,32,24,24,128,1,float16,fp8,0,0.010454399883747101
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,32,24,24,128,1,fp8,fp8,0,0.010468800365924836
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,32,24,1,128,1,float16,float16,0,0.010446400195360184
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,32,24,1,128,1,float16,fp8,0,0.010428799688816071
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,32,24,1,128,1,fp8,fp8,0,0.010520000010728836
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,32,24,2,128,1,fp8,fp8,0,0.026815998554229736
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,32,24,2,128,1,float16,float16,0,0.010489600151777268
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,32,24,2,128,1,float16,fp8,0,0.010395199805498124
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,32,24,8,128,1,float16,float16,0,0.027243199944496154
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,32,24,2,128,1,fp8,fp8,0,0.010391999781131745
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,32,24,4,128,1,float16,fp8,0,0.010412800312042236
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,32,24,4,128,1,fp8,fp8,0,0.010449600219726563
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,32,24,8,128,1,float16,float16,0,0.010440000146627427
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,32,24,8,128,1,float16,fp8,0,0.010366400331258773
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,32,24,24,128,1,float16,float16,0,0.010451199859380722
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,32,24,8,128,1,fp8,fp8,0,0.010436800122261048
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,32,24,24,128,1,float16,fp8,0,0.00968960002064705
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,32,24,24,128,1,fp8,fp8,0,0.010388799756765366
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,32,24,1,128,1,float16,float16,0,0.00849440023303032
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,32,24,1,128,1,float16,fp8,0,0.00942559987306595
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,32,24,1,128,1,fp8,fp8,0,0.009332799911499023
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,32,24,2,128,1,float16,float16,0,0.008870399743318557
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,32,24,2,128,1,float16,fp8,0,0.009388799965381622
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,32,24,2,128,1,fp8,fp8,0,0.009598399698734283
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,32,24,4,128,1,float16,float16,0,0.01048479974269867
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,32,24,4,128,1,float16,fp8,0,0.009511999785900116
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,32,24,4,128,1,fp8,fp8,0,0.010372799634933472
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,32,24,8,128,1,float16,float16,0,0.009358400106430053
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,32,24,8,128,1,float16,fp8,0,0.010391999781131745
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,32,24,8,128,1,fp8,fp8,0,0.009588800370693207
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,32,24,1,128,1,float16,fp8,0,0.014078399538993836
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,32,24,2,128,1,float16,fp8,0,0.01404159963130951
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,16,24,1,128,1,float16,fp8,0,0.13146239519119263
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,16,24,1,128,1,float16,float16,0,0.13755199909210206
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,16,24,1,128,1,fp8,fp8,0,0.1312608003616333
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,16,24,2,128,1,float16,float16,0,0.1359984040260315
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,32,24,8,128,1,fp8,fp8,0,0.012886400520801543
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,16,24,2,128,1,float16,fp8,0,0.13216960430145264
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,16,24,2,128,1,fp8,fp8,0,0.13207999467849732
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,16,24,4,128,1,float16,fp8,0,0.1322111964225769
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,16,24,4,128,1,float16,float16,0,0.13657280206680297
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,16,24,4,128,1,fp8,fp8,0,0.1314527988433838
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,16,24,8,128,1,float16,float16,0,0.14041919708251954
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,16,24,8,128,1,float16,fp8,0,0.13231359720230101
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,16,24,24,128,1,float16,float16,0,0.07857279777526856
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,16,24,24,128,1,float16,fp8,0,0.07452800273895263
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,16,24,24,128,1,fp8,fp8,0,0.07515199780464173
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,16,24,1,128,1,float16,float16,0,0.07436800003051758
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,16,24,1,128,1,float16,fp8,0,0.07037280201911926
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,16,24,1,128,1,fp8,fp8,0,0.07056319713592529
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,16,24,2,128,1,float16,float16,0,0.07455679774284363
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,16,24,2,128,1,float16,fp8,0,0.07081279754638672
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,16,24,2,128,1,fp8,fp8,0,0.07018240094184876
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,16,24,4,128,1,float16,float16,0,0.07444959878921509
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,16,24,4,128,1,float16,fp8,0,0.07065759897232056
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,16,24,4,128,1,fp8,fp8,0,0.07107840180397033
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,16,24,8,128,1,float16,fp8,0,0.07067520022392274
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,16,24,8,128,1,float16,float16,0,0.07568320035934448
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,16,24,24,128,1,float16,fp8,0,0.042115199565887454
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,16,24,24,128,1,fp8,fp8,0,0.042366400361061096
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,16,24,8,128,1,fp8,fp8,0,0.07103999853134155
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,16,24,1,128,1,float16,float16,0,0.04207360148429871
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,16,24,1,128,1,float16,fp8,0,0.04046080112457275
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,16,24,1,128,1,fp8,fp8,0,0.04023999869823456
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,16,24,2,128,1,float16,float16,0,0.04289279878139496
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,16,24,2,128,1,float16,fp8,0,0.04012160003185272
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,16,24,4,128,1,float16,float16,0,0.04297919869422913
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,16,24,4,128,1,fp8,fp8,0,0.039822399616241455
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,16,24,8,128,1,float16,float16,0,0.043244799971580504
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,16,24,8,128,1,float16,fp8,0,0.03986720144748688
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,16,24,8,128,1,fp8,fp8,0,0.040006399154663086
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,32,24,4,128,1,fp8,fp8,0,0.026868799328804018
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,16,24,24,128,1,float16,float16,0,0.02682720124721527
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,16,24,24,128,1,float16,fp8,0,0.025884801149368288
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,32,24,4,128,1,float16,float16,0,0.010335999727249145
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,16,24,24,128,1,fp8,fp8,0,0.025158399343490602
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,16,24,1,128,1,float16,fp8,0,0.024780799448490144
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,16,24,1,128,1,fp8,fp8,0,0.024716800451278685
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,16,24,2,128,1,float16,float16,0,0.02534720003604889
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,16,24,2,128,1,float16,fp8,0,0.024742400646209715
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,16,24,2,128,1,fp8,fp8,0,0.024775999784469604
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,16,24,4,128,1,float16,float16,0,0.025230398774147032
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,16,24,4,128,1,float16,fp8,0,0.024742400646209715
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,16,24,4,128,1,fp8,fp8,0,0.024718399345874786
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,16,24,8,128,1,float16,float16,0,0.026454401016235352
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,16,24,8,128,1,float16,fp8,0,0.024777600169181825
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,16,24,24,128,1,float16,float16,0,0.016995200514793397
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,16,24,24,128,1,float16,fp8,0,0.016545599699020384
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,16,24,8,128,1,fp8,fp8,0,0.0247871994972229
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,16,24,24,128,1,fp8,fp8,0,0.016612799465656282
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,16,24,1,128,1,float16,float16,0,0.0165583997964859
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,16,24,1,128,1,float16,fp8,0,0.01650400012731552
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,16,24,1,128,1,fp8,fp8,0,0.016553600132465363
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,16,24,2,128,1,float16,float16,0,0.016630400717258454
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,16,24,2,128,1,fp8,fp8,0,0.01660960018634796
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,16,24,4,128,1,float16,float16,0,0.01658080071210861
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,16,24,4,128,1,float16,fp8,0,0.016574400663375854
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,16,24,4,128,1,fp8,fp8,0,0.016572800278663636
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,16,24,8,128,1,float16,float16,0,0.0166143998503685
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,16,24,8,128,1,float16,fp8,0,0.016572800278663636
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,16,24,8,128,1,fp8,fp8,0,0.01656000018119812
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,16,24,24,128,1,float16,float16,0,0.014428800344467163
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,16,24,24,128,1,float16,fp8,0,0.012503999471664428
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,16,24,24,128,1,fp8,fp8,0,0.012464000284671784
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,16,24,8,128,1,fp8,fp8,0,0.13216160535812377
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,16,24,1,128,1,float16,float16,0,0.012539200484752655
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,16,24,1,128,1,float16,fp8,0,0.012569600343704223
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,16,24,1,128,1,fp8,fp8,0,0.012508800625801087
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,16,24,2,128,1,float16,float16,0,0.01255200058221817
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,16,24,2,128,1,float16,fp8,0,0.012481600046157837
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,16,24,2,128,1,fp8,fp8,0,0.012494400143623352
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,16,24,4,128,1,float16,float16,0,0.01255359947681427
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,16,24,4,128,1,float16,fp8,0,0.012556800246238708
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,16,24,4,128,1,fp8,fp8,0,0.012457600235939026
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,16,24,8,128,1,float16,fp8,0,0.012515200674533844
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,16,24,24,128,1,float16,float16,0,0.04478879868984222
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,16,24,8,128,1,fp8,fp8,0,0.01244800016283989
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16,24,24,128,1,float16,float16,0,0.010480000078678131
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16,24,24,128,1,float16,fp8,0,0.010419200360774993
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16,24,24,128,1,fp8,fp8,0,0.010503999888896942
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16,24,1,128,1,float16,float16,0,0.010436800122261048
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16,24,1,128,1,float16,fp8,0,0.01048479974269867
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16,24,1,128,1,fp8,fp8,0,0.010428799688816071
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,16,24,4,128,1,float16,fp8,0,0.04077120125293732
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16,24,2,128,1,fp8,fp8,0,0.010471999645233154
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16,24,2,128,1,float16,fp8,0,0.010384000092744827
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16,24,4,128,1,float16,float16,0,0.01037440001964569
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16,24,4,128,1,float16,fp8,0,0.010420800000429154
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16,24,4,128,1,fp8,fp8,0,0.010380800068378448
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16,24,8,128,1,float16,fp8,0,0.010363200306892395
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,16,24,1,128,1,float16,float16,0,0.025915199518203737
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16,24,8,128,1,fp8,fp8,0,0.01032159999012947
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16,24,24,128,1,float16,float16,0,0.01048159971833229
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16,24,24,128,1,float16,fp8,0,0.010300800204277039
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16,24,24,128,1,fp8,fp8,0,0.01037440001964569
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16,24,1,128,1,float16,float16,0,0.010366400331258773
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16,24,1,128,1,float16,fp8,0,0.009454400092363358
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16,24,1,128,1,fp8,fp8,0,0.01043360009789467
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16,24,2,128,1,float16,float16,0,0.010446400195360184
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16,24,2,128,1,float16,fp8,0,0.010371199995279311
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16,24,2,128,1,fp8,fp8,0,0.009406399726867676
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16,24,4,128,1,float16,float16,0,0.010345599800348281
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16,24,4,128,1,float16,fp8,0,0.010419200360774993
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16,24,4,128,1,fp8,fp8,0,0.010375999659299851
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,16,24,2,128,1,float16,fp8,0,0.01650079935789108
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16,24,8,128,1,float16,fp8,0,0.01043360009789467
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16,24,8,128,1,float16,float16,0,0.010412800312042236
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16,24,24,128,1,float16,float16,0,0.010460799932479859
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16,24,24,128,1,float16,fp8,0,0.010305599868297577
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16,24,24,128,1,fp8,fp8,0,0.010364799946546554
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16,24,1,128,1,float16,float16,0,0.008478400111198426
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16,24,1,128,1,float16,fp8,0,0.008446399867534638
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16,24,1,128,1,fp8,fp8,0,0.008393599838018417
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16,24,2,128,1,float16,float16,0,0.009635200351476669
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16,24,2,128,1,fp8,fp8,0,0.008376000076532364
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16,24,4,128,1,float16,float16,0,0.010139200091361999
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16,24,4,128,1,float16,fp8,0,0.008398400247097015
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16,24,4,128,1,fp8,fp8,0,0.00843520015478134
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16,24,8,128,1,float16,float16,0,0.010340800136327743
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16,24,8,128,1,float16,fp8,0,0.008380799740552902
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16,24,8,128,1,fp8,fp8,0,0.01035040020942688
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,1,24,1,128,1,float16,float16,0,0.13445440530776978
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,1,24,1,128,1,float16,fp8,0,0.12565120458602905
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,16,24,8,128,1,float16,float16,0,0.012486399710178375
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,1,24,1,128,1,fp8,fp8,0,0.12591999769210815
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,1,24,2,128,1,float16,float16,0,0.13401919603347778
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,1,24,2,128,1,fp8,fp8,0,0.12534719705581665
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,1,24,4,128,1,float16,float16,0,0.13490240573883056
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,1,24,4,128,1,float16,fp8,0,0.1253376007080078
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,16,24,2,128,1,fp8,fp8,0,0.04037440121173859
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16,24,2,128,1,float16,float16,0,0.010475199669599533
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,1,24,4,128,1,fp8,fp8,0,0.126363205909729
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,1,24,8,128,1,float16,float16,0,0.13369120359420777
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,1,24,24,128,1,float16,float16,0,0.07384639978408813
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,1,24,24,128,1,float16,fp8,0,0.06855679750442505
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,1,24,8,128,1,float16,fp8,0,0.12548160552978516
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,1,24,8,128,1,fp8,fp8,0,0.12643680572509766
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,1,24,24,128,1,fp8,fp8,0,0.06857920289039612
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,1,24,1,128,1,float16,float16,0,0.07270399928092956
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,1,24,1,128,1,float16,fp8,0,0.06803680062294007
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,1,24,1,128,1,fp8,fp8,0,0.0686352014541626
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,1,24,2,128,1,float16,float16,0,0.07263519763946533
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,1,24,2,128,1,float16,fp8,0,0.06805440187454223
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,1,24,2,128,1,fp8,fp8,0,0.06779839992523193
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,1,24,4,128,1,float16,float16,0,0.07321439981460572
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,1,24,4,128,1,float16,fp8,0,0.06809920072555542
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,1,24,4,128,1,fp8,fp8,0,0.06823840141296386
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,1,24,8,128,1,float16,float16,0,0.0725600004196167
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,1,24,8,128,1,float16,fp8,0,0.06833119988441468
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,1,24,8,128,1,fp8,fp8,0,0.06811839938163758
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1,24,24,128,1,float16,fp8,0,0.03917759954929352
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1,24,1,128,1,float16,float16,0,0.04150879979133606
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1,24,1,128,1,float16,fp8,0,0.03892160058021545
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1,24,1,128,1,fp8,fp8,0,0.03916800022125244
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1,24,2,128,1,float16,float16,0,0.04129759967327118
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16,24,8,128,1,fp8,fp8,0,0.01035040020942688
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1,24,2,128,1,float16,fp8,0,0.03913280069828033
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1,24,2,128,1,fp8,fp8,0,0.03913280069828033
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1,24,4,128,1,float16,float16,0,0.04175199866294861
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1,24,4,128,1,float16,fp8,0,0.03904959857463837
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1,24,4,128,1,fp8,fp8,0,0.03915199935436249
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1,24,8,128,1,float16,float16,0,0.0411871999502182
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1,24,8,128,1,float16,fp8,0,0.03909119963645935
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16,24,2,128,1,float16,fp8,0,0.008376000076532364
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1,24,8,128,1,fp8,fp8,0,0.038940799236297605
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1,24,24,128,1,float16,fp8,0,0.02456959933042526
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1,24,24,128,1,fp8,fp8,0,0.024139200150966645
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1,24,1,128,1,float16,float16,0,0.024961599707603456
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1,24,1,128,1,float16,fp8,0,0.02362080067396164
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1,24,1,128,1,fp8,fp8,0,0.024777600169181825
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1,24,2,128,1,float16,float16,0,0.025040000677108765
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1,24,2,128,1,float16,fp8,0,0.024780799448490144
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1,24,2,128,1,fp8,fp8,0,0.02466080039739609
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1,24,4,128,1,float16,float16,0,0.024779200553894043
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1,24,4,128,1,float16,fp8,0,0.024711999297142028
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1,24,4,128,1,fp8,fp8,0,0.024059200286865236
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1,24,8,128,1,float16,float16,0,0.025094398856163026
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1,24,8,128,1,float16,fp8,0,0.024297599494457246
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1,24,8,128,1,fp8,fp8,0,0.02476319968700409
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1,24,24,128,1,float16,float16,0,0.016579200327396394
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1,24,24,128,1,float16,fp8,0,0.01650719940662384
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1,24,24,128,1,fp8,fp8,0,0.01622239947319031
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1,24,1,128,1,float16,fp8,0,0.014958399534225463
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1,24,1,128,1,float16,float16,0,0.016603200137615202
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1,24,1,128,1,fp8,fp8,0,0.015193599462509155
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,1,24,2,128,1,float16,fp8,0,0.1259343981742859
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1,24,2,128,1,fp8,fp8,0,0.016518400609493257
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1,24,4,128,1,float16,float16,0,0.016612799465656282
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1,24,4,128,1,float16,fp8,0,0.016510400176048278
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1,24,4,128,1,fp8,fp8,0,0.015800000727176668
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1,24,8,128,1,float16,float16,0,0.01659359931945801
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1,24,8,128,1,float16,fp8,0,0.016312000155448914
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16,24,8,128,1,float16,float16,0,0.010473600029945374
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1,24,8,128,1,fp8,fp8,0,0.016200000047683717
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1,24,24,128,1,float16,float16,0,0.014529600739479065
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1,24,24,128,1,fp8,fp8,0,0.012465599924325943
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1,24,1,128,1,float16,float16,0,0.01247519999742508
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1,24,1,128,1,float16,fp8,0,0.01244639977812767
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1,24,1,128,1,fp8,fp8,0,0.012492799758911132
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1,24,24,128,1,float16,float16,0,0.0418287992477417
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1,24,2,128,1,float16,float16,0,0.012680000066757202
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1,24,2,128,1,float16,fp8,0,0.012513600289821625
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1,24,2,128,1,fp8,fp8,0,0.012513600289821625
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1,24,4,128,1,float16,float16,0,0.012939199805259705
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1,24,4,128,1,fp8,fp8,0,0.012585599720478059
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1,24,8,128,1,float16,float16,0,0.012956799566745758
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1,24,8,128,1,float16,fp8,0,0.012540799379348756
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1,24,8,128,1,fp8,fp8,0,0.0125231996178627
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1,24,24,128,1,float16,float16,0,0.0267551988363266
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1,24,24,128,1,float16,float16,0,0.010470400005578995
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1,24,24,128,1,float16,fp8,0,0.010465600341558457
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1,24,24,128,1,fp8,fp8,0,0.010478399693965912
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1,24,1,128,1,float16,float16,0,0.010462400317192078
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1,24,1,128,1,float16,fp8,0,0.009505599737167358
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1,24,1,128,1,fp8,fp8,0,0.010507199913263321
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1,24,2,128,1,float16,float16,0,0.010489600151777268
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1,24,2,128,1,float16,fp8,0,0.010443200170993806
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1,24,4,128,1,float16,float16,0,0.010428799688816071
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1,24,4,128,1,float16,fp8,0,0.010406400263309478
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1,24,4,128,1,fp8,fp8,0,0.010438399761915207
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1,24,8,128,1,float16,float16,0,0.010487999767065048
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1,24,8,128,1,float16,fp8,0,0.010460799932479859
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1,24,8,128,1,fp8,fp8,0,0.010375999659299851
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1,24,2,128,1,float16,float16,0,0.01659200042486191
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1,24,24,128,1,float16,float16,0,0.010420800000429154
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1,24,24,128,1,float16,fp8,0,0.009344000369310379
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1,24,24,128,1,fp8,fp8,0,0.010475199669599533
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1,24,1,128,1,float16,float16,0,0.010424000024795533
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1,24,1,128,1,float16,fp8,0,0.00883840024471283
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1,24,1,128,1,fp8,fp8,0,0.01040479987859726
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1,24,2,128,1,float16,float16,0,0.010487999767065048
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1,24,2,128,1,float16,fp8,0,0.009064000099897385
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1,24,2,128,1,fp8,fp8,0,0.010471999645233154
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1,24,4,128,1,float16,float16,0,0.0104592002928257
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1,24,4,128,1,float16,fp8,0,0.009111999720335006
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1,24,4,128,1,fp8,fp8,0,0.01045600026845932
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1,24,8,128,1,float16,float16,0,0.0104592002928257
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1,24,8,128,1,float16,fp8,0,0.009145600348711013
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1,24,8,128,1,fp8,fp8,0,0.010532800108194351
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1,24,24,128,1,float16,float16,0,0.010520000010728836
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1,24,24,128,1,float16,fp8,0,0.008392000198364257
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1,24,24,128,1,fp8,fp8,0,0.010664000362157821
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1,24,1,128,1,float16,float16,0,0.010440000146627427
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1,24,1,128,1,float16,fp8,0,0.008390399813652038
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1,24,2,128,1,float16,float16,0,0.010427200049161912
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1,24,2,128,1,float16,fp8,0,0.008910399675369263
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1,24,2,128,1,fp8,fp8,0,0.010480000078678131
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1,24,24,128,1,float16,fp8,0,0.01250080019235611
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1,24,4,128,1,float16,float16,0,0.010396800190210342
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1,24,4,128,1,float16,fp8,0,0.0104032002389431
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1,24,4,128,1,fp8,fp8,0,0.010369600355625152
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1,24,8,128,1,float16,float16,0,0.01043040007352829
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1,24,8,128,1,float16,fp8,0,0.008604799956083297
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1,24,8,128,1,fp8,fp8,0,0.01037440001964569
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1,24,24,128,1,fp8,fp8,0,0.03911199867725372
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1,24,4,128,1,float16,fp8,0,0.012588800489902496
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1,24,2,128,1,fp8,fp8,0,0.010380800068378448
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1,24,2,128,1,float16,fp8,0,0.015172800421714783
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1,24,1,128,1,fp8,fp8,0,0.010023999959230423
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,16384,16,1,128,1,fp8,fp8,0,5.589188766479492
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,16384,16,1,128,1,float16,fp8,0,5.6184944152832035
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,16384,16,1,128,1,float16,float16,0,6.93900146484375
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,16384,16,2,128,1,float16,float16,0,7.04429931640625
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,16384,16,2,128,1,fp8,fp8,0,5.586881637573242
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,16384,16,4,128,1,float16,fp8,0,5.612424087524414
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,16384,16,4,128,1,float16,float16,0,7.1937103271484375
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,16384,16,2,128,1,float16,fp8,0,5.599601745605469
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,16384,16,4,128,1,fp8,fp8,0,5.664067077636719
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,16384,16,8,128,1,float16,fp8,0,5.724083328247071
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,16384,16,8,128,1,float16,float16,0,7.280436706542969
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,16384,16,8,128,1,fp8,fp8,0,5.755084609985351
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16384,16,1,128,1,float16,fp8,0,2.866035270690918
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16384,16,1,128,1,float16,float16,0,3.417939376831055
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16384,16,16,128,1,float16,fp8,0,2.9136608123779295
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16384,16,1,128,1,fp8,fp8,0,2.884979248046875
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16384,16,16,128,1,fp8,fp8,0,2.9401103973388674
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16384,16,2,128,1,float16,fp8,0,2.868275260925293
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16384,16,2,128,1,float16,float16,0,3.7312942504882813
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16384,16,2,128,1,fp8,fp8,0,2.8918832778930663
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16384,16,4,128,1,float16,fp8,0,2.8893728256225586
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16384,16,4,128,1,float16,float16,0,3.5861583709716798
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16384,16,4,128,1,fp8,fp8,0,2.900707244873047
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16384,16,8,128,1,float16,float16,0,3.584900665283203
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16384,16,8,128,1,float16,fp8,0,2.9421920776367188
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16384,16,16,128,1,float16,fp8,0,1.5464303970336915
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16384,16,8,128,1,fp8,fp8,0,3.246793746948242
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16384,16,16,128,1,fp8,fp8,0,1.6775104522705078
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16384,16,1,128,1,float16,float16,0,1.690260887145996
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16384,16,1,128,1,fp8,fp8,0,1.5183119773864746
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16384,16,1,128,1,float16,fp8,0,1.7820320129394531
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16384,16,2,128,1,float16,float16,0,1.7212352752685547
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16384,16,2,128,1,float16,fp8,0,1.508193588256836
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16384,16,2,128,1,fp8,fp8,0,1.5240816116333007
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16384,16,4,128,1,float16,float16,0,1.860428810119629
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16384,16,4,128,1,float16,fp8,0,1.4950719833374024
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16384,16,4,128,1,fp8,fp8,0,1.5553855895996094
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16384,16,8,128,1,float16,fp8,0,1.4871248245239257
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16384,16,8,128,1,fp8,fp8,0,1.4925056457519532
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16384,16,8,128,1,float16,float16,0,1.9103872299194335
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16384,16,16,128,1,float16,fp8,0,0.8236175537109375
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16384,16,16,128,1,fp8,fp8,0,0.8287103652954102
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16384,16,1,128,1,float16,float16,0,0.8878576278686523
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16384,16,1,128,1,float16,fp8,0,0.8027168273925781
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16384,16,1,128,1,fp8,fp8,0,0.9297552108764648
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16384,16,2,128,1,float16,float16,0,0.8996912002563476
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16384,16,2,128,1,float16,fp8,0,0.8069392204284668
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16384,16,2,128,1,fp8,fp8,0,0.8072463989257812
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16384,16,4,128,1,float16,float16,0,0.9043295860290528
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16384,16,4,128,1,fp8,fp8,0,0.8047264099121094
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16384,16,4,128,1,float16,fp8,0,0.8938960075378418
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16384,16,8,128,1,float16,float16,0,0.9106080055236816
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16384,16,8,128,1,float16,fp8,0,0.8278559684753418
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16384,16,8,128,1,fp8,fp8,0,0.8814703941345214
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,12288,16,1,128,1,float16,fp8,0,3.3316993713378906
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,12288,16,1,128,1,fp8,fp8,0,3.3547359466552735
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,12288,16,1,128,1,float16,float16,0,4.086795043945313
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,12288,16,2,128,1,float16,fp8,0,3.292158508300781
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,12288,16,2,128,1,float16,float16,0,4.056313705444336
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,12288,16,2,128,1,fp8,fp8,0,3.39730224609375
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,12288,16,4,128,1,fp8,fp8,0,3.306967926025391
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,12288,16,4,128,1,float16,float16,0,4.213651275634765
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,12288,16,4,128,1,float16,fp8,0,3.5458560943603517
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,12288,16,8,128,1,float16,float16,0,4.192587280273438
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,12288,16,16,128,1,float16,fp8,0,1.7714336395263672
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,12288,16,8,128,1,float16,fp8,0,3.3734832763671876
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,12288,16,8,128,1,fp8,fp8,0,3.3453601837158202
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,12288,16,16,128,1,fp8,fp8,0,1.9480384826660155
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,12288,16,1,128,1,float16,float16,0,1.9382192611694335
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,12288,16,1,128,1,float16,fp8,0,1.7450384140014648
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,12288,16,1,128,1,fp8,fp8,0,1.7186223983764648
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,12288,16,2,128,1,float16,float16,0,1.8836511611938476
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,12288,16,2,128,1,float16,fp8,0,2.0339120864868163
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,12288,16,2,128,1,fp8,fp8,0,1.6839183807373046
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,12288,16,4,128,1,float16,float16,0,2.029199981689453
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,12288,16,4,128,1,float16,fp8,0,1.8487760543823242
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,12288,16,4,128,1,fp8,fp8,0,1.7205215454101563
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,12288,16,8,128,1,float16,float16,0,2.0281808853149412
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,12288,16,8,128,1,fp8,fp8,0,1.7166912078857421
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,12288,16,16,128,1,float16,fp8,0,1.1320768356323243
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,12288,16,16,128,1,fp8,fp8,0,0.9282879829406738
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,12288,16,1,128,1,float16,float16,0,1.043227195739746
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,12288,16,1,128,1,float16,fp8,0,0.8853023529052735
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,12288,16,1,128,1,fp8,fp8,0,1.0061727523803712
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,12288,16,2,128,1,float16,float16,0,0.9798959732055664
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,12288,16,2,128,1,float16,fp8,0,0.9396592140197754
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,12288,16,2,128,1,fp8,fp8,0,0.9125503540039063
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,12288,16,4,128,1,float16,float16,0,1.0181551933288575
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,12288,16,4,128,1,float16,fp8,0,0.9113023757934571
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,12288,16,4,128,1,fp8,fp8,0,0.9720671653747559
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,12288,16,8,128,1,float16,float16,0,1.006771183013916
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,12288,16,8,128,1,float16,fp8,0,0.9211071968078614
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,12288,16,8,128,1,fp8,fp8,0,0.895844841003418
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,12288,16,8,128,1,float16,fp8,0,1.8949647903442384
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,12288,16,16,128,1,float16,fp8,0,0.534441614151001
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,12288,16,16,128,1,fp8,fp8,0,0.5053040027618408
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,12288,16,1,128,1,float16,float16,0,0.5414463996887207
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,12288,16,1,128,1,float16,fp8,0,0.49486241340637205
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,12288,16,1,128,1,fp8,fp8,0,0.4932240009307861
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,12288,16,2,128,1,float16,float16,0,0.5310800075531006
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,12288,16,2,128,1,float16,fp8,0,0.4908559799194336
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,12288,16,2,128,1,fp8,fp8,0,0.4898223876953125
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,12288,16,4,128,1,float16,float16,0,0.5364528179168702
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,12288,16,4,128,1,float16,fp8,0,0.4913648128509521
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,12288,16,4,128,1,fp8,fp8,0,0.49141440391540525
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,12288,16,8,128,1,float16,float16,0,0.5446991920471191
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,12288,16,8,128,1,float16,fp8,0,0.49048638343811035
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,12288,16,8,128,1,fp8,fp8,0,0.4929215908050537
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,10240,16,1,128,1,float16,fp8,0,2.3235328674316404
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,10240,16,1,128,1,float16,float16,0,2.732304000854492
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,10240,16,1,128,1,fp8,fp8,0,2.343783950805664
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,10240,16,2,128,1,float16,float16,0,2.8663984298706056
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,10240,16,2,128,1,float16,fp8,0,2.3314224243164063
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,10240,16,2,128,1,fp8,fp8,0,2.5280879974365233
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,10240,16,4,128,1,float16,float16,0,2.711369514465332
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,10240,16,4,128,1,float16,fp8,0,2.351852798461914
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,10240,16,4,128,1,fp8,fp8,0,2.6248752593994142
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,10240,16,8,128,1,float16,float16,0,2.859244728088379
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,10240,16,8,128,1,float16,fp8,0,2.384639930725098
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,10240,16,8,128,1,fp8,fp8,0,2.6763439178466797
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,10240,16,16,128,1,float16,fp8,0,1.305292797088623
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,10240,16,16,128,1,fp8,fp8,0,1.2499024391174316
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,10240,16,1,128,1,float16,float16,0,1.3483519554138184
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,10240,16,1,128,1,float16,fp8,0,1.2764032363891602
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,10240,16,1,128,1,fp8,fp8,0,1.2505727767944337
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,10240,16,2,128,1,float16,float16,0,1.353382396697998
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,10240,16,2,128,1,float16,fp8,0,1.3809087753295899
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,10240,16,2,128,1,fp8,fp8,0,1.267948818206787
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,10240,16,4,128,1,float16,float16,0,1.3729984283447265
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,10240,16,4,128,1,float16,fp8,0,1.3551648139953614
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,10240,16,4,128,1,fp8,fp8,0,1.2414143562316895
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,10240,16,8,128,1,float16,float16,0,1.3972543716430663
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,10240,16,8,128,1,float16,fp8,0,1.3686623573303223
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,10240,16,8,128,1,fp8,fp8,0,1.2529520034790038
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,10240,16,16,128,1,float16,float16,0,0.7530576229095459
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,12288,16,16,128,1,float16,float16,0,1.0442591667175294
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,12288,16,16,128,1,float16,float16,0,0.567907190322876
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,10240,16,16,128,1,float16,fp8,0,0.664851188659668
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,10240,16,16,128,1,fp8,fp8,0,0.6656047821044921
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,10240,16,1,128,1,float16,float16,0,0.7118368148803711
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,10240,16,1,128,1,float16,fp8,0,0.6410848140716553
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,10240,16,16,128,1,float16,float16,0,1.4323200225830077
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,10240,16,2,128,1,float16,fp8,0,0.641596794128418
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,10240,16,2,128,1,float16,float16,0,0.7178175926208497
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16384,16,16,128,1,float16,float16,0,3.607672119140625
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,10240,16,2,128,1,fp8,fp8,0,0.7553647994995117
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,10240,16,4,128,1,float16,fp8,0,0.6601984024047851
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16384,16,16,128,1,float16,float16,0,1.7710063934326172
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,10240,16,4,128,1,fp8,fp8,0,0.7554495811462403
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,10240,16,8,128,1,float16,float16,0,0.7546832084655761
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16384,16,16,128,1,float16,float16,0,0.9337488174438476
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,10240,16,16,128,1,float16,float16,0,0.4616191864013672
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,10240,16,8,128,1,fp8,fp8,0,0.6757775783538819
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,10240,16,16,128,1,float16,fp8,0,0.40819358825683594
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,10240,16,16,128,1,fp8,fp8,0,0.3748399972915649
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,10240,16,1,128,1,float16,float16,0,0.40409278869628906
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,12288,16,16,128,1,float16,float16,0,2.0151296615600587
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,10240,16,1,128,1,float16,fp8,0,0.3833168029785156
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,10240,16,1,128,1,fp8,fp8,0,0.3886944055557251
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,10240,16,2,128,1,float16,float16,0,0.3869744062423706
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,10240,16,2,128,1,float16,fp8,0,0.37360320091247556
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,10240,16,4,128,1,float16,float16,0,0.403272008895874
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,10240,16,2,128,1,fp8,fp8,0,0.42621917724609376
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,10240,16,4,128,1,float16,fp8,0,0.37436161041259763
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,10240,16,4,128,1,fp8,fp8,0,0.36080639362335204
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,10240,16,8,128,1,float16,float16,0,0.4058095932006836
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,10240,16,8,128,1,float16,fp8,0,0.37418560981750487
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,10240,16,8,128,1,fp8,fp8,0,0.42470240592956543
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,10240,16,1,128,1,fp8,fp8,0,0.6427440166473388
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,10240,16,4,128,1,float16,float16,0,0.7132480144500732
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,10240,16,8,128,1,float16,fp8,0,0.6481071949005127
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,8192,16,1,128,1,float16,fp8,0,3.013942337036133
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,8192,16,1,128,1,fp8,fp8,0,3.0284048080444337
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,8192,16,2,128,1,fp8,fp8,0,3.019260787963867
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,8192,16,1,128,1,float16,float16,0,3.5629215240478516
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,8192,16,2,128,1,float16,fp8,0,3.0669023513793947
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,8192,16,2,128,1,float16,float16,0,3.639039993286133
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,8192,16,4,128,1,float16,float16,0,3.6710369110107424
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,8192,16,4,128,1,float16,fp8,0,3.01983528137207
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,8192,16,16,128,1,float16,float16,0,1.9459760665893555
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,8192,16,16,128,1,float16,fp8,0,1.9133920669555664
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,8192,16,4,128,1,fp8,fp8,0,3.0257232666015623
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,8192,16,8,128,1,float16,fp8,0,3.029422378540039
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,8192,16,8,128,1,fp8,fp8,0,3.247761535644531
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,8192,16,8,128,1,float16,float16,0,3.7889694213867187
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,8192,16,1,128,1,float16,float16,0,1.789531135559082
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,8192,16,16,128,1,fp8,fp8,0,1.9765199661254882
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,8192,16,1,128,1,float16,fp8,0,1.6925056457519532
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,8192,16,1,128,1,fp8,fp8,0,1.5721808433532716
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,8192,16,2,128,1,float16,float16,0,1.7674032211303712
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,8192,16,2,128,1,float16,fp8,0,1.5807151794433594
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,8192,16,2,128,1,fp8,fp8,0,1.5790287971496582
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,8192,16,4,128,1,fp8,fp8,0,1.581220817565918
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,8192,16,4,128,1,float16,fp8,0,1.7374879837036132
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,8192,16,4,128,1,float16,float16,0,1.919972801208496
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,8192,16,8,128,1,float16,fp8,0,1.5479136466979981
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,8192,16,8,128,1,float16,float16,0,1.8283599853515624
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,8192,16,16,128,1,float16,fp8,0,0.8455663681030273
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,8192,16,16,128,1,fp8,fp8,0,0.8527152061462402
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,8192,16,8,128,1,fp8,fp8,0,1.7366687774658203
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,8192,16,1,128,1,float16,fp8,0,0.9578080177307129
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,8192,16,1,128,1,fp8,fp8,0,0.9769696235656739
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,8192,16,2,128,1,float16,float16,0,0.8831600189208985
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,8192,16,2,128,1,float16,fp8,0,0.8064479827880859
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,8192,16,2,128,1,fp8,fp8,0,0.8320015907287598
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,8192,16,4,128,1,float16,float16,0,0.9652688026428222
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,8192,16,4,128,1,float16,fp8,0,0.8491567611694336
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,8192,16,4,128,1,fp8,fp8,0,0.8184752464294434
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,8192,16,8,128,1,float16,float16,0,0.9257552146911621
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,8192,16,8,128,1,float16,fp8,0,0.8211423873901367
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,8192,16,16,128,1,float16,float16,0,0.5151055812835693
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,8192,16,16,128,1,float16,fp8,0,0.4728447914123535
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,8192,16,8,128,1,fp8,fp8,0,0.860478401184082
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,8192,16,16,128,1,fp8,fp8,0,0.4577375888824463
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,8192,16,1,128,1,float16,float16,0,0.4742784023284912
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,8192,16,1,128,1,float16,fp8,0,0.45302882194519045
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,8192,16,1,128,1,fp8,fp8,0,0.45357599258422854
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,8192,16,2,128,1,float16,float16,0,0.4771120071411133
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,8192,16,2,128,1,float16,fp8,0,0.4378176212310791
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,8192,16,2,128,1,fp8,fp8,0,0.4407519817352295
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,8192,16,16,128,1,float16,float16,0,0.9712944030761719
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,8192,16,4,128,1,float16,float16,0,0.4808032035827637
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,8192,16,4,128,1,float16,fp8,0,0.45441279411315916
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,8192,16,4,128,1,fp8,fp8,0,0.44545760154724123
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,8192,16,8,128,1,float16,float16,0,0.5049344062805176
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,8192,16,8,128,1,float16,fp8,0,0.4413407802581787
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,8192,16,16,128,1,float16,fp8,0,0.2687311887741089
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,8192,16,8,128,1,fp8,fp8,0,0.43909921646118166
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,8192,16,1,128,1,float16,float16,0,0.9060015678405762
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,8192,16,16,128,1,fp8,fp8,0,0.26852641105651853
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,8192,16,1,128,1,float16,float16,0,0.27647359371185304
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,8192,16,1,128,1,float16,fp8,0,0.25470240116119386
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,8192,16,1,128,1,fp8,fp8,0,0.2553152084350586
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,8192,16,2,128,1,float16,float16,0,0.2670016050338745
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,8192,16,2,128,1,float16,fp8,0,0.2528815984725952
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,8192,16,2,128,1,fp8,fp8,0,0.2582639932632446
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,8192,16,4,128,1,float16,float16,0,0.27827839851379393
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,8192,16,4,128,1,float16,fp8,0,0.2551232099533081
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,8192,16,4,128,1,fp8,fp8,0,0.2554480075836182
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,8192,16,8,128,1,float16,float16,0,0.27664799690246583
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,8192,16,8,128,1,float16,fp8,0,0.2552095890045166
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,8192,16,8,128,1,fp8,fp8,0,0.2565392017364502
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,6144,16,1,128,1,float16,fp8,0,1.7918655395507812
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,6144,16,1,128,1,fp8,fp8,0,1.8004159927368164
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,6144,16,2,128,1,float16,fp8,0,1.7967103958129882
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,6144,16,1,128,1,float16,float16,0,2.043084716796875
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,6144,16,2,128,1,fp8,fp8,0,1.7961616516113281
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,6144,16,2,128,1,float16,float16,0,2.094767951965332
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,6144,16,4,128,1,float16,float16,0,2.045987129211426
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,8192,16,16,128,1,float16,float16,0,0.2880768060684204
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,6144,16,16,128,1,float16,float16,0,1.2039759635925293
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,6144,16,4,128,1,float16,fp8,0,1.7978111267089845
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,6144,16,4,128,1,fp8,fp8,0,1.8005104064941406
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,6144,16,8,128,1,fp8,fp8,0,1.7968223571777344
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,6144,16,16,128,1,fp8,fp8,0,0.981606388092041
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,6144,16,16,128,1,float16,fp8,0,1.1329071998596192
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,6144,16,8,128,1,float16,float16,0,2.201323127746582
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,6144,16,8,128,1,float16,fp8,0,2.1628128051757813
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,6144,16,1,128,1,float16,fp8,0,0.9231264114379882
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,6144,16,1,128,1,float16,float16,0,1.1143919944763183
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,6144,16,1,128,1,fp8,fp8,0,0.9290575981140137
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,6144,16,2,128,1,float16,fp8,0,0.9262495994567871
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,6144,16,2,128,1,float16,float16,0,1.0232416152954102
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,6144,16,2,128,1,fp8,fp8,0,0.9791520118713379
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,6144,16,4,128,1,float16,fp8,0,0.9464256286621093
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,6144,16,4,128,1,float16,float16,0,1.1498784065246581
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,6144,16,4,128,1,fp8,fp8,0,0.9238927841186524
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,6144,16,16,128,1,float16,float16,0,0.5913136005401611
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,6144,16,16,128,1,float16,fp8,0,0.5343279838562012
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,6144,16,8,128,1,float16,fp8,0,0.9288399696350098
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,6144,16,16,128,1,fp8,fp8,0,0.5237199783325195
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,6144,16,8,128,1,float16,float16,0,1.1414640426635743
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,6144,16,8,128,1,fp8,fp8,0,0.9274448394775391
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,6144,16,1,128,1,float16,float16,0,0.5820528030395508
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,6144,16,1,128,1,float16,fp8,0,0.4893807888031006
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,6144,16,1,128,1,fp8,fp8,0,0.5163280010223389
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,6144,16,2,128,1,float16,float16,0,0.5301343917846679
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,6144,16,2,128,1,fp8,fp8,0,0.4950592041015625
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,6144,16,4,128,1,float16,fp8,0,0.4908143997192383
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,6144,16,4,128,1,float16,float16,0,0.5938975811004639
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,6144,16,4,128,1,fp8,fp8,0,0.5628255844116211
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,6144,16,8,128,1,float16,float16,0,0.5522160053253173
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,6144,16,16,128,1,float16,float16,0,0.3153232097625732
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,6144,16,16,128,1,float16,fp8,0,0.28850879669189455
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,6144,16,8,128,1,float16,fp8,0,0.5063712120056152
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,6144,16,16,128,1,fp8,fp8,0,0.3292927980422974
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,6144,16,1,128,1,float16,float16,0,0.311025595664978
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,6144,16,1,128,1,float16,fp8,0,0.2747152090072632
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,6144,16,1,128,1,fp8,fp8,0,0.27140159606933595
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,6144,16,2,128,1,float16,fp8,0,0.2798991918563843
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,6144,16,2,128,1,fp8,fp8,0,0.291430401802063
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,6144,16,4,128,1,float16,float16,0,0.3057152032852173
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,6144,16,4,128,1,float16,fp8,0,0.2770512104034424
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,6144,16,4,128,1,fp8,fp8,0,0.2717152118682861
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,6144,16,8,128,1,float16,float16,0,0.3012432098388672
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,6144,16,8,128,1,float16,fp8,0,0.28834240436553954
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,6144,16,8,128,1,fp8,fp8,0,0.284932804107666
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,6144,16,16,128,1,float16,float16,0,0.18358080387115477
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,6144,16,16,128,1,float16,fp8,0,0.16914399862289428
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,6144,16,1,128,1,float16,float16,0,0.1755184054374695
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,6144,16,1,128,1,float16,fp8,0,0.1625264048576355
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,6144,16,1,128,1,fp8,fp8,0,0.1614367961883545
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,6144,16,2,128,1,float16,float16,0,0.17507519721984863
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,6144,16,2,128,1,float16,fp8,0,0.16265759468078614
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,6144,16,4,128,1,float16,float16,0,0.17681599855422975
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,6144,16,4,128,1,float16,fp8,0,0.16280000209808348
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,6144,16,4,128,1,fp8,fp8,0,0.1690991997718811
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,6144,16,8,128,1,float16,float16,0,0.17396800518035888
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,6144,16,8,128,1,float16,fp8,0,0.1649839997291565
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,6144,16,8,128,1,fp8,fp8,0,0.16135200262069702
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,6144,16,2,128,1,float16,fp8,0,0.49133758544921874
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,6144,16,8,128,1,fp8,fp8,0,0.4929135799407959
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,4096,16,1,128,1,float16,fp8,0,1.7252416610717773
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,4096,16,1,128,1,float16,float16,0,1.899825668334961
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,6144,16,16,128,1,fp8,fp8,0,0.17097760438919068
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,4096,16,1,128,1,fp8,fp8,0,1.7325040817260742
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,6144,16,2,128,1,fp8,fp8,0,0.16152960062026978
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,4096,16,2,128,1,float16,float16,0,1.8956207275390624
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,4096,16,2,128,1,float16,fp8,0,1.725503921508789
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,4096,16,2,128,1,fp8,fp8,0,1.8879968643188476
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,4096,16,4,128,1,float16,fp8,0,1.732262420654297
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,4096,16,4,128,1,float16,float16,0,1.9320192337036133
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,4096,16,4,128,1,fp8,fp8,0,1.7270511627197265
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,4096,16,16,128,1,float16,float16,0,1.1029760360717773
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,6144,16,2,128,1,float16,float16,0,0.2864784002304077
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,4096,16,8,128,1,float16,fp8,0,1.727449607849121
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,4096,16,16,128,1,float16,fp8,0,0.9584624290466308
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,4096,16,8,128,1,float16,float16,0,2.1826047897338867
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,4096,16,1,128,1,float16,float16,0,0.946350383758545
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,4096,16,16,128,1,fp8,fp8,0,1.1473008155822755
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,4096,16,8,128,1,fp8,fp8,0,1.7820352554321288
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,4096,16,1,128,1,float16,fp8,0,0.884671974182129
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,4096,16,1,128,1,fp8,fp8,0,0.8854991912841796
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,4096,16,2,128,1,float16,float16,0,0.9543392181396484
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,4096,16,2,128,1,float16,fp8,0,0.8859408378601075
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,4096,16,4,128,1,float16,float16,0,0.973481559753418
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,4096,16,4,128,1,fp8,fp8,0,0.8898127555847168
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,4096,16,4,128,1,float16,fp8,0,0.9485343933105469
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,4096,16,16,128,1,float16,float16,0,0.559611177444458
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,4096,16,8,128,1,float16,float16,0,1.0169967651367187
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,4096,16,16,128,1,float16,fp8,0,0.5865456104278565
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,4096,16,1,128,1,float16,float16,0,0.48981761932373047
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,4096,16,8,128,1,fp8,fp8,0,0.9962176322937012
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,4096,16,1,128,1,float16,fp8,0,0.46411681175231934
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,4096,16,1,128,1,fp8,fp8,0,0.5204383850097656
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,4096,16,2,128,1,float16,float16,0,0.5007999897003174
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,4096,16,2,128,1,fp8,fp8,0,0.4820591926574707
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,4096,16,4,128,1,float16,float16,0,0.49390559196472167
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,4096,16,4,128,1,float16,fp8,0,0.5002592086791993
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,4096,16,4,128,1,fp8,fp8,0,0.49983839988708495
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,4096,16,8,128,1,float16,float16,0,0.5254415988922119
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,4096,16,8,128,1,float16,fp8,0,0.4652544021606445
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,4096,16,16,128,1,float16,float16,0,0.30606560707092284
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,4096,16,16,128,1,float16,fp8,0,0.2760976076126099
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,4096,16,8,128,1,fp8,fp8,0,0.5014768123626709
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,4096,16,16,128,1,fp8,fp8,0,0.272271990776062
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,4096,16,1,128,1,float16,float16,0,0.2666304111480713
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,4096,16,1,128,1,float16,fp8,0,0.2530463933944702
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,4096,16,1,128,1,fp8,fp8,0,0.26066720485687256
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,4096,16,2,128,1,float16,float16,0,0.26050400733947754
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,4096,16,2,128,1,float16,fp8,0,0.26175360679626464
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,4096,16,2,128,1,fp8,fp8,0,0.2529727935791016
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,4096,16,4,128,1,float16,float16,0,0.2684288024902344
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,4096,16,4,128,1,float16,fp8,0,0.25647358894348143
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,4096,16,4,128,1,fp8,fp8,0,0.2615983963012695
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,4096,16,8,128,1,float16,fp8,0,0.257260799407959
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,4096,16,8,128,1,float16,float16,0,0.27714560031890867
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,4096,16,8,128,1,fp8,fp8,0,0.2551151990890503
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,4096,16,16,128,1,float16,float16,0,0.17244800329208373
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,4096,16,16,128,1,float16,fp8,0,0.1593791961669922
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,4096,16,2,128,1,fp8,fp8,0,1.0644960403442383
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,4096,16,1,128,1,float16,fp8,0,0.14738719463348388
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,4096,16,1,128,1,float16,float16,0,0.15055520534515382
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,4096,16,2,128,1,float16,float16,0,0.15032960176467897
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,4096,16,2,128,1,float16,fp8,0,0.14666880369186402
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,4096,16,2,128,1,fp8,fp8,0,0.14655359983444213
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,4096,16,4,128,1,float16,float16,0,0.15319360494613649
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,4096,16,8,128,1,float16,fp8,0,0.886740779876709
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,4096,16,16,128,1,fp8,fp8,0,0.5015103816986084
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,4096,16,4,128,1,float16,fp8,0,0.146452796459198
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,4096,16,4,128,1,fp8,fp8,0,0.1463039994239807
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,4096,16,8,128,1,float16,float16,0,0.1592960000038147
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,4096,16,16,128,1,float16,fp8,0,0.0985152006149292
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,4096,16,8,128,1,fp8,fp8,0,0.1457808017730713
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,4096,16,16,128,1,fp8,fp8,0,0.09874240159988404
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,4096,16,1,128,1,float16,float16,0,0.09831839799880981
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,4096,16,1,128,1,float16,fp8,0,0.09381920099258423
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,4096,16,2,128,1,float16,fp8,0,0.4696815967559814
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,4096,16,1,128,1,fp8,fp8,0,0.09422399997711181
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,4096,16,2,128,1,float16,float16,0,0.09845600128173829
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,4096,16,2,128,1,float16,fp8,0,0.09380480051040649
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,4096,16,2,128,1,fp8,fp8,0,0.09424480199813842
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,4096,16,4,128,1,float16,float16,0,0.0988207995891571
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,4096,16,4,128,1,float16,fp8,0,0.09441440105438233
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,4096,16,4,128,1,fp8,fp8,0,0.09402239918708802
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,4096,16,8,128,1,float16,float16,0,0.10127840042114258
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,4096,16,8,128,1,float16,fp8,0,0.09456639885902404
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,3072,16,1,128,1,float16,float16,0,1.1346575736999511
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,3072,16,1,128,1,float16,fp8,0,1.059280014038086
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,4096,16,16,128,1,fp8,fp8,0,0.15809119939804078
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,4096,16,1,128,1,fp8,fp8,0,0.14772640466690062
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,3072,16,2,128,1,float16,float16,0,1.1374992370605468
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,3072,16,1,128,1,fp8,fp8,0,1.2617535591125488
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,4096,16,16,128,1,float16,float16,0,0.10549440383911132
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,3072,16,2,128,1,float16,fp8,0,1.0569727897644043
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,3072,16,2,128,1,fp8,fp8,0,1.0592255592346191
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,3072,16,4,128,1,float16,fp8,0,1.0985376358032226
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,4096,16,8,128,1,fp8,fp8,0,0.09501760005950928
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,3072,16,4,128,1,float16,float16,0,1.21353759765625
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,3072,16,4,128,1,fp8,fp8,0,1.058895969390869
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,3072,16,16,128,1,float16,float16,0,0.6782688140869141
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,3072,16,16,128,1,float16,fp8,0,0.7136703968048096
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,3072,16,8,128,1,float16,float16,0,1.219921588897705
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,3072,16,8,128,1,float16,fp8,0,1.0604496002197266
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,3072,16,16,128,1,fp8,fp8,0,0.5990240097045898
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,3072,16,8,128,1,fp8,fp8,0,1.0771648406982421
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,3072,16,1,128,1,float16,float16,0,0.560532808303833
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,3072,16,1,128,1,float16,fp8,0,0.6052512168884278
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,3072,16,2,128,1,float16,float16,0,0.5695184230804443
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,3072,16,2,128,1,float16,fp8,0,0.5451568126678467
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,3072,16,2,128,1,fp8,fp8,0,0.5464352130889892
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,3072,16,4,128,1,float16,float16,0,0.587062406539917
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,3072,16,4,128,1,float16,fp8,0,0.566921615600586
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,3072,16,4,128,1,fp8,fp8,0,0.6063871860504151
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,3072,16,8,128,1,float16,fp8,0,0.5471199989318848
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,3072,16,8,128,1,float16,float16,0,0.6162320137023926
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,3072,16,16,128,1,float16,float16,0,0.36366560459136965
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,3072,16,16,128,1,float16,fp8,0,0.3244191884994507
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,3072,16,16,128,1,fp8,fp8,0,0.33724639415740965
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,4096,16,8,128,1,float16,fp8,0,0.14630719423294067
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,3072,16,1,128,1,float16,fp8,0,0.2894223928451538
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,3072,16,1,128,1,fp8,fp8,0,0.3204848051071167
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,3072,16,2,128,1,float16,float16,0,0.2973151922225952
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,3072,16,2,128,1,float16,fp8,0,0.3041280031204224
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,3072,16,2,128,1,fp8,fp8,0,0.28941919803619387
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,3072,16,4,128,1,float16,float16,0,0.30313920974731445
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,3072,16,4,128,1,float16,fp8,0,0.3061232089996338
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,3072,16,4,128,1,fp8,fp8,0,0.2920063972473145
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,3072,16,8,128,1,float16,float16,0,0.3264048099517822
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,3072,16,8,128,1,float16,fp8,0,0.29115679264068606
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,3072,16,8,128,1,fp8,fp8,0,0.29393279552459717
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,3072,16,16,128,1,float16,fp8,0,0.17700480222702025
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,3072,16,16,128,1,fp8,fp8,0,0.18507519960403443
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,3072,16,1,128,1,float16,float16,0,0.16393280029296875
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,3072,16,1,128,1,float16,fp8,0,0.16495840549468993
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,3072,16,1,128,1,fp8,fp8,0,0.16118240356445312
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,3072,16,2,128,1,float16,float16,0,0.17171839475631714
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,3072,16,2,128,1,float16,fp8,0,0.1616752028465271
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,3072,16,2,128,1,fp8,fp8,0,0.1616960048675537
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,3072,16,4,128,1,float16,float16,0,0.16937439441680907
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,3072,16,4,128,1,float16,fp8,0,0.16512479782104492
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,3072,16,4,128,1,fp8,fp8,0,0.16135200262069702
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,3072,16,8,128,1,float16,float16,0,0.17716000080108643
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,3072,16,8,128,1,float16,fp8,0,0.1631055951118469
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,3072,16,16,128,1,float16,float16,0,0.11212480068206787
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,3072,16,16,128,1,float16,fp8,0,0.10460319519042968
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,3072,16,8,128,1,fp8,fp8,0,0.16427359580993653
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,3072,16,16,128,1,fp8,fp8,0,0.10361920595169068
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,3072,16,1,128,1,float16,float16,0,0.09915680289268494
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,3072,16,1,128,1,float16,fp8,0,0.09728800058364868
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,3072,16,2,128,1,float16,float16,0,0.09852960109710693
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,3072,16,2,128,1,float16,fp8,0,0.09706559777259827
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,3072,16,2,128,1,fp8,fp8,0,0.09679200053215027
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,3072,16,4,128,1,float16,float16,0,0.1002511978149414
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,3072,16,4,128,1,float16,fp8,0,0.0973151981830597
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,3072,16,1,128,1,fp8,fp8,0,0.545137596130371
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,3072,16,4,128,1,fp8,fp8,0,0.09657440185546876
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,3072,16,8,128,1,float16,float16,0,0.10546400547027587
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,3072,16,8,128,1,float16,fp8,0,0.09720000028610229
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,3072,16,16,128,1,float16,float16,0,0.07036799788475037
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,3072,16,8,128,1,fp8,fp8,0,0.09753440022468567
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,3072,16,16,128,1,float16,fp8,0,0.0658896028995514
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,3072,16,16,128,1,fp8,fp8,0,0.06742879748344421
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,3072,16,1,128,1,float16,float16,0,0.06620799899101257
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,3072,16,1,128,1,float16,fp8,0,0.06358399987220764
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,3072,16,1,128,1,fp8,fp8,0,0.06372960209846497
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,3072,16,2,128,1,float16,float16,0,0.06649439930915832
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,3072,16,2,128,1,float16,fp8,0,0.06389120221138
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,3072,16,4,128,1,float16,float16,0,0.0665120005607605
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,3072,16,4,128,1,float16,fp8,0,0.06373119950294495
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,3072,16,4,128,1,fp8,fp8,0,0.0637503981590271
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,3072,16,8,128,1,float16,float16,0,0.06881600022315978
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,3072,16,8,128,1,float16,fp8,0,0.06391199827194213
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,3072,16,8,128,1,fp8,fp8,0,0.06373440027236939
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,3072,16,8,128,1,fp8,fp8,0,0.546995210647583
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,3072,16,1,128,1,float16,float16,0,0.2981152057647705
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,2048,16,1,128,1,float16,float16,0,1.0926752090454102
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,2048,16,1,128,1,float16,fp8,0,1.074459171295166
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,2048,16,1,128,1,fp8,fp8,0,1.0712528228759766
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,3072,16,16,128,1,float16,float16,0,0.19602080583572387
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,2048,16,2,128,1,float16,float16,0,1.0991071701049804
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,2048,16,2,128,1,float16,fp8,0,1.073633575439453
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,2048,16,2,128,1,fp8,fp8,0,1.183403205871582
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,2048,16,4,128,1,float16,fp8,0,1.0715567588806152
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,3072,16,1,128,1,fp8,fp8,0,0.09739999771118164
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,2048,16,4,128,1,fp8,fp8,0,1.075100803375244
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,2048,16,8,128,1,float16,float16,0,1.2328399658203124
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,2048,16,8,128,1,float16,fp8,0,1.073844814300537
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,2048,16,16,128,1,float16,float16,0,0.7163584232330322
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,2048,16,16,128,1,float16,fp8,0,0.6196544170379639
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,3072,16,2,128,1,fp8,fp8,0,0.0635695993900299
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,2048,16,8,128,1,fp8,fp8,0,1.072606372833252
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,2048,16,1,128,1,float16,float16,0,0.5612895965576172
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,2048,16,1,128,1,fp8,fp8,0,0.5486671924591064
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,2048,16,2,128,1,float16,float16,0,0.5580319881439209
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,2048,16,2,128,1,float16,fp8,0,0.5473152160644531
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,2048,16,4,128,1,float16,fp8,0,0.5476175785064697
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,2048,16,4,128,1,fp8,fp8,0,0.5485680103302002
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,2048,16,8,128,1,float16,float16,0,0.6170048236846923
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,2048,16,8,128,1,float16,fp8,0,0.5492191791534424
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,2048,16,4,128,1,float16,float16,0,1.133187198638916
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,2048,16,8,128,1,fp8,fp8,0,0.548747205734253
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,2048,16,16,128,1,float16,float16,0,0.3587935924530029
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,2048,16,16,128,1,fp8,fp8,0,0.3226367950439453
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,2048,16,16,128,1,float16,fp8,0,0.32462239265441895
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,2048,16,1,128,1,float16,fp8,0,0.29115519523620603
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,2048,16,2,128,1,float16,float16,0,0.2865679979324341
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,2048,16,16,128,1,fp8,fp8,0,0.7149807929992675
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,2048,16,1,128,1,float16,fp8,0,0.56942720413208
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,2048,16,2,128,1,float16,fp8,0,0.28932480812072753
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,2048,16,2,128,1,fp8,fp8,0,0.287608003616333
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,2048,16,2,128,1,fp8,fp8,0,0.5493951797485351
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,2048,16,4,128,1,float16,float16,0,0.2970351934432983
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,2048,16,4,128,1,float16,fp8,0,0.28913280963897703
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,2048,16,4,128,1,fp8,fp8,0,0.28815360069274903
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,2048,16,4,128,1,float16,float16,0,0.5742015838623047
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,2048,16,8,128,1,float16,float16,0,0.32030720710754396
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,2048,16,8,128,1,float16,fp8,0,0.2885807991027832
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,2048,16,8,128,1,fp8,fp8,0,0.29025280475616455
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,2048,16,16,128,1,float16,fp8,0,0.17464319467544556
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,2048,16,16,128,1,fp8,fp8,0,0.17614879608154296
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,2048,16,1,128,1,float16,float16,0,0.15596480369567872
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,2048,16,1,128,1,fp8,fp8,0,0.15619360208511351
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,2048,16,2,128,1,float16,float16,0,0.15527199506759642
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,2048,16,2,128,1,float16,fp8,0,0.1566655993461609
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,2048,16,2,128,1,fp8,fp8,0,0.15661280155181884
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,2048,16,4,128,1,float16,float16,0,0.16040960550308228
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,2048,16,4,128,1,float16,fp8,0,0.15708320140838622
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,2048,16,4,128,1,fp8,fp8,0,0.15684640407562256
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,2048,16,8,128,1,float16,float16,0,0.17109919786453248
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,2048,16,8,128,1,float16,fp8,0,0.15745760202407838
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,2048,16,8,128,1,fp8,fp8,0,0.15704480409622193
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,2048,16,16,128,1,float16,float16,0,0.10779039859771729
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,2048,16,16,128,1,float16,fp8,0,0.10056799650192261
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,2048,16,16,128,1,fp8,fp8,0,0.10093599557876587
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,2048,16,1,128,1,float16,float16,0,0.09124799966812133
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,2048,16,1,128,1,float16,fp8,0,0.0901311993598938
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,2048,16,1,128,1,fp8,fp8,0,0.09077919721603393
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,2048,16,2,128,1,float16,float16,0,0.09126560091972351
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,2048,16,2,128,1,float16,fp8,0,0.0907472014427185
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,2048,16,2,128,1,fp8,fp8,0,0.09078720211982727
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,2048,16,1,128,1,float16,float16,0,0.2838063955307007
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,2048,16,4,128,1,float16,float16,0,0.09425920248031616
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,2048,16,4,128,1,float16,fp8,0,0.089547199010849
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,2048,16,4,128,1,fp8,fp8,0,0.09085760116577149
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,2048,16,8,128,1,float16,float16,0,0.09990400075912476
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,2048,16,8,128,1,float16,fp8,0,0.09076640009880066
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,2048,16,8,128,1,fp8,fp8,0,0.0908191978931427
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,2048,16,16,128,1,float16,fp8,0,0.06154879927635193
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,2048,16,16,128,1,float16,float16,0,0.06569759845733643
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,2048,16,1,128,1,fp8,fp8,0,0.28871200084686277
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,2048,16,1,128,1,float16,float16,0,0.059308797121047974
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,2048,16,1,128,1,fp8,fp8,0,0.057143998146057126
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,2048,16,2,128,1,float16,float16,0,0.05890880227088928
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,2048,16,2,128,1,float16,fp8,0,0.056775999069213864
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,2048,16,4,128,1,float16,float16,0,0.05966079831123352
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,2048,16,4,128,1,float16,fp8,0,0.05726879835128784
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,2048,16,4,128,1,fp8,fp8,0,0.056848001480102536
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,2048,16,8,128,1,float16,float16,0,0.06227840185165405
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,2048,16,8,128,1,float16,fp8,0,0.05758559703826904
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,2048,16,16,128,1,float16,float16,0,0.03991680145263672
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,2048,16,16,128,1,float16,fp8,0,0.039166399836540224
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,2048,16,16,128,1,fp8,fp8,0,0.03909119963645935
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,2048,16,1,128,1,float16,float16,0,0.037099200487136844
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,2048,16,1,128,1,float16,fp8,0,0.037083199620246886
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,2048,16,1,128,1,fp8,fp8,0,0.0367823988199234
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,2048,16,2,128,1,float16,float16,0,0.037571200728416444
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,2048,16,2,128,1,float16,fp8,0,0.03709119856357575
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,2048,16,2,128,1,fp8,fp8,0,0.03705439865589142
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,2048,16,4,128,1,float16,float16,0,0.037678399682044984
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,2048,16,4,128,1,float16,fp8,0,0.03704319894313812
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,2048,16,4,128,1,fp8,fp8,0,0.036980798840522765
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,2048,16,8,128,1,float16,float16,0,0.03907040059566498
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,2048,16,8,128,1,fp8,fp8,0,0.03707680106163025
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,2048,16,16,128,1,float16,float16,0,0.19017759561538697
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,2048,16,1,128,1,float16,fp8,0,0.15755679607391357
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1536,16,1,128,1,float16,float16,0,0.6672512054443359
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1536,16,1,128,1,float16,fp8,0,0.6868847846984864
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1536,16,1,128,1,fp8,fp8,0,0.6837567806243896
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1536,16,2,128,1,float16,float16,0,0.6764368057250977
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1536,16,2,128,1,float16,fp8,0,0.6868063926696777
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1536,16,2,128,1,fp8,fp8,0,0.6840176105499267
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,2048,16,16,128,1,fp8,fp8,0,0.0616752028465271
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,2048,16,1,128,1,float16,fp8,0,0.05761600136756897
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,2048,16,2,128,1,fp8,fp8,0,0.05722079873085022
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1536,16,4,128,1,float16,float16,0,0.7078864097595214
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1536,16,4,128,1,float16,fp8,0,0.6855711936950684
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,2048,16,8,128,1,fp8,fp8,0,0.05724800229072571
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1536,16,4,128,1,fp8,fp8,0,0.6843791961669922
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1536,16,16,128,1,float16,float16,0,0.45582242012023927
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1536,16,8,128,1,float16,fp8,0,0.687769603729248
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1536,16,8,128,1,float16,float16,0,0.7733903884887695
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1536,16,16,128,1,float16,fp8,0,0.408076810836792
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1536,16,8,128,1,fp8,fp8,0,0.6860832214355469
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1536,16,16,128,1,fp8,fp8,0,0.40671520233154296
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1536,16,1,128,1,float16,fp8,0,0.3536992073059082
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1536,16,1,128,1,float16,float16,0,0.34200799465179443
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1536,16,1,128,1,fp8,fp8,0,0.35380799770355226
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1536,16,2,128,1,float16,float16,0,0.34178719520568845
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1536,16,2,128,1,float16,fp8,0,0.3533936023712158
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,2048,16,8,128,1,float16,fp8,0,0.03711999952793121
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1536,16,2,128,1,fp8,fp8,0,0.3533423900604248
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1536,16,4,128,1,float16,float16,0,0.36683199405670164
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1536,16,4,128,1,fp8,fp8,0,0.3534015893936157
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1536,16,8,128,1,float16,fp8,0,0.35398719310760496
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1536,16,8,128,1,fp8,fp8,0,0.35503520965576174
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1536,16,16,128,1,float16,fp8,0,0.21540958881378175
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1536,16,1,128,1,float16,float16,0,0.18036320209503173
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1536,16,1,128,1,float16,fp8,0,0.1880944013595581
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1536,16,2,128,1,float16,float16,0,0.1814736008644104
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1536,16,2,128,1,float16,fp8,0,0.18832000494003295
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1536,16,2,128,1,fp8,fp8,0,0.18797279596328736
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1536,16,4,128,1,float16,float16,0,0.18821280002593993
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1536,16,4,128,1,float16,fp8,0,0.1879696011543274
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1536,16,4,128,1,fp8,fp8,0,0.18935680389404297
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1536,16,8,128,1,float16,float16,0,0.20342719554901123
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1536,16,8,128,1,float16,fp8,0,0.19006240367889404
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1536,16,8,128,1,fp8,fp8,0,0.18874720335006714
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1536,16,16,128,1,float16,float16,0,0.1281999945640564
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1536,16,16,128,1,float16,fp8,0,0.11932159662246704
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1536,16,16,128,1,fp8,fp8,0,0.11874239444732666
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1536,16,1,128,1,float16,float16,0,0.10327039957046509
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1536,16,1,128,1,float16,fp8,0,0.10376800298690796
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1536,16,1,128,1,fp8,fp8,0,0.10340479612350464
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1536,16,2,128,1,float16,float16,0,0.10398399829864502
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1536,16,2,128,1,float16,fp8,0,0.1036687970161438
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1536,16,2,128,1,fp8,fp8,0,0.10398880243301392
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1536,16,4,128,1,float16,float16,0,0.10741280317306519
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1536,16,4,128,1,float16,fp8,0,0.10373439788818359
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1536,16,4,128,1,fp8,fp8,0,0.10485919713973998
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1536,16,8,128,1,float16,float16,0,0.11477760076522828
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1536,16,8,128,1,float16,fp8,0,0.1052016019821167
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1536,16,8,128,1,fp8,fp8,0,0.10554399490356445
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1536,16,16,128,1,float16,float16,0,0.07444959878921509
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1536,16,16,128,1,float16,fp8,0,0.06824480295181275
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1536,16,1,128,1,float16,float16,0,0.0623088002204895
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1536,16,1,128,1,float16,fp8,0,0.06186559796333313
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1536,16,1,128,1,fp8,fp8,0,0.06196640133857727
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1536,16,2,128,1,float16,float16,0,0.061947202682495116
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1536,16,2,128,1,fp8,fp8,0,0.06182559728622437
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1536,16,2,128,1,float16,fp8,0,0.06195520162582398
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1536,16,4,128,1,float16,fp8,0,0.35394721031188964
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1536,16,16,128,1,float16,float16,0,0.2346479892730713
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1536,16,4,128,1,float16,float16,0,0.0638256013393402
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1536,16,8,128,1,float16,float16,0,0.3887232065200806
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1536,16,4,128,1,float16,fp8,0,0.06204959750175476
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1536,16,4,128,1,fp8,fp8,0,0.06175360083580017
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1536,16,8,128,1,float16,float16,0,0.0680512011051178
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1536,16,8,128,1,fp8,fp8,0,0.062168002128601074
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1536,16,16,128,1,float16,float16,0,0.045238399505615236
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1536,16,16,128,1,fp8,fp8,0,0.04323199987411499
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1536,16,1,128,1,float16,float16,0,0.041377601027488706
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1536,16,1,128,1,float16,fp8,0,0.0411215990781784
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1536,16,1,128,1,fp8,fp8,0,0.04023520052433014
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1536,16,2,128,1,float16,float16,0,0.04127359986305237
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1536,16,2,128,1,float16,fp8,0,0.041145598888397215
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1536,16,2,128,1,fp8,fp8,0,0.04054720103740692
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1536,16,1,128,1,fp8,fp8,0,0.18791840076446534
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1536,16,4,128,1,float16,fp8,0,0.04116159975528717
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1536,16,4,128,1,fp8,fp8,0,0.04105919897556305
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1536,16,8,128,1,float16,fp8,0,0.04115520119667053
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1536,16,8,128,1,fp8,fp8,0,0.041115200519561766
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1536,16,16,128,1,float16,float16,0,0.03243359923362732
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1536,16,16,128,1,float16,fp8,0,0.03095200061798096
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1536,16,16,128,1,fp8,fp8,0,0.030950400233268737
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1536,16,1,128,1,float16,float16,0,0.03031040132045746
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1536,16,1,128,1,float16,fp8,0,0.02932159900665283
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1536,16,1,128,1,fp8,fp8,0,0.028811201453208923
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1536,16,2,128,1,float16,float16,0,0.03012320101261139
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1536,16,2,128,1,float16,fp8,0,0.028896000981330872
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1536,16,2,128,1,fp8,fp8,0,0.028939199447631837
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1536,16,4,128,1,float16,float16,0,0.030755200982093812
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1536,16,4,128,1,float16,fp8,0,0.029017600417137145
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1536,16,4,128,1,fp8,fp8,0,0.028921601176261903
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1536,16,8,128,1,float16,float16,0,0.030980798602104186
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1536,16,8,128,1,float16,fp8,0,0.02882080078125
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1536,16,8,128,1,fp8,fp8,0,0.028916800022125246
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,1024,16,1,128,1,float16,float16,0,0.6944208145141602
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,1024,16,1,128,1,float16,fp8,0,0.7440624237060547
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1536,16,16,128,1,fp8,fp8,0,0.06948639750480652
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,1024,16,1,128,1,fp8,fp8,0,0.7395391941070557
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,1024,16,2,128,1,float16,float16,0,0.6931968212127686
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1536,16,8,128,1,float16,fp8,0,0.06184800267219544
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1536,16,16,128,1,float16,fp8,0,0.043222400546073916
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1536,16,16,128,1,fp8,fp8,0,0.2147808074951172
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,1024,16,2,128,1,float16,fp8,0,0.7444992065429688
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1536,16,4,128,1,float16,float16,0,0.04264479875564575
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,1024,16,2,128,1,fp8,fp8,0,0.7392608165740967
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1536,16,8,128,1,float16,float16,0,0.04395999908447266
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,1024,16,4,128,1,float16,float16,0,0.7376863956451416
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,1024,16,4,128,1,float16,fp8,0,0.7408880233764649
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,1024,16,4,128,1,fp8,fp8,0,0.743720006942749
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,1024,16,8,128,1,float16,float16,0,0.8312944412231446
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1024,16,16,128,1,float16,float16,0,0.5091023921966553
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,1024,16,8,128,1,float16,fp8,0,0.7453455924987793
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1024,16,16,128,1,float16,fp8,0,0.5115344047546386
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1024,16,1,128,1,float16,float16,0,0.35147359371185305
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1024,16,16,128,1,fp8,fp8,0,0.4499648094177246
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1024,16,1,128,1,float16,fp8,0,0.3799871921539307
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1024,16,1,128,1,fp8,fp8,0,0.3801984071731567
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1024,16,2,128,1,float16,float16,0,0.3530495882034302
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1024,16,2,128,1,float16,fp8,0,0.4317935943603516
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1024,16,2,128,1,fp8,fp8,0,0.3807152032852173
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1024,16,4,128,1,float16,float16,0,0.37643680572509763
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1024,16,4,128,1,float16,fp8,0,0.3798079967498779
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1024,16,4,128,1,fp8,fp8,0,0.3802704095840454
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1024,16,16,128,1,float16,float16,0,0.2579087972640991
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1024,16,8,128,1,float16,float16,0,0.41728639602661133
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1024,16,8,128,1,float16,fp8,0,0.40108160972595214
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1024,16,8,128,1,fp8,fp8,0,0.3819472074508667
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1024,16,16,128,1,float16,fp8,0,0.2349776029586792
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1024,16,16,128,1,fp8,fp8,0,0.23444640636444092
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1024,16,1,128,1,float16,float16,0,0.18453119993209838
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1024,16,1,128,1,float16,fp8,0,0.2000783920288086
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1024,16,2,128,1,float16,float16,0,0.1860319972038269
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1024,16,2,128,1,float16,fp8,0,0.19913599491119385
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1024,16,2,128,1,fp8,fp8,0,0.1986464023590088
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1024,16,4,128,1,float16,float16,0,0.1948192000389099
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1024,16,4,128,1,float16,fp8,0,0.19982240200042725
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1024,16,4,128,1,fp8,fp8,0,0.19961919784545898
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1024,16,8,128,1,float16,float16,0,0.21558079719543458
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1024,16,8,128,1,float16,fp8,0,0.19958560466766356
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1024,16,16,128,1,float16,float16,0,0.13668479919433593
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1024,16,8,128,1,fp8,fp8,0,0.19958399534225463
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1024,16,16,128,1,float16,fp8,0,0.12634880542755128
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1024,16,1,128,1,float16,float16,0,0.10284960269927979
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1024,16,16,128,1,fp8,fp8,0,0.1267632007598877
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1024,16,1,128,1,float16,fp8,0,0.1078495979309082
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1024,16,1,128,1,fp8,fp8,0,0.10734080076217652
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1024,16,2,128,1,float16,float16,0,0.1032480001449585
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1024,16,2,128,1,float16,fp8,0,0.10755679607391358
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1024,16,2,128,1,fp8,fp8,0,0.10834720134735107
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1024,16,4,128,1,float16,float16,0,0.10849920511245728
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1024,16,4,128,1,float16,fp8,0,0.10820480585098266
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1024,16,4,128,1,fp8,fp8,0,0.10832159519195557
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1024,16,8,128,1,float16,float16,0,0.11730719804763794
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1024,16,8,128,1,fp8,fp8,0,0.10968320369720459
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1024,16,16,128,1,float16,float16,0,0.0770799994468689
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1024,16,16,128,1,float16,fp8,0,0.07156959772109986
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1024,16,16,128,1,fp8,fp8,0,0.07227680087089539
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1024,16,1,128,1,float16,float16,0,0.05978879928588867
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1024,16,1,128,1,float16,fp8,0,0.06165599822998047
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1024,16,1,128,1,fp8,fp8,0,0.06153280138969421
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1024,16,2,128,1,float16,float16,0,0.059996801614761355
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1024,16,2,128,1,float16,fp8,0,0.06184639930725098
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1024,16,4,128,1,float16,float16,0,0.0631824016571045
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1024,16,4,128,1,float16,fp8,0,0.06144959926605224
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1024,16,4,128,1,fp8,fp8,0,0.06164960265159607
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1024,16,8,128,1,float16,fp8,0,0.06185439825057983
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1024,16,8,128,1,float16,float16,0,0.06941919922828674
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1024,16,8,128,1,fp8,fp8,0,0.061343997716903687
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1024,16,16,128,1,float16,fp8,0,0.04323840141296387
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1024,16,16,128,1,float16,float16,0,0.04530879855155945
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1024,16,16,128,1,fp8,fp8,0,0.04323520064353943
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1024,16,1,128,1,float16,float16,0,0.03844479918479919
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1024,16,1,128,1,float16,fp8,0,0.03914079964160919
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1024,16,1,128,1,fp8,fp8,0,0.039201599359512326
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1024,16,2,128,1,float16,fp8,0,0.039182400703430174
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1024,16,2,128,1,fp8,fp8,0,0.03912000060081482
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1024,16,4,128,1,float16,float16,0,0.03928000032901764
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1024,16,4,128,1,float16,fp8,0,0.03918719887733459
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1024,16,4,128,1,fp8,fp8,0,0.03914400041103363
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1024,16,8,128,1,float16,float16,0,0.04144960045814514
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1024,16,8,128,1,float16,fp8,0,0.03917120099067688
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,1024,16,8,128,1,fp8,fp8,0,0.7429215908050537
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1024,16,8,128,1,fp8,fp8,0,0.03917919993400574
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1024,16,16,128,1,float16,float16,0,0.026899200677871705
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1024,16,16,128,1,float16,fp8,0,0.0268640011548996
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1024,16,16,128,1,fp8,fp8,0,0.026843199133872987
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1024,16,1,128,1,float16,fp8,0,0.02478879988193512
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1024,16,1,128,1,fp8,fp8,0,0.02479359954595566
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1024,16,1,128,1,float16,float16,0,0.024684800207614897
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1024,16,2,128,1,float16,float16,0,0.024799999594688416
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1024,16,2,128,1,float16,fp8,0,0.024774399399757386
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1024,16,2,128,1,fp8,fp8,0,0.024827200174331664
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1024,16,4,128,1,float16,float16,0,0.02476159930229187
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1024,16,4,128,1,fp8,fp8,0,0.02478879988193512
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1024,16,8,128,1,float16,float16,0,0.026900801062583923
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1024,16,4,128,1,float16,fp8,0,0.024823999404907225
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1024,16,8,128,1,float16,fp8,0,0.02478879988193512
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1024,16,8,128,1,fp8,fp8,0,0.024801599979400634
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1024,16,16,128,1,float16,float16,0,0.02481600046157837
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1024,16,16,128,1,float16,fp8,0,0.024798400700092316
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1024,16,16,128,1,fp8,fp8,0,0.024684800207614897
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1024,16,1,128,1,float16,fp8,0,0.022756800055503845
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1024,16,1,128,1,fp8,fp8,0,0.022694399952888487
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1024,16,2,128,1,float16,float16,0,0.022742399573326112
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1024,16,2,128,1,float16,fp8,0,0.02274399995803833
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1024,16,2,128,1,fp8,fp8,0,0.02279680073261261
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1024,16,4,128,1,float16,float16,0,0.022756800055503845
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1024,16,4,128,1,float16,fp8,0,0.022784000635147093
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1024,16,4,128,1,fp8,fp8,0,0.02276960015296936
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1024,16,8,128,1,float16,float16,0,0.02346719950437546
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1024,16,8,128,1,float16,fp8,0,0.022702400386333466
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1024,16,8,128,1,fp8,fp8,0,0.022708800435066224
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1024,16,1,128,1,fp8,fp8,0,0.20502560138702391
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,512,16,1,128,1,float16,float16,0,0.49902877807617185
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,512,16,1,128,1,float16,fp8,0,0.5725088119506836
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,512,16,1,128,1,fp8,fp8,0,0.5753007888793945
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,512,16,2,128,1,float16,float16,0,0.5055200099945069
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1024,16,8,128,1,float16,fp8,0,0.10945440530776977
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,512,16,2,128,1,float16,fp8,0,0.572001600265503
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1024,16,2,128,1,fp8,fp8,0,0.06154559850692749
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,512,16,2,128,1,fp8,fp8,0,0.5750463962554931
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,512,16,4,128,1,float16,float16,0,0.5484367847442627
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,512,16,4,128,1,float16,fp8,0,0.5753119945526123
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,512,16,4,128,1,fp8,fp8,0,0.5723487854003906
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,512,16,8,128,1,float16,float16,0,0.6398560047149658
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,512,16,8,128,1,float16,fp8,0,0.5755311965942382
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,512,16,8,128,1,fp8,fp8,0,0.573967981338501
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,512,16,16,128,1,float16,fp8,0,0.36207358837127684
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,512,16,1,128,1,float16,float16,0,0.2572751998901367
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,512,16,16,128,1,fp8,fp8,0,0.3641103982925415
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,512,16,1,128,1,float16,fp8,0,0.29415040016174315
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,512,16,1,128,1,fp8,fp8,0,0.29360640048980713
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,512,16,2,128,1,float16,float16,0,0.2591295957565308
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,512,16,2,128,1,float16,fp8,0,0.29495360851287844
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,512,16,2,128,1,fp8,fp8,0,0.2939824104309082
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,512,16,4,128,1,float16,float16,0,0.2784960031509399
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,512,16,4,128,1,float16,fp8,0,0.29556639194488527
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,512,16,4,128,1,fp8,fp8,0,0.29495038986206057
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,512,16,8,128,1,float16,fp8,0,0.2947439908981323
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,512,16,8,128,1,fp8,fp8,0,0.2957823991775513
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,512,16,16,128,1,float16,float16,0,0.20874719619750975
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,512,16,16,128,1,float16,fp8,0,0.18894560337066652
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,512,16,1,128,1,float16,float16,0,0.13822879791259765
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,512,16,16,128,1,fp8,fp8,0,0.18942240476608277
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,512,16,1,128,1,float16,fp8,0,0.15379680395126344
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,512,16,1,128,1,fp8,fp8,0,0.15442399978637694
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,512,16,2,128,1,float16,float16,0,0.1389664053916931
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,512,16,2,128,1,float16,fp8,0,0.15423840284347534
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,512,16,2,128,1,fp8,fp8,0,0.15451359748840332
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,512,16,4,128,1,float16,float16,0,0.147489595413208
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,512,16,4,128,1,float16,fp8,0,0.15380640029907228
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,512,16,4,128,1,fp8,fp8,0,0.15429439544677734
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,512,16,8,128,1,float16,float16,0,0.16756160259246827
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,512,16,8,128,1,fp8,fp8,0,0.15462080240249634
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,512,16,16,128,1,float16,float16,0,0.11170879602432252
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,512,16,16,128,1,float16,fp8,0,0.10209439992904663
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,512,16,16,128,1,fp8,fp8,0,0.10262720584869385
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1024,16,2,128,1,float16,float16,0,0.03931359946727753
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,512,16,1,128,1,float16,float16,0,0.07744640111923218
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,512,16,1,128,1,float16,fp8,0,0.08312640190124512
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,512,16,1,128,1,fp8,fp8,0,0.08408480286598205
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,512,16,2,128,1,float16,float16,0,0.07777919769287109
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,512,16,2,128,1,float16,fp8,0,0.0837984025478363
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,512,16,2,128,1,fp8,fp8,0,0.08443040251731873
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,512,16,4,128,1,float16,fp8,0,0.08453119993209839
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,512,16,16,128,1,float16,float16,0,0.4053040027618408
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,512,16,4,128,1,fp8,fp8,0,0.08476960062980651
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,512,16,8,128,1,float16,float16,0,0.09239360094070434
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,512,16,8,128,1,float16,fp8,0,0.08523039817810059
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,512,16,16,128,1,float16,fp8,0,0.057599997520446776
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,512,16,8,128,1,fp8,fp8,0,0.08525440096855164
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,512,16,16,128,1,fp8,fp8,0,0.05744640231132507
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1024,16,1,128,1,float16,float16,0,0.02271520048379898
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,512,16,1,128,1,float16,float16,0,0.04458400011062622
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,512,16,1,128,1,float16,fp8,0,0.04732959866523743
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,512,16,2,128,1,float16,float16,0,0.04454559981822968
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,512,16,2,128,1,float16,fp8,0,0.04733439981937408
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,512,16,2,128,1,fp8,fp8,0,0.047331199049949646
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,512,16,4,128,1,float16,float16,0,0.04731520116329193
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,512,16,8,128,1,float16,float16,0,0.0534991979598999
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,512,16,4,128,1,fp8,fp8,0,0.04728800058364868
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,512,16,8,128,1,float16,fp8,0,0.04736160039901734
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,512,16,8,128,1,fp8,fp8,0,0.04764319956302643
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,512,16,16,128,1,float16,float16,0,0.03530240058898926
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,512,16,16,128,1,float16,fp8,0,0.034544000029563905
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,512,16,8,128,1,float16,float16,0,0.320632004737854
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,512,16,16,128,1,fp8,fp8,0,0.03487679958343506
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,512,16,1,128,1,float16,float16,0,0.028865599632263185
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,512,16,1,128,1,float16,fp8,0,0.02903519868850708
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,512,16,2,128,1,float16,float16,0,0.028910401463508605
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,512,16,1,128,1,fp8,fp8,0,0.029600000381469725
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,512,16,2,128,1,float16,fp8,0,0.029014399647712706
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,512,16,2,128,1,fp8,fp8,0,0.028958401083946227
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,512,16,4,128,1,float16,float16,0,0.03009440004825592
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,512,16,4,128,1,float16,fp8,0,0.029142400622367857
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,512,16,8,128,1,float16,float16,0,0.03296799957752228
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,512,16,8,128,1,float16,fp8,0,0.029209598898887634
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,512,16,8,128,1,fp8,fp8,0,0.029993599653244017
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,512,16,16,128,1,float16,float16,0,0.020735999941825865
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,512,16,16,128,1,float16,fp8,0,0.02072640061378479
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,512,16,16,128,1,fp8,fp8,0,0.020695999264717102
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,512,16,1,128,1,float16,float16,0,0.018614399433135986
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,512,16,1,128,1,float16,fp8,0,0.01866080015897751
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,512,16,1,128,1,fp8,fp8,0,0.01860159933567047
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,512,16,2,128,1,float16,float16,0,0.018649600446224213
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,512,16,2,128,1,float16,fp8,0,0.018654400110244752
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,512,16,8,128,1,float16,fp8,0,0.15406559705734252
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,512,16,4,128,1,float16,float16,0,0.01866399943828583
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,512,16,4,128,1,float16,fp8,0,0.018572799861431122
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,512,16,4,128,1,fp8,fp8,0,0.018646399676799773
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,512,16,8,128,1,float16,float16,0,0.0206496000289917
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,512,16,8,128,1,float16,fp8,0,0.01863040030002594
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,512,16,8,128,1,fp8,fp8,0,0.018622399866580965
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,512,16,16,128,1,float16,float16,0,0.018564799427986146
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,512,16,16,128,1,float16,fp8,0,0.01851679980754852
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,512,16,4,128,1,float16,float16,0,0.08259680271148681
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,512,16,16,128,1,fp8,fp8,0,0.018544000387191773
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,512,16,1,128,1,float16,float16,0,0.016627199947834015
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,512,16,1,128,1,float16,fp8,0,0.016606399416923524
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,512,16,1,128,1,fp8,fp8,0,0.01661120057106018
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,512,16,2,128,1,float16,float16,0,0.016569599509239197
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,512,16,2,128,1,float16,fp8,0,0.01661919951438904
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,512,16,2,128,1,fp8,fp8,0,0.016633599996566772
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,512,16,16,128,1,float16,float16,0,0.061703997850418094
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,512,16,4,128,1,float16,float16,0,0.01675039976835251
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,512,16,4,128,1,float16,fp8,0,0.016616000235080718
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,512,16,8,128,1,float16,float16,0,0.01806560009717941
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,512,16,8,128,1,float16,fp8,0,0.016572800278663636
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,512,16,8,128,1,fp8,fp8,0,0.01658560037612915
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,512,16,16,128,1,float16,float16,0,0.016569599509239197
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,512,16,16,128,1,float16,fp8,0,0.016548800468444824
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,512,16,4,128,1,float16,fp8,0,0.047225600481033324
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,512,16,1,128,1,float16,float16,0,0.01661760061979294
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,512,16,1,128,1,float16,fp8,0,0.01664000004529953
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,512,16,1,128,1,fp8,fp8,0,0.016607999801635742
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,512,16,2,128,1,float16,float16,0,0.016603200137615202
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,512,16,2,128,1,float16,fp8,0,0.016582399606704712
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,512,16,4,128,1,float16,float16,0,0.016607999801635742
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,512,16,4,128,1,float16,fp8,0,0.01655520051717758
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,512,16,4,128,1,fp8,fp8,0,0.016624000668525696
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,512,16,8,128,1,float16,float16,0,0.01663520038127899
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,512,16,8,128,1,float16,fp8,0,0.01663520038127899
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,512,16,8,128,1,fp8,fp8,0,0.016572800278663636
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,512,16,4,128,1,fp8,fp8,0,0.028958401083946227
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,256,16,1,128,1,float16,float16,0,0.2073456048965454
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,256,16,1,128,1,float16,fp8,0,0.24820959568023682
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,256,16,1,128,1,fp8,fp8,0,0.2487056016921997
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,256,16,2,128,1,float16,float16,0,0.20918240547180175
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,512,16,2,128,1,fp8,fp8,0,0.018638400733470915
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,256,16,2,128,1,float16,fp8,0,0.24846398830413818
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,256,16,2,128,1,fp8,fp8,0,0.24903039932250975
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,256,16,4,128,1,float16,float16,0,0.22790238857269288
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,256,16,4,128,1,float16,fp8,0,0.24805281162261963
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,256,16,4,128,1,fp8,fp8,0,0.248636794090271
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,256,16,8,128,1,float16,float16,0,0.26790239810943606
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,256,16,8,128,1,float16,fp8,0,0.24831840991973878
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,256,16,16,128,1,float16,float16,0,0.1813055992126465
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,256,16,8,128,1,fp8,fp8,0,0.24888958930969238
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,256,16,16,128,1,float16,fp8,0,0.1624511957168579
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,256,16,1,128,1,float16,float16,0,0.111353600025177
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,256,16,1,128,1,float16,fp8,0,0.12959680557250977
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,512,16,4,128,1,fp8,fp8,0,0.016624000668525696
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,512,16,1,128,1,fp8,fp8,0,0.04721280038356781
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,256,16,1,128,1,fp8,fp8,0,0.1291759967803955
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,256,16,2,128,1,float16,float16,0,0.11235040426254272
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,512,16,16,128,1,fp8,fp8,0,0.016551999747753142
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,256,16,2,128,1,float16,fp8,0,0.12942399978637695
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,256,16,2,128,1,fp8,fp8,0,0.1288432002067566
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,256,16,4,128,1,float16,float16,0,0.12115999460220336
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,256,16,4,128,1,float16,fp8,0,0.12983360290527343
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,256,16,8,128,1,float16,float16,0,0.14035040140151978
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,256,16,8,128,1,float16,fp8,0,0.12953120470046997
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,512,16,2,128,1,fp8,fp8,0,0.016622400283813475
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,256,16,8,128,1,fp8,fp8,0,0.12978240251541137
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,256,16,16,128,1,float16,float16,0,0.09729599952697754
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,256,16,1,128,1,float16,float16,0,0.06201120018959046
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,256,16,16,128,1,fp8,fp8,0,0.08621919751167298
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,256,16,1,128,1,fp8,fp8,0,0.06859999895095825
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,256,16,1,128,1,float16,fp8,0,0.06811839938163758
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,256,16,2,128,1,fp8,fp8,0,0.06815999746322632
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,256,16,4,128,1,float16,float16,0,0.06658719778060913
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,256,16,4,128,1,float16,fp8,0,0.06936479806900024
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,256,16,4,128,1,fp8,fp8,0,0.06977279782295227
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,256,16,8,128,1,float16,float16,0,0.07663360238075256
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,256,16,8,128,1,float16,fp8,0,0.06981599926948548
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,256,16,8,128,1,fp8,fp8,0,0.06989759802818299
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,256,16,16,128,1,float16,float16,0,0.05507680177688599
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,256,16,16,128,1,float16,fp8,0,0.050460797548294065
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,256,16,16,128,1,fp8,fp8,0,0.05072000026702881
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,256,16,1,128,1,float16,float16,0,0.03671840131282807
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,256,16,1,128,1,float16,fp8,0,0.04036639928817749
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,256,16,1,128,1,fp8,fp8,0,0.0394896000623703
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,256,16,2,128,1,float16,float16,0,0.03686400055885315
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,256,16,2,128,1,fp8,fp8,0,0.039150398969650266
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,256,16,4,128,1,float16,float16,0,0.03976800143718719
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,256,16,4,128,1,float16,fp8,0,0.03912320137023926
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,256,16,4,128,1,fp8,fp8,0,0.039345601201057435
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,256,16,8,128,1,float16,float16,0,0.045281600952148435
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,256,16,8,128,1,float16,fp8,0,0.039643201231956485
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,256,16,8,128,1,fp8,fp8,0,0.03945119976997376
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,256,16,16,128,1,float16,float16,0,0.02900159955024719
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,256,16,16,128,1,float16,fp8,0,0.027692800760269164
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,256,16,16,128,1,fp8,fp8,0,0.029068800806999206
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,256,16,1,128,1,float16,float16,0,0.022678400576114654
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,256,16,1,128,1,float16,fp8,0,0.022856000065803527
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,256,16,1,128,1,fp8,fp8,0,0.022753599286079406
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,256,16,2,128,1,float16,float16,0,0.022752000391483305
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,256,16,2,128,1,float16,fp8,0,0.024799999594688416
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,256,16,2,128,1,fp8,fp8,0,0.02475520074367523
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,256,16,4,128,1,float16,float16,0,0.022771200537681578
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,256,16,4,128,1,float16,fp8,0,0.02481279969215393
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,256,16,16,128,1,fp8,fp8,0,0.16412160396575928
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,256,16,4,128,1,fp8,fp8,0,0.024835200607776643
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,256,16,8,128,1,float16,float16,0,0.02571359872817993
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,256,16,8,128,1,float16,fp8,0,0.02481440007686615
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,256,16,8,128,1,fp8,fp8,0,0.02473919987678528
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,256,16,16,128,1,float16,float16,0,0.01712000072002411
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,256,16,16,128,1,float16,fp8,0,0.018641600012779237
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,256,16,16,128,1,fp8,fp8,0,0.018571199476718904
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,256,16,1,128,1,float16,float16,0,0.014561599493026734
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,256,16,1,128,1,float16,fp8,0,0.016200000047683717
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,256,16,1,128,1,fp8,fp8,0,0.016616000235080718
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,256,16,2,128,1,float16,float16,0,0.014582400023937226
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,256,16,4,128,1,fp8,fp8,0,0.12898399829864501
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,256,16,2,128,1,float16,fp8,0,0.01658560037612915
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,256,16,2,128,1,fp8,fp8,0,0.016599999368190767
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,256,16,4,128,1,float16,fp8,0,0.016604800522327424
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,256,16,16,128,1,float16,fp8,0,0.08662559986114501
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,256,16,4,128,1,fp8,fp8,0,0.016612799465656282
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,256,16,8,128,1,float16,float16,0,0.016603200137615202
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,256,16,2,128,1,float16,float16,0,0.062015998363494876
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,256,16,8,128,1,float16,fp8,0,0.01658719927072525
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,256,16,2,128,1,float16,fp8,0,0.06874880194664001
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,256,16,16,128,1,float16,fp8,0,0.014561599493026734
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,256,16,16,128,1,fp8,fp8,0,0.014553600549697876
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,256,16,1,128,1,float16,float16,0,0.013948799669742584
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,256,16,1,128,1,float16,fp8,0,0.014585599303245544
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,256,16,1,128,1,fp8,fp8,0,0.014577600359916686
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,256,16,2,128,1,float16,float16,0,0.013649600744247436
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,256,16,2,128,1,float16,fp8,0,0.014591999351978302
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,256,16,2,128,1,fp8,fp8,0,0.014550399780273438
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,256,16,4,128,1,float16,float16,0,0.01446239948272705
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,256,16,4,128,1,float16,fp8,0,0.01451359987258911
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,256,16,4,128,1,fp8,fp8,0,0.014511999487876893
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,256,16,8,128,1,float16,float16,0,0.014553600549697876
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,256,16,8,128,1,float16,fp8,0,0.014590400457382201
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,256,16,8,128,1,fp8,fp8,0,0.01459999978542328
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,256,16,16,128,1,float16,float16,0,0.01451520025730133
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,256,16,1,128,1,float16,float16,0,0.012827199697494508
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,256,16,2,128,1,float16,fp8,0,0.04024479985237121
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,256,16,16,128,1,float16,fp8,0,0.014567999541759491
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,256,16,16,128,1,fp8,fp8,0,0.014523200690746307
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,256,16,1,128,1,fp8,fp8,0,0.01313759982585907
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,256,16,2,128,1,float16,float16,0,0.012492799758911132
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,256,16,2,128,1,float16,fp8,0,0.01250080019235611
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,256,16,2,128,1,fp8,fp8,0,0.013708800077438354
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,256,16,4,128,1,float16,float16,0,0.013166399300098419
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,256,16,4,128,1,float16,fp8,0,0.012540799379348756
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,256,16,4,128,1,fp8,fp8,0,0.012520000338554382
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,256,16,8,128,1,float16,float16,0,0.01454399973154068
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,256,16,8,128,1,float16,fp8,0,0.014291200041770934
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,256,16,8,128,1,fp8,fp8,0,0.012505599856376648
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,256,16,16,128,1,float16,float16,0,0.01353600025177002
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,256,16,16,128,1,float16,fp8,0,0.013556799292564392
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,256,16,16,128,1,fp8,fp8,0,0.012521600723266602
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,256,16,1,128,1,float16,float16,0,0.012508800625801087
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,256,16,1,128,1,float16,fp8,0,0.012537600100040435
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,256,16,1,128,1,fp8,fp8,0,0.012495999783277511
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,256,16,2,128,1,float16,float16,0,0.012484800070524216
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,256,16,2,128,1,float16,fp8,0,0.012484800070524216
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,256,16,2,128,1,fp8,fp8,0,0.012750400602817536
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,256,16,4,128,1,float16,float16,0,0.012508800625801087
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,256,16,4,128,1,float16,fp8,0,0.01250080019235611
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,256,16,4,128,1,fp8,fp8,0,0.012580800056457519
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,256,16,8,128,1,float16,float16,0,0.01266240030527115
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,256,16,8,128,1,float16,fp8,0,0.012505599856376648
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,256,16,8,128,1,fp8,fp8,0,0.01252480000257492
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,128,16,1,128,1,float16,float16,0,0.13396159410476685
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,128,16,1,128,1,float16,fp8,0,0.15196000337600707
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,128,16,1,128,1,fp8,fp8,0,0.15259040594100953
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,128,16,2,128,1,float16,float16,0,0.1351696014404297
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,128,16,2,128,1,float16,fp8,0,0.15220799446105956
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,128,16,2,128,1,fp8,fp8,0,0.1525439977645874
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,128,16,4,128,1,float16,float16,0,0.14358559846878052
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,128,16,4,128,1,float16,fp8,0,0.15260000228881837
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,128,16,4,128,1,fp8,fp8,0,0.15285600423812867
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,128,16,8,128,1,float16,float16,0,0.1620751976966858
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,128,16,8,128,1,float16,fp8,0,0.15250879526138306
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,128,16,8,128,1,fp8,fp8,0,0.1526095986366272
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,128,16,16,128,1,float16,fp8,0,0.09834240078926086
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,128,16,1,128,1,float16,float16,0,0.07315679788589477
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,128,16,1,128,1,float16,fp8,0,0.08048959970474243
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,128,16,1,128,1,fp8,fp8,0,0.08007519841194152
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,256,16,4,128,1,float16,float16,0,0.014563199877738953
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,128,16,2,128,1,float16,fp8,0,0.08048480153083801
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,256,16,16,128,1,float16,float16,0,0.014591999351978302
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,128,16,2,128,1,fp8,fp8,0,0.08004800081253052
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,256,16,8,128,1,fp8,fp8,0,0.016648000478744505
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,128,16,4,128,1,float16,float16,0,0.07749599814414979
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,128,16,4,128,1,float16,fp8,0,0.08028479814529418
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,128,16,4,128,1,fp8,fp8,0,0.08017119765281677
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,128,16,8,128,1,float16,float16,0,0.08568000197410583
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,128,16,8,128,1,float16,fp8,0,0.08142880201339722
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,128,16,16,128,1,float16,float16,0,0.055251199007034305
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,128,16,8,128,1,fp8,fp8,0,0.08140959739685058
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,128,16,16,128,1,float16,fp8,0,0.05326399803161621
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,128,16,16,128,1,fp8,fp8,0,0.05338720083236694
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,128,16,1,128,1,float16,fp8,0,0.04327200055122375
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,128,16,1,128,1,fp8,fp8,0,0.043140798807144165
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,128,16,2,128,1,float16,float16,0,0.0401775985956192
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,256,16,1,128,1,float16,fp8,0,0.012492799758911132
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,128,16,2,128,1,float16,fp8,0,0.04310719966888428
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,128,16,2,128,1,fp8,fp8,0,0.04323039948940277
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,128,16,4,128,1,float16,float16,0,0.04324800074100495
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,128,16,4,128,1,float16,fp8,0,0.04314880073070526
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,128,16,4,128,1,fp8,fp8,0,0.04321439862251282
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,128,16,8,128,1,float16,float16,0,0.04734880030155182
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,128,16,8,128,1,float16,fp8,0,0.04312320053577423
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,128,16,8,128,1,fp8,fp8,0,0.043244799971580504
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,128,16,16,128,1,float16,float16,0,0.0322735995054245
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,128,16,16,128,1,float16,fp8,0,0.03094559907913208
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,128,16,1,128,1,float16,float16,0,0.024766400456428528
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,128,16,16,128,1,fp8,fp8,0,0.030956798791885377
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,128,16,1,128,1,float16,fp8,0,0.026822400093078614
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,128,16,1,128,1,fp8,fp8,0,0.02709760069847107
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,128,16,2,128,1,float16,float16,0,0.024784000217914583
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,128,16,2,128,1,float16,fp8,0,0.02675360143184662
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,128,16,2,128,1,fp8,fp8,0,0.026851201057434083
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,128,16,4,128,1,float16,float16,0,0.025212800502777098
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,128,16,4,128,1,float16,fp8,0,0.026825600862503053
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,128,16,4,128,1,fp8,fp8,0,0.026819199323654175
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,128,16,8,128,1,float16,float16,0,0.02884320020675659
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,128,16,8,128,1,float16,fp8,0,0.0267551988363266
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,128,16,8,128,1,fp8,fp8,0,0.026763200759887695
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,128,16,16,128,1,float16,float16,0,0.018648000061511995
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,128,16,16,128,1,float16,fp8,0,0.01855680048465729
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,128,16,1,128,1,float16,float16,0,0.016606399416923524
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,128,16,1,128,1,float16,fp8,0,0.016519999504089354
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,128,16,1,128,1,fp8,fp8,0,0.01661760061979294
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,128,16,2,128,1,float16,float16,0,0.016513599455356597
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,128,16,2,128,1,float16,fp8,0,0.016627199947834015
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,128,16,2,128,1,fp8,fp8,0,0.016622400283813475
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,128,16,4,128,1,float16,float16,0,0.016667200624942778
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,128,16,4,128,1,float16,fp8,0,0.016607999801635742
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,128,16,4,128,1,fp8,fp8,0,0.016652800142765045
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,128,16,8,128,1,float16,float16,0,0.01754560023546219
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,128,16,8,128,1,fp8,fp8,0,0.016599999368190767
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,128,16,16,128,1,float16,float16,0,0.012510399520397186
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,128,16,16,128,1,float16,float16,0,0.10439039468765259
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,128,16,16,128,1,float16,fp8,0,0.012508800625801087
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,128,16,1,128,1,float16,float16,0,0.012438400089740754
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,128,16,16,128,1,fp8,fp8,0,0.01250080019235611
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,128,16,16,128,1,fp8,fp8,0,0.09868800044059753
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,128,16,1,128,1,fp8,fp8,0,0.012459199875593185
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,128,16,2,128,1,float16,fp8,0,0.012439999729394913
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,128,16,2,128,1,float16,float16,0,0.07361440062522888
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,128,16,2,128,1,fp8,fp8,0,0.012436799705028534
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,128,16,4,128,1,float16,fp8,0,0.012427199631929398
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,128,16,4,128,1,fp8,fp8,0,0.012532800436019897
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,128,16,8,128,1,float16,float16,0,0.0125231996178627
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,128,16,8,128,1,float16,fp8,0,0.01249919980764389
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,128,16,8,128,1,fp8,fp8,0,0.012534399330615998
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,128,16,16,128,1,float16,float16,0,0.012520000338554382
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,128,16,16,128,1,float16,fp8,0,0.012515200674533844
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,128,16,16,128,1,fp8,fp8,0,0.012520000338554382
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,128,16,1,128,1,float16,float16,0,0.010435199737548828
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,128,16,1,128,1,float16,fp8,0,0.010596799850463866
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,128,16,1,128,1,fp8,fp8,0,0.010763200372457505
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,128,16,2,128,1,float16,float16,0,0.010572800040245056
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,128,16,2,128,1,float16,fp8,0,0.010927999764680863
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,128,16,2,128,1,fp8,fp8,0,0.010462400317192078
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,128,16,4,128,1,float16,float16,0,0.0104592002928257
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,128,16,4,128,1,float16,fp8,0,0.010476800054311753
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,128,16,4,128,1,fp8,fp8,0,0.010471999645233154
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,128,16,8,128,1,float16,float16,0,0.010494399815797806
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,128,16,8,128,1,float16,fp8,0,0.01045759990811348
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,128,16,8,128,1,fp8,fp8,0,0.010527999699115753
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,128,16,16,128,1,float16,float16,0,0.010454399883747101
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,128,16,16,128,1,float16,fp8,0,0.010473600029945374
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,128,16,16,128,1,fp8,fp8,0,0.010475199669599533
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,128,16,1,128,1,float16,float16,0,0.010460799932479859
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,128,16,1,128,1,float16,fp8,0,0.01043360009789467
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,128,16,16,128,1,fp8,fp8,0,0.0186256006360054
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,128,16,1,128,1,fp8,fp8,0,0.010491199791431427
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,128,16,2,128,1,float16,float16,0,0.01048479974269867
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,128,16,2,128,1,fp8,fp8,0,0.01048320010304451
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,128,16,2,128,1,float16,fp8,0,0.010447999835014344
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,128,16,4,128,1,float16,fp8,0,0.010520000010728836
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,128,16,4,128,1,fp8,fp8,0,0.010393600165843963
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,128,16,8,128,1,float16,float16,0,0.0104592002928257
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,128,16,8,128,1,float16,fp8,0,0.010491199791431427
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,128,16,8,128,1,fp8,fp8,0,0.010566399991512298
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,128,16,16,128,1,float16,float16,0,0.010441599786281586
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,128,16,16,128,1,fp8,fp8,0,0.010462400317192078
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,128,16,16,128,1,float16,fp8,0,0.010492800176143647
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,128,16,1,128,1,float16,float16,0,0.01045600026845932
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,128,16,8,128,1,float16,fp8,0,0.01658879965543747
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,128,16,1,128,1,float16,fp8,0,0.010467199981212616
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,128,16,1,128,1,fp8,fp8,0,0.010441599786281586
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,128,16,2,128,1,float16,float16,0,0.010441599786281586
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,128,16,2,128,1,fp8,fp8,0,0.010387200117111205
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,128,16,4,128,1,float16,float16,0,0.010492800176143647
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,128,16,4,128,1,float16,fp8,0,0.010427200049161912
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,128,16,4,128,1,fp8,fp8,0,0.010478399693965912
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,128,16,1,128,1,float16,fp8,0,0.012404800206422806
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,128,16,8,128,1,float16,float16,0,0.010380800068378448
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,128,16,8,128,1,float16,fp8,0,0.01037440001964569
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,128,16,2,128,1,float16,float16,0,0.012505599856376648
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,128,16,8,128,1,fp8,fp8,0,0.010446400195360184
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,64,16,1,128,1,float16,float16,0,0.11243200302124023
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,128,16,4,128,1,float16,float16,0,0.012489599734544754
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,64,16,1,128,1,float16,fp8,0,0.11855039596557618
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,64,16,2,128,1,float16,float16,0,0.11253119707107544
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,64,16,2,128,1,float16,fp8,0,0.11896799802780152
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,64,16,4,128,1,float16,float16,0,0.11663520336151123
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,64,16,4,128,1,float16,fp8,0,0.1188480019569397
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,64,16,4,128,1,fp8,fp8,0,0.1176144003868103
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,128,16,1,128,1,float16,float16,0,0.0394351989030838
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,64,16,8,128,1,float16,float16,0,0.12545119524002074
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,64,16,8,128,1,float16,fp8,0,0.11801600456237793
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,64,16,16,128,1,float16,float16,0,0.07524639964103699
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,64,16,16,128,1,float16,fp8,0,0.0719871997833252
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,64,16,16,128,1,fp8,fp8,0,0.07231199741363525
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,64,16,1,128,1,float16,float16,0,0.06055520176887512
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,64,16,1,128,1,float16,fp8,0,0.06244959831237793
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,64,16,2,128,1,float16,float16,0,0.061136001348495485
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,64,16,2,128,1,float16,fp8,0,0.06258879899978638
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,64,16,2,128,1,fp8,fp8,0,0.06262400150299072
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,64,16,4,128,1,float16,float16,0,0.06423839926719666
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,64,16,4,128,1,float16,fp8,0,0.06276000142097474
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,128,16,4,128,1,float16,float16,0,0.01045600026845932
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,64,16,4,128,1,fp8,fp8,0,0.06318399906158448
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,64,16,8,128,1,float16,float16,0,0.06880639791488648
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,64,16,8,128,1,float16,fp8,0,0.06364639997482299
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,64,16,8,128,1,fp8,fp8,0,0.06389920115470886
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,64,16,16,128,1,float16,float16,0,0.0414000004529953
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,64,16,16,128,1,float16,fp8,0,0.03917439877986908
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,128,16,2,128,1,float16,fp8,0,0.01048159971833229
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,64,16,1,128,1,float16,float16,0,0.03438880145549774
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,64,16,1,128,1,float16,fp8,0,0.03502239882946014
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,64,16,1,128,1,fp8,fp8,0,0.034971201419830324
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,64,16,2,128,1,float16,float16,0,0.03491199910640717
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,64,16,2,128,1,float16,fp8,0,0.035017600655555724
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,64,16,2,128,1,fp8,fp8,0,0.035068801045417784
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,64,16,4,128,1,float16,float16,0,0.03508000075817108
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,64,16,4,128,1,float16,fp8,0,0.0350383996963501
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,64,16,4,128,1,fp8,fp8,0,0.03503040075302124
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,64,16,1,128,1,fp8,fp8,0,0.11808160543441773
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,64,16,8,128,1,float16,float16,0,0.03781279921531677
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,64,16,8,128,1,float16,fp8,0,0.0350383996963501
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,64,16,16,128,1,float16,float16,0,0.024796800315380098
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,64,16,8,128,1,fp8,fp8,0,0.03506079912185669
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,64,16,2,128,1,fp8,fp8,0,0.11860159635543824
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,64,16,16,128,1,float16,fp8,0,0.024751999974250795
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,64,16,16,128,1,fp8,fp8,0,0.024857600033283234
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,64,16,1,128,1,float16,float16,0,0.02104319930076599
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,64,16,1,128,1,float16,fp8,0,0.022700800001621245
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,64,16,1,128,1,fp8,fp8,0,0.0227743998169899
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,64,16,2,128,1,float16,float16,0,0.022742399573326112
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,64,16,2,128,1,float16,fp8,0,0.0227743998169899
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,64,16,2,128,1,fp8,fp8,0,0.022652800381183624
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,64,16,4,128,1,float16,float16,0,0.02279199957847595
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,64,16,4,128,1,float16,fp8,0,0.0227743998169899
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,64,16,4,128,1,fp8,fp8,0,0.022777600586414336
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,64,16,8,128,1,float16,float16,0,0.022716799378395082
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,64,16,8,128,1,float16,fp8,0,0.022758400440216063
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,64,16,8,128,1,fp8,fp8,0,0.02279520034790039
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,64,16,16,128,1,float16,float16,0,0.016515199840068818
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,64,16,16,128,1,float16,fp8,0,0.016545599699020384
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,64,16,16,128,1,fp8,fp8,0,0.016596800088882445
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,64,16,1,128,1,float16,fp8,0,0.014582400023937226
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,64,16,1,128,1,fp8,fp8,0,0.014630399644374847
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,64,16,8,128,1,fp8,fp8,0,0.11864160299301148
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,64,16,2,128,1,float16,float16,0,0.014542399346828461
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,64,16,2,128,1,fp8,fp8,0,0.014616000652313232
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,64,16,4,128,1,float16,float16,0,0.014619199931621552
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,64,16,4,128,1,float16,fp8,0,0.01454399973154068
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,64,16,8,128,1,float16,float16,0,0.01475680023431778
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,64,16,4,128,1,fp8,fp8,0,0.014612799882888794
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,64,16,1,128,1,fp8,fp8,0,0.06223040223121643
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,64,16,8,128,1,fp8,fp8,0,0.014529600739479065
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,64,16,16,128,1,float16,fp8,0,0.011913599818944931
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,64,16,16,128,1,float16,float16,0,0.012460800260305405
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,64,16,8,128,1,float16,fp8,0,0.014560000598430633
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,64,16,16,128,1,fp8,fp8,0,0.010769599676132202
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,64,16,1,128,1,float16,float16,0,0.010487999767065048
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,64,16,1,128,1,float16,fp8,0,0.01048159971833229
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,64,16,1,128,1,fp8,fp8,0,0.010515200346708298
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,64,16,2,128,1,float16,float16,0,0.010427200049161912
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,64,16,2,128,1,float16,fp8,0,0.010422399640083313
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,64,16,4,128,1,float16,float16,0,0.010443200170993806
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,64,16,2,128,1,fp8,fp8,0,0.01043199971318245
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,64,16,4,128,1,float16,fp8,0,0.010409600287675857
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,64,16,4,128,1,fp8,fp8,0,0.010460799932479859
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,64,16,8,128,1,float16,float16,0,0.010505600273609162
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,64,16,8,128,1,float16,fp8,0,0.010513599961996078
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,64,16,8,128,1,fp8,fp8,0,0.010499200224876404
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,64,16,16,128,1,float16,fp8,0,0.010462400317192078
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,64,16,16,128,1,fp8,fp8,0,0.010449600219726563
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,64,16,1,128,1,float16,float16,0,0.01048159971833229
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,64,16,1,128,1,float16,fp8,0,0.010422399640083313
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,64,16,2,128,1,float16,fp8,0,0.010355199873447418
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,64,16,1,128,1,fp8,fp8,0,0.010478399693965912
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,64,16,2,128,1,fp8,fp8,0,0.010372799634933472
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,64,16,4,128,1,float16,float16,0,0.01043360009789467
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,64,16,4,128,1,float16,fp8,0,0.010344000160694122
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,64,16,8,128,1,float16,float16,0,0.010385599732398988
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,64,16,8,128,1,float16,fp8,0,0.0104592002928257
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,64,16,8,128,1,fp8,fp8,0,0.010465600341558457
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,64,16,16,128,1,float16,float16,0,0.010367999970912933
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,64,16,16,128,1,float16,fp8,0,0.010396800190210342
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,64,16,16,128,1,fp8,fp8,0,0.03920319974422455
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,64,16,16,128,1,fp8,fp8,0,0.010367999970912933
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,64,16,1,128,1,float16,float16,0,0.01043360009789467
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,64,16,1,128,1,float16,fp8,0,0.01035040020942688
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,64,16,1,128,1,fp8,fp8,0,0.01045759990811348
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,64,16,2,128,1,float16,float16,0,0.01040000021457672
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,64,16,2,128,1,float16,fp8,0,0.010470400005578995
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,64,16,2,128,1,fp8,fp8,0,0.010371199995279311
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,64,16,4,128,1,float16,float16,0,0.010377600044012069
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,64,16,4,128,1,float16,fp8,0,0.01040000021457672
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,64,16,4,128,1,fp8,fp8,0,0.010435199737548828
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,64,16,8,128,1,float16,float16,0,0.01045759990811348
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,64,16,8,128,1,float16,fp8,0,0.010492800176143647
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,64,16,8,128,1,fp8,fp8,0,0.010372799634933472
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,64,16,16,128,1,float16,float16,0,0.010468800365924836
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,64,16,16,128,1,float16,fp8,0,0.010356800258159637
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,64,16,16,128,1,fp8,fp8,0,0.010491199791431427
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,64,16,1,128,1,float16,float16,0,0.010372799634933472
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,64,16,1,128,1,float16,fp8,0,0.01045600026845932
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,64,16,1,128,1,fp8,fp8,0,0.010396800190210342
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,64,16,2,128,1,float16,fp8,0,0.010377600044012069
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,64,16,2,128,1,fp8,fp8,0,0.01037919968366623
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,64,16,4,128,1,float16,float16,0,0.01035040020942688
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,64,16,4,128,1,float16,fp8,0,0.010342399775981902
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,64,16,1,128,1,float16,float16,0,0.014580799639225006
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,64,16,4,128,1,fp8,fp8,0,0.010387200117111205
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,64,16,8,128,1,float16,float16,0,0.010467199981212616
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,64,16,2,128,1,float16,fp8,0,0.014574399590492249
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,64,16,8,128,1,float16,fp8,0,0.010454399883747101
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,64,16,8,128,1,fp8,fp8,0,0.01045759990811348
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,32,16,1,128,1,float16,float16,0,0.09905440211296082
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,32,16,1,128,1,fp8,fp8,0,0.09981920123100281
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,32,16,2,128,1,float16,float16,0,0.0994704008102417
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,32,16,2,128,1,float16,fp8,0,0.09967359900474548
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,64,16,16,128,1,float16,float16,0,0.010414399951696397
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,32,16,2,128,1,fp8,fp8,0,0.10025919675827026
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,32,16,4,128,1,float16,float16,0,0.10126080513000488
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,64,16,2,128,1,float16,float16,0,0.010451199859380722
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,32,16,8,128,1,float16,float16,0,0.10637600421905517
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,32,16,4,128,1,fp8,fp8,0,0.0999184012413025
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,64,16,4,128,1,fp8,fp8,0,0.010463999956846238
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,32,16,8,128,1,float16,fp8,0,0.1000480055809021
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,32,16,16,128,1,float16,float16,0,0.062118399143219
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,32,16,8,128,1,fp8,fp8,0,0.10078719854354859
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,32,16,16,128,1,float16,fp8,0,0.05778239965438843
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,32,16,1,128,1,float16,float16,0,0.054782402515411374
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,32,16,1,128,1,float16,fp8,0,0.05428479909896851
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,32,16,16,128,1,fp8,fp8,0,0.05776799917221069
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,32,16,1,128,1,fp8,fp8,0,0.0543503999710083
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,32,16,2,128,1,float16,fp8,0,0.05405120253562927
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,32,16,2,128,1,fp8,fp8,0,0.05412160158157349
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,32,16,4,128,1,float16,fp8,0,0.053958398103713986
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,32,16,4,128,1,fp8,fp8,0,0.05416640043258667
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,32,16,8,128,1,float16,float16,0,0.057811200618743896
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,32,16,8,128,1,float16,fp8,0,0.0542415976524353
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,32,16,16,128,1,float16,float16,0,0.034052801132202146
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,32,16,16,128,1,float16,fp8,0,0.033000001311302186
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,32,16,16,128,1,fp8,fp8,0,0.03299359977245331
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,32,16,1,128,1,float16,float16,0,0.031070399284362792
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,32,16,1,128,1,float16,fp8,0,0.030958399176597595
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,32,16,1,128,1,fp8,fp8,0,0.030935999751091004
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,32,16,2,128,1,float16,float16,0,0.030953601002693176
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,32,16,2,128,1,float16,fp8,0,0.03091999888420105
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,32,16,2,128,1,fp8,fp8,0,0.03094080090522766
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,32,16,4,128,1,float16,float16,0,0.03166399896144867
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,32,16,4,128,1,float16,fp8,0,0.030972799658775328
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,32,16,4,128,1,fp8,fp8,0,0.030924800038337707
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,32,16,8,128,1,float16,float16,0,0.03304960131645203
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,32,16,8,128,1,fp8,fp8,0,0.031007999181747438
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,32,16,8,128,1,float16,fp8,0,0.03099839985370636
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,32,16,16,128,1,float16,fp8,0,0.02069759964942932
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,32,16,16,128,1,float16,float16,0,0.021747200191020964
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,32,16,16,128,1,fp8,fp8,0,0.020803199708461763
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,32,16,1,128,1,float16,float16,0,0.02072640061378479
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,64,16,2,128,1,float16,float16,0,0.010440000146627427
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,32,16,1,128,1,float16,fp8,0,0.02065120041370392
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,32,16,2,128,1,float16,float16,0,0.020681600272655486
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,32,16,2,128,1,float16,fp8,0,0.020716799795627593
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,32,16,2,128,1,fp8,fp8,0,0.020734399557113647
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,32,16,4,128,1,float16,float16,0,0.02067199945449829
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,32,16,4,128,1,float16,fp8,0,0.020571200549602507
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,32,16,4,128,1,fp8,fp8,0,0.020729599893093108
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,32,16,8,128,1,float16,float16,0,0.020703999698162077
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,32,16,8,128,1,float16,fp8,0,0.020721599459648132
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,32,16,8,128,1,fp8,fp8,0,0.02067680060863495
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,32,16,16,128,1,float16,float16,0,0.014555199444293976
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,32,16,16,128,1,float16,fp8,0,0.014494399726390838
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,32,16,16,128,1,fp8,fp8,0,0.014496000111103058
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,32,16,1,128,1,float16,fp8,0,0.10025279521942139
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,32,16,1,128,1,float16,float16,0,0.013812799751758576
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,32,16,1,128,1,float16,fp8,0,0.013417600095272065
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,32,16,1,128,1,fp8,fp8,0,0.01446239948272705
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,32,16,2,128,1,float16,fp8,0,0.013556799292564392
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,32,16,2,128,1,fp8,fp8,0,0.013607999682426453
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,32,16,4,128,1,float16,float16,0,0.014532800018787383
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,32,16,4,128,1,float16,fp8,0,0.014481599628925323
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,32,16,4,128,1,fp8,fp8,0,0.014547200500965118
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,32,16,4,128,1,float16,fp8,0,0.10001599788665771
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,32,16,8,128,1,float16,fp8,0,0.01451839953660965
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,32,16,8,128,1,fp8,fp8,0,0.014377599954605103
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,32,16,2,128,1,float16,float16,0,0.05421760082244873
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,32,16,16,128,1,float16,float16,0,0.010414399951696397
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,32,16,4,128,1,float16,float16,0,0.05504000186920166
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,32,16,16,128,1,float16,fp8,0,0.010473600029945374
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,32,16,1,128,1,float16,float16,0,0.01050880029797554
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,32,16,1,128,1,float16,fp8,0,0.010487999767065048
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,32,16,1,128,1,fp8,fp8,0,0.010372799634933472
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,32,16,2,128,1,float16,float16,0,0.010460799932479859
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,32,16,8,128,1,fp8,fp8,0,0.05425119996070862
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,32,16,2,128,1,fp8,fp8,0,0.010475199669599533
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,32,16,4,128,1,float16,float16,0,0.010451199859380722
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,32,16,4,128,1,float16,fp8,0,0.010494399815797806
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,32,16,4,128,1,fp8,fp8,0,0.010447999835014344
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,32,16,8,128,1,float16,float16,0,0.010494399815797806
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,32,16,8,128,1,fp8,fp8,0,0.01048159971833229
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,32,16,8,128,1,float16,fp8,0,0.010446400195360184
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,32,16,16,128,1,float16,float16,0,0.010398399829864503
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,32,16,16,128,1,float16,fp8,0,0.010436800122261048
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,32,16,16,128,1,fp8,fp8,0,0.010451199859380722
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,32,16,1,128,1,float16,float16,0,0.01043199971318245
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,32,16,1,128,1,float16,fp8,0,0.010444799810647965
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,32,16,1,128,1,fp8,fp8,0,0.010388799756765366
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,32,16,2,128,1,float16,fp8,0,0.010396800190210342
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,32,16,2,128,1,float16,float16,0,0.010462400317192078
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,32,16,2,128,1,fp8,fp8,0,0.010427200049161912
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,32,16,4,128,1,float16,float16,0,0.01045759990811348
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,32,16,4,128,1,float16,fp8,0,0.01043199971318245
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,32,16,8,128,1,float16,float16,0,0.010462400317192078
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,32,16,8,128,1,float16,fp8,0,0.010427200049161912
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,32,16,1,128,1,fp8,fp8,0,0.020710399746894835
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,32,16,8,128,1,fp8,fp8,0,0.010422399640083313
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,32,16,16,128,1,float16,float16,0,0.010463999956846238
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,32,16,16,128,1,float16,fp8,0,0.009427200257778167
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,32,16,16,128,1,fp8,fp8,0,0.010473600029945374
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,32,16,1,128,1,float16,float16,0,0.009344000369310379
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,32,16,1,128,1,float16,fp8,0,0.009446399658918381
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,32,16,1,128,1,fp8,fp8,0,0.008907199651002885
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,32,16,2,128,1,float16,float16,0,0.010369600355625152
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,32,16,2,128,1,float16,fp8,0,0.009115199744701385
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,32,16,2,128,1,fp8,fp8,0,0.01003040000796318
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,32,16,4,128,1,float16,float16,0,0.010467199981212616
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,32,16,4,128,1,float16,fp8,0,0.010096000134944915
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,32,16,4,128,1,fp8,fp8,0,0.010335999727249145
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,32,16,8,128,1,float16,float16,0,0.010345599800348281
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,32,16,8,128,1,float16,fp8,0,0.010331200063228607
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,32,16,8,128,1,fp8,fp8,0,0.010435199737548828
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,32,16,16,128,1,float16,fp8,0,0.010358399897813796
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,32,16,2,128,1,float16,float16,0,0.014467200636863709
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,32,16,1,128,1,float16,float16,0,0.010369600355625152
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,32,16,1,128,1,float16,fp8,0,0.00888959988951683
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,32,16,1,128,1,fp8,fp8,0,0.010273600369691849
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,32,16,2,128,1,float16,float16,0,0.010310400277376175
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,32,16,2,128,1,float16,fp8,0,0.008604799956083297
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,32,16,2,128,1,fp8,fp8,0,0.009495999664068222
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,32,16,4,128,1,float16,float16,0,0.009387200325727462
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,32,16,4,128,1,float16,fp8,0,0.009619200229644775
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,32,16,4,128,1,fp8,fp8,0,0.008585599809885025
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,32,16,8,128,1,float16,fp8,0,0.009040000289678574
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,32,16,8,128,1,float16,float16,0,0.014584000408649444
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,32,16,8,128,1,fp8,fp8,0,0.008675199747085572
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,16,16,1,128,1,float16,float16,0,0.09496319890022278
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,16,16,1,128,1,float16,fp8,0,0.09185439944267274
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,32,16,16,128,1,fp8,fp8,0,0.010420800000429154
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,16,16,1,128,1,fp8,fp8,0,0.09194080233573913
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,16,16,2,128,1,float16,float16,0,0.09483839869499207
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,16,16,2,128,1,float16,fp8,0,0.09186559915542603
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,16,16,2,128,1,fp8,fp8,0,0.09193919897079468
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,32,16,2,128,1,float16,fp8,0,0.01045600026845932
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,16,16,4,128,1,float16,float16,0,0.09616000056266785
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,16,16,4,128,1,fp8,fp8,0,0.09200000166893005
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,16,16,4,128,1,float16,fp8,0,0.09223039746284485
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,16,16,8,128,1,float16,fp8,0,0.09241440296173095
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,16,16,8,128,1,fp8,fp8,0,0.09197919964790344
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,16,16,16,128,1,float16,float16,0,0.055497598648071286
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,16,16,16,128,1,float16,fp8,0,0.05319679975509643
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,16,16,1,128,1,float16,float16,0,0.053636801242828366
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,16,16,16,128,1,fp8,fp8,0,0.05273119807243347
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,16,16,1,128,1,float16,fp8,0,0.050835198163986205
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,16,16,1,128,1,fp8,fp8,0,0.051179200410842896
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,16,16,2,128,1,float16,fp8,0,0.05080639719963074
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,16,16,2,128,1,fp8,fp8,0,0.05146080255508423
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,16,16,4,128,1,float16,float16,0,0.05367680191993714
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,16,16,4,128,1,float16,fp8,0,0.051204800605773926
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,16,16,4,128,1,fp8,fp8,0,0.0509663999080658
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,32,16,4,128,1,fp8,fp8,0,0.010363200306892395
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,16,16,8,128,1,float16,float16,0,0.05527520179748535
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,16,16,8,128,1,float16,fp8,0,0.05104960203170776
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,16,16,8,128,1,fp8,fp8,0,0.05114399790763855
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,16,16,16,128,1,float16,float16,0,0.03271040022373199
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,16,16,16,128,1,float16,fp8,0,0.03057439923286438
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,16,16,16,128,1,fp8,fp8,0,0.030950400233268737
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,16,16,1,128,1,float16,float16,0,0.03094240128993988
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,16,16,1,128,1,float16,fp8,0,0.029016000032424927
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,16,16,1,128,1,fp8,fp8,0,0.029129600524902342
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,16,16,2,128,1,float16,float16,0,0.03105599880218506
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,16,16,2,128,1,float16,fp8,0,0.029023998975753786
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,16,16,2,128,1,fp8,fp8,0,0.02901279926300049
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,16,16,4,128,1,float16,float16,0,0.030958399176597595
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,16,16,4,128,1,float16,fp8,0,0.028987199068069458
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,16,16,4,128,1,fp8,fp8,0,0.0289247989654541
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,16,16,8,128,1,float16,fp8,0,0.028915199637413024
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,16,16,8,128,1,float16,float16,0,0.03246400058269501
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,32,16,16,128,1,float16,float16,0,0.01048320010304451
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,16,16,8,128,1,fp8,fp8,0,0.028889599442481994
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,32,16,16,128,1,fp8,fp8,0,0.009635200351476669
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,16,16,16,128,1,float16,fp8,0,0.01988160014152527
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,16,16,16,128,1,fp8,fp8,0,0.020182399451732634
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,16,16,1,128,1,float16,float16,0,0.020233599841594695
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,16,16,1,128,1,float16,fp8,0,0.01871040016412735
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,16,16,1,128,1,fp8,fp8,0,0.018700799345970152
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,16,16,2,128,1,float16,float16,0,0.02051839977502823
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,16,16,2,128,1,float16,fp8,0,0.018862399458885192
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,16,16,2,128,1,fp8,fp8,0,0.018648000061511995
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,16,16,4,128,1,float16,fp8,0,0.018862399458885192
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,16,16,4,128,1,fp8,fp8,0,0.018702399730682374
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,16,16,8,128,1,float16,float16,0,0.0207056000828743
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,32,16,8,128,1,float16,float16,0,0.010473600029945374
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,16,16,16,128,1,float16,float16,0,0.014504000544548035
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,16,16,8,128,1,fp8,fp8,0,0.01912800073623657
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,16,16,16,128,1,float16,fp8,0,0.01430719941854477
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,16,16,16,128,1,fp8,fp8,0,0.014494399726390838
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,16,16,1,128,1,float16,fp8,0,0.01273760050535202
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,16,16,1,128,1,float16,float16,0,0.014476799964904785
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,16,16,2,128,1,float16,float16,0,0.013259199261665345
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,16,16,2,128,1,float16,fp8,0,0.012484800070524216
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,16,16,2,128,1,fp8,fp8,0,0.012548799812793731
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,16,16,4,128,1,float16,float16,0,0.013625599443912506
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,16,16,4,128,1,float16,fp8,0,0.012563200294971466
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,16,16,4,128,1,fp8,fp8,0,0.01249919980764389
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,16,16,8,128,1,float16,float16,0,0.014572800695896148
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,16,16,8,128,1,float16,fp8,0,0.012545600533485413
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,16,16,2,128,1,float16,float16,0,0.053230398893356325
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,16,16,8,128,1,fp8,fp8,0,0.013025599718093871
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,16,16,16,128,1,float16,float16,0,0.010438399761915207
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,16,16,16,128,1,float16,fp8,0,0.010435199737548828
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,16,16,16,128,1,fp8,fp8,0,0.01048159971833229
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,16,16,1,128,1,float16,float16,0,0.010452800244092942
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,16,16,1,128,1,float16,fp8,0,0.010462400317192078
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,16,16,1,128,1,fp8,fp8,0,0.01048479974269867
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,16,16,2,128,1,float16,float16,0,0.01043199971318245
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,16,16,2,128,1,float16,fp8,0,0.010470400005578995
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,16,16,2,128,1,fp8,fp8,0,0.010401599854230881
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,16,16,4,128,1,float16,float16,0,0.010454399883747101
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,16,16,4,128,1,float16,fp8,0,0.010419200360774993
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,16,16,4,128,1,fp8,fp8,0,0.010454399883747101
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,16,16,8,128,1,float16,float16,0,0.010452800244092942
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,16,16,8,128,1,float16,fp8,0,0.010486400127410889
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,16,16,8,128,1,fp8,fp8,0,0.010443200170993806
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16,16,16,128,1,float16,float16,0,0.010518400371074677
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16,16,16,128,1,float16,fp8,0,0.010465600341558457
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16,16,16,128,1,fp8,fp8,0,0.010507199913263321
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16,16,1,128,1,float16,float16,0,0.010438399761915207
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16,16,1,128,1,float16,fp8,0,0.010475199669599533
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16,16,1,128,1,fp8,fp8,0,0.010475199669599533
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16,16,2,128,1,float16,float16,0,0.010425599664449692
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,16,16,16,128,1,float16,float16,0,0.020691199600696562
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16,16,2,128,1,float16,fp8,0,0.010444799810647965
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16,16,2,128,1,fp8,fp8,0,0.010446400195360184
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16,16,4,128,1,float16,float16,0,0.010428799688816071
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16,16,4,128,1,float16,fp8,0,0.010436800122261048
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16,16,4,128,1,fp8,fp8,0,0.01037919968366623
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16,16,8,128,1,float16,float16,0,0.010470400005578995
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16,16,8,128,1,float16,fp8,0,0.0104032002389431
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16,16,8,128,1,fp8,fp8,0,0.010473600029945374
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16,16,16,128,1,float16,float16,0,0.0104032002389431
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,16,16,4,128,1,float16,float16,0,0.02016959935426712
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16,16,16,128,1,float16,fp8,0,0.009419199824333192
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16,16,16,128,1,fp8,fp8,0,0.010260800272226334
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16,16,1,128,1,float16,float16,0,0.009710399806499482
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16,16,1,128,1,float16,fp8,0,0.008399999886751174
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16,16,2,128,1,float16,float16,0,0.008502399921417237
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16,16,1,128,1,fp8,fp8,0,0.008761599659919739
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16,16,2,128,1,float16,fp8,0,0.008398400247097015
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,16,16,8,128,1,float16,fp8,0,0.018636800348758698
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16,16,4,128,1,float16,float16,0,0.008894400298595428
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16,16,4,128,1,fp8,fp8,0,0.009424000233411788
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16,16,4,128,1,float16,fp8,0,0.009411200135946273
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16,16,8,128,1,float16,float16,0,0.01029599979519844
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16,16,8,128,1,fp8,fp8,0,0.008473599702119828
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16,16,16,128,1,float16,float16,0,0.010419200360774993
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16,16,16,128,1,float16,fp8,0,0.008432000130414962
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16,16,16,128,1,fp8,fp8,0,0.010391999781131745
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16,16,1,128,1,float16,float16,0,0.008476799726486206
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16,16,1,128,1,float16,fp8,0,0.00941760018467903
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16,16,1,128,1,fp8,fp8,0,0.008393599838018417
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16,16,2,128,1,float16,float16,0,0.010396800190210342
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16,16,8,128,1,float16,fp8,0,0.010412800312042236
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16,16,2,128,1,float16,fp8,0,0.008419200032949447
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16,16,2,128,1,fp8,fp8,0,0.00987199991941452
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16,16,4,128,1,float16,fp8,0,0.009950400143861771
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16,16,4,128,1,fp8,fp8,0,0.008582399785518646
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,16,16,1,128,1,fp8,fp8,0,0.0131632000207901
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16,16,8,128,1,float16,float16,0,0.00921280011534691
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16,16,8,128,1,float16,fp8,0,0.010420800000429154
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16,16,8,128,1,fp8,fp8,0,0.010267200320959092
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,1,16,1,128,1,float16,float16,0,0.09342560172080994
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,1,16,2,128,1,float16,fp8,0,0.08723679780960084
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,16,16,8,128,1,float16,float16,0,0.09881119728088379
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,1,16,1,128,1,float16,fp8,0,0.0872879981994629
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,1,16,1,128,1,fp8,fp8,0,0.08767679929733277
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,1,16,2,128,1,fp8,fp8,0,0.08726879954338074
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,1,16,4,128,1,float16,float16,0,0.09339519739151
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,1,16,4,128,1,float16,fp8,0,0.08755199909210205
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,1,16,4,128,1,fp8,fp8,0,0.08737760186195373
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,1,16,8,128,1,float16,float16,0,0.09313759803771973
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,1,16,8,128,1,float16,fp8,0,0.0869104027748108
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,1,16,8,128,1,fp8,fp8,0,0.08810880184173583
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,1,16,16,128,1,float16,fp8,0,0.04886879920959473
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,1,16,16,128,1,fp8,fp8,0,0.04854559898376465
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,1,16,1,128,1,float16,float16,0,0.052748799324035645
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,1,16,1,128,1,float16,fp8,0,0.049086400866508485
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,1,16,1,128,1,fp8,fp8,0,0.04831840097904205
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,1,16,2,128,1,float16,float16,0,0.0528223991394043
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,1,16,2,128,1,float16,fp8,0,0.048767998814582825
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,1,16,4,128,1,float16,float16,0,0.05284159779548645
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,1,16,4,128,1,float16,fp8,0,0.04875200092792511
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,1,16,4,128,1,fp8,fp8,0,0.04933759868144989
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,1,16,8,128,1,float16,float16,0,0.052107197046279904
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,1,16,8,128,1,float16,fp8,0,0.049158400297164916
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,1,16,8,128,1,fp8,fp8,0,0.048065599799156186
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1,16,16,128,1,float16,float16,0,0.030958399176597595
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1,16,16,128,1,float16,fp8,0,0.028814399242401124
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1,16,16,128,1,fp8,fp8,0,0.02889440059661865
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1,16,1,128,1,float16,float16,0,0.030460798740386964
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1,16,1,128,1,float16,fp8,0,0.028809601068496705
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1,16,1,128,1,fp8,fp8,0,0.02852639853954315
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1,16,2,128,1,float16,float16,0,0.030947199463844298
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1,16,2,128,1,fp8,fp8,0,0.02871679961681366
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1,16,2,128,1,float16,fp8,0,0.028622400760650635
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1,16,4,128,1,float16,float16,0,0.03091199994087219
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1,16,4,128,1,float16,fp8,0,0.028705599904060363
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1,16,4,128,1,fp8,fp8,0,0.028916800022125246
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1,16,8,128,1,float16,float16,0,0.03078399896621704
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1,16,8,128,1,fp8,fp8,0,0.028600001335144044
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1,16,8,128,1,float16,fp8,0,0.0288239985704422
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1,16,16,128,1,float16,float16,0,0.020076799392700195
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1,16,16,128,1,float16,fp8,0,0.018654400110244752
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1,16,16,128,1,fp8,fp8,0,0.018644799292087556
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1,16,1,128,1,float16,float16,0,0.01945600062608719
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1,16,1,128,1,float16,fp8,0,0.018688000738620758
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16,16,2,128,1,fp8,fp8,0,0.008473599702119828
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1,16,1,128,1,fp8,fp8,0,0.01866399943828583
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1,16,2,128,1,float16,float16,0,0.019313600659370423
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1,16,2,128,1,float16,fp8,0,0.018639999628067016
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1,16,2,128,1,fp8,fp8,0,0.018670399487018586
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1,16,4,128,1,float16,float16,0,0.01897439956665039
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1,16,4,128,1,float16,fp8,0,0.01863040030002594
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1,16,4,128,1,fp8,fp8,0,0.018632000684738158
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1,16,8,128,1,float16,float16,0,0.018785600364208222
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1,16,8,128,1,float16,fp8,0,0.01865600049495697
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1,16,8,128,1,fp8,fp8,0,0.018644799292087556
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16,16,4,128,1,float16,float16,0,0.008489599823951722
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1,16,16,128,1,float16,fp8,0,0.012628799676895142
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1,16,16,128,1,fp8,fp8,0,0.012510399520397186
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1,16,1,128,1,float16,float16,0,0.014550399780273438
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1,16,1,128,1,float16,fp8,0,0.01249760016798973
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1,16,1,128,1,fp8,fp8,0,0.012518399953842163
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1,16,2,128,1,float16,float16,0,0.01451359987258911
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1,16,2,128,1,float16,fp8,0,0.012780800461769104
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1,16,2,128,1,fp8,fp8,0,0.012561599910259246
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1,16,4,128,1,float16,float16,0,0.014542399346828461
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1,16,4,128,1,float16,fp8,0,0.012703999876976013
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1,16,4,128,1,fp8,fp8,0,0.01250240057706833
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1,16,8,128,1,float16,fp8,0,0.01273760050535202
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1,16,8,128,1,fp8,fp8,0,0.012587200105190276
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,1,16,2,128,1,float16,float16,0,0.09372000098228454
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1,16,16,128,1,float16,fp8,0,0.010425599664449692
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1,16,16,128,1,fp8,fp8,0,0.010467199981212616
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1,16,1,128,1,float16,float16,0,0.010460799932479859
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1,16,1,128,1,float16,fp8,0,0.010361599922180175
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1,16,1,128,1,fp8,fp8,0,0.010435199737548828
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,1,16,16,128,1,float16,float16,0,0.05246880054473877
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1,16,2,128,1,float16,float16,0,0.010443200170993806
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1,16,2,128,1,float16,fp8,0,0.010440000146627427
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1,16,2,128,1,fp8,fp8,0,0.010492800176143647
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1,16,4,128,1,float16,float16,0,0.010473600029945374
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1,16,4,128,1,fp8,fp8,0,0.010446400195360184
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1,16,8,128,1,float16,float16,0,0.010436800122261048
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1,16,4,128,1,float16,fp8,0,0.010473600029945374
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1,16,8,128,1,float16,fp8,0,0.010428799688816071
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,1,16,2,128,1,fp8,fp8,0,0.04773919880390167
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1,16,16,128,1,float16,float16,0,0.010487999767065048
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1,16,16,128,1,float16,fp8,0,0.010444799810647965
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1,16,16,128,1,fp8,fp8,0,0.010385599732398988
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1,16,1,128,1,float16,float16,0,0.010460799932479859
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1,16,1,128,1,float16,fp8,0,0.010497599840164185
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1,16,1,128,1,fp8,fp8,0,0.010371199995279311
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1,16,2,128,1,float16,float16,0,0.010487999767065048
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1,16,2,128,1,float16,fp8,0,0.010440000146627427
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1,16,2,128,1,fp8,fp8,0,0.01037919968366623
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1,16,4,128,1,float16,float16,0,0.01045759990811348
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1,16,4,128,1,float16,fp8,0,0.010468800365924836
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1,16,4,128,1,fp8,fp8,0,0.0103472001850605
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1,16,8,128,1,float16,float16,0,0.010462400317192078
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1,16,8,128,1,float16,fp8,0,0.010422399640083313
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1,16,8,128,1,fp8,fp8,0,0.010480000078678131
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1,16,16,128,1,float16,float16,0,0.010425599664449692
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1,16,16,128,1,float16,fp8,0,0.009080000221729279
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1,16,16,128,1,fp8,fp8,0,0.010372799634933472
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1,16,1,128,1,float16,float16,0,0.01045600026845932
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1,16,1,128,1,float16,fp8,0,0.010358399897813796
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1,16,1,128,1,fp8,fp8,0,0.008515200018882752
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1,16,2,128,1,float16,fp8,0,0.010395199805498124
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1,16,2,128,1,fp8,fp8,0,0.008523199707269669
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1,16,4,128,1,float16,float16,0,0.01037440001964569
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1,16,4,128,1,float16,fp8,0,0.008419200032949447
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1,16,4,128,1,fp8,fp8,0,0.010371199995279311
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1,16,8,128,1,float16,float16,0,0.009246399998664856
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1,16,8,128,1,fp8,fp8,0,0.008452799916267396
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1,16,16,128,1,float16,float16,0,0.009963200241327286
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1,16,16,128,1,float16,fp8,0,0.008617600053548813
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1,16,16,128,1,fp8,fp8,0,0.008958400040864945
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1,16,1,128,1,float16,float16,0,0.009364800155162811
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1,16,1,128,1,float16,fp8,0,0.008643200248479843
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1,16,1,128,1,fp8,fp8,0,0.008390399813652038
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1,16,16,128,1,float16,float16,0,0.014593599736690522
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1,16,2,128,1,float16,fp8,0,0.00852160006761551
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1,16,2,128,1,fp8,fp8,0,0.008423999696969987
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1,16,4,128,1,float16,float16,0,0.00843999981880188
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1,16,8,128,1,float16,float16,0,0.014499199390411378
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1,16,4,128,1,fp8,fp8,0,0.008705600351095199
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1,16,8,128,1,float16,float16,0,0.008489599823951722
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1,16,8,128,1,float16,fp8,0,0.00841279998421669
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1,16,8,128,1,fp8,fp8,0,0.008472000062465668
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1,16,8,128,1,fp8,fp8,0,0.010422399640083313
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1,16,2,128,1,float16,float16,0,0.010412800312042236
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1,16,8,128,1,float16,fp8,0,0.010340800136327743
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1,16,2,128,1,float16,float16,0,0.008419200032949447
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1,16,4,128,1,float16,fp8,0,0.008448000252246856
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1,16,16,128,1,float16,float16,0,0.010556799918413162
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,16384,12,1,128,1,float16,fp8,0,4.290412902832031
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,16384,12,1,128,1,float16,float16,0,5.322735977172852
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,16384,12,1,128,1,fp8,fp8,0,4.301011276245117
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,16384,12,2,128,1,float16,fp8,0,4.30334243774414
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,16384,12,2,128,1,float16,float16,0,5.176521682739258
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,16384,12,2,128,1,fp8,fp8,0,4.318462371826172
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,16384,12,4,128,1,float16,fp8,0,4.296542358398438
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,16384,12,4,128,1,float16,float16,0,5.233659362792968
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,16384,12,4,128,1,fp8,fp8,0,4.336320114135742
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16384,12,1,128,1,float16,fp8,0,2.2305679321289062
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16384,12,1,128,1,float16,float16,0,2.698628807067871
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16384,12,1,128,1,fp8,fp8,0,2.2230703353881838
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16384,12,12,128,1,float16,fp8,0,2.2873407363891602
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16384,12,12,128,1,fp8,fp8,0,2.286591911315918
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16384,12,2,128,1,float16,float16,0,2.6731807708740236
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16384,12,2,128,1,float16,fp8,0,2.216748809814453
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16384,12,2,128,1,fp8,fp8,0,2.210345649719238
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16384,12,4,128,1,float16,fp8,0,2.211609649658203
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16384,12,4,128,1,float16,float16,0,2.7198463439941407
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16384,12,12,128,1,fp8,fp8,0,1.2061375617980956
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16384,12,4,128,1,fp8,fp8,0,2.2161264419555664
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16384,12,1,128,1,float16,float16,0,1.3240464210510254
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16384,12,1,128,1,float16,fp8,0,1.1681792259216308
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16384,12,1,128,1,fp8,fp8,0,1.3908831596374511
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16384,12,2,128,1,float16,float16,0,1.3074527740478517
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16384,12,2,128,1,float16,fp8,0,1.1703791618347168
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16384,12,2,128,1,fp8,fp8,0,1.2344719886779785
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16384,12,4,128,1,float16,float16,0,1.3216848373413086
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16384,12,4,128,1,float16,fp8,0,1.2338175773620605
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16384,12,4,128,1,fp8,fp8,0,1.2167424201965331
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16384,12,12,128,1,fp8,fp8,0,0.7489424228668213
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16384,12,1,128,1,float16,float16,0,0.7084368228912353
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16384,12,1,128,1,float16,fp8,0,0.6690207958221436
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16384,12,12,128,1,float16,fp8,0,1.3734047889709473
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16384,12,1,128,1,fp8,fp8,0,0.6978831768035889
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16384,12,2,128,1,float16,float16,0,0.7100831985473632
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16384,12,2,128,1,float16,fp8,0,0.6660463809967041
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16384,12,2,128,1,fp8,fp8,0,0.646236801147461
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16384,12,4,128,1,float16,float16,0,0.7204016208648681
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16384,12,4,128,1,float16,fp8,0,0.657912015914917
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16384,12,4,128,1,fp8,fp8,0,0.6488175868988038
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16384,12,12,128,1,float16,fp8,0,0.6939663887023926
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,12288,12,1,128,1,float16,fp8,0,2.5326511383056642
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,12288,12,1,128,1,fp8,fp8,0,2.5515247344970704
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,12288,12,1,128,1,float16,float16,0,3.0167503356933594
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,12288,12,2,128,1,float16,float16,0,3.083950424194336
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,12288,12,2,128,1,float16,fp8,0,2.530344009399414
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,12288,12,2,128,1,fp8,fp8,0,2.5286752700805666
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,12288,12,4,128,1,float16,fp8,0,2.802631950378418
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,12288,12,4,128,1,float16,float16,0,3.054996871948242
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,12288,12,4,128,1,fp8,fp8,0,2.764036750793457
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,12288,12,12,128,1,float16,fp8,0,1.3712240219116212
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,12288,12,12,128,1,fp8,fp8,0,1.380673599243164
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,12288,12,1,128,1,float16,float16,0,1.6430208206176757
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,12288,12,1,128,1,float16,fp8,0,1.3252016067504884
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,12288,12,1,128,1,fp8,fp8,0,1.3140735626220703
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,12288,12,2,128,1,float16,float16,0,1.4666943550109863
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,12288,12,2,128,1,fp8,fp8,0,1.3166000366210937
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,12288,12,2,128,1,float16,fp8,0,1.4984463691711425
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,12288,12,4,128,1,float16,float16,0,1.4861583709716797
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,12288,12,4,128,1,float16,fp8,0,1.535857582092285
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,12288,12,4,128,1,fp8,fp8,0,1.3148591995239258
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,12288,12,12,128,1,float16,fp8,0,0.7726575851440429
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,12288,12,12,128,1,fp8,fp8,0,0.7625599861145019
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,12288,12,1,128,1,float16,float16,0,0.7845839977264404
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,12288,12,1,128,1,float16,fp8,0,0.7338543891906738
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,12288,12,1,128,1,fp8,fp8,0,0.8158432006835937
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,12288,12,2,128,1,float16,float16,0,0.7709712028503418
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,12288,12,2,128,1,float16,fp8,0,0.7683807849884033
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,12288,12,2,128,1,fp8,fp8,0,0.7091775894165039
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,12288,12,4,128,1,float16,float16,0,0.78994722366333
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,12288,12,4,128,1,float16,fp8,0,0.7078656196594239
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,12288,12,4,128,1,fp8,fp8,0,0.71703200340271
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,12288,12,12,128,1,float16,fp8,0,0.41902880668640136
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,12288,12,12,128,1,fp8,fp8,0,0.4172656059265137
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,12288,12,1,128,1,float16,float16,0,0.4278480052947998
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,12288,12,1,128,1,float16,fp8,0,0.4026463985443115
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,12288,12,1,128,1,fp8,fp8,0,0.40460638999938964
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,12288,12,2,128,1,float16,float16,0,0.4274847984313965
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,12288,12,2,128,1,float16,fp8,0,0.4026319980621338
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,12288,12,2,128,1,fp8,fp8,0,0.40250558853149415
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,12288,12,4,128,1,float16,float16,0,0.43491201400756835
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,12288,12,4,128,1,float16,fp8,0,0.4007984161376953
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,12288,12,4,128,1,fp8,fp8,0,0.40372319221496583
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,10240,12,1,128,1,float16,float16,0,2.0207632064819334
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,10240,12,1,128,1,float16,fp8,0,1.8194303512573242
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,10240,12,1,128,1,fp8,fp8,0,2.0469472885131834
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,10240,12,2,128,1,float16,float16,0,2.036471939086914
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,10240,12,2,128,1,float16,fp8,0,1.8575071334838866
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,10240,12,2,128,1,fp8,fp8,0,2.0688528060913085
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,10240,12,4,128,1,float16,float16,0,2.1354015350341795
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,10240,12,4,128,1,float16,fp8,0,1.8460432052612306
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,10240,12,4,128,1,fp8,fp8,0,2.040278434753418
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,10240,12,12,128,1,float16,fp8,0,1.020094394683838
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,10240,12,12,128,1,fp8,fp8,0,1.0034496307373046
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,10240,12,1,128,1,float16,float16,0,1.0443872451782226
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,10240,12,1,128,1,float16,fp8,0,0.9676976203918457
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,10240,12,1,128,1,fp8,fp8,0,0.9717599868774414
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,10240,12,2,128,1,float16,float16,0,1.0482815742492675
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,10240,12,2,128,1,float16,fp8,0,0.9651455879211426
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,10240,12,2,128,1,fp8,fp8,0,0.9873472213745117
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,10240,12,4,128,1,float16,float16,0,1.0626607894897462
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,10240,12,4,128,1,float16,fp8,0,0.9700016021728516
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,10240,12,4,128,1,fp8,fp8,0,0.9941727638244628
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16384,12,12,128,1,float16,float16,0,1.35140962600708
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,12288,12,12,128,1,float16,float16,0,0.8102592468261719
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,10240,12,12,128,1,float16,fp8,0,0.5433968067169189
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16384,12,12,128,1,float16,float16,0,2.6936159133911133
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,10240,12,12,128,1,fp8,fp8,0,0.5415872097015381
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,10240,12,1,128,1,float16,float16,0,0.5551248073577881
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,10240,12,1,128,1,fp8,fp8,0,0.5168464183807373
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,10240,12,12,128,1,float16,float16,0,0.5907519817352295
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,10240,12,1,128,1,float16,fp8,0,0.6384543895721435
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,10240,12,2,128,1,float16,float16,0,0.5541855812072753
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,10240,12,2,128,1,float16,fp8,0,0.5165472030639648
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,10240,12,2,128,1,fp8,fp8,0,0.5185919761657715
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,10240,12,4,128,1,float16,float16,0,0.5962512016296386
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,10240,12,12,128,1,float16,float16,0,0.33307681083679197
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16384,12,12,128,1,float16,float16,0,0.7275599956512451
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,10240,12,4,128,1,fp8,fp8,0,0.5183040142059326
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,10240,12,12,128,1,float16,fp8,0,0.33968799114227294
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,10240,12,12,128,1,fp8,fp8,0,0.3158943891525269
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,10240,12,1,128,1,float16,float16,0,0.31461760997772215
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,10240,12,1,128,1,float16,fp8,0,0.3017263889312744
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,10240,12,1,128,1,fp8,fp8,0,0.32085280418395995
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,12288,12,12,128,1,float16,float16,0,1.5657504081726075
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,10240,12,2,128,1,float16,float16,0,0.31592159271240233
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,10240,12,2,128,1,float16,fp8,0,0.30428481101989746
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,10240,12,2,128,1,fp8,fp8,0,0.3018032073974609
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,10240,12,4,128,1,float16,float16,0,0.3275712013244629
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,10240,12,4,128,1,float16,fp8,0,0.352892804145813
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,10240,12,4,128,1,fp8,fp8,0,0.3032815933227539
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,12288,12,12,128,1,float16,float16,0,0.44654240608215334
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,8192,12,1,128,1,float16,fp8,0,2.373182487487793
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,8192,12,1,128,1,fp8,fp8,0,2.3923152923583983
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,8192,12,1,128,1,float16,float16,0,2.709443283081055
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,10240,12,4,128,1,float16,fp8,0,0.5276656150817871
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,8192,12,2,128,1,float16,fp8,0,2.3781919479370117
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,10240,12,12,128,1,float16,float16,0,1.1152928352355957
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,8192,12,2,128,1,float16,float16,0,2.775894355773926
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,8192,12,2,128,1,fp8,fp8,0,2.3758703231811524
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,8192,12,12,128,1,float16,fp8,0,1.2964112281799316
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,8192,12,12,128,1,float16,float16,0,1.4396448135375977
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,8192,12,12,128,1,fp8,fp8,0,1.375152015686035
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,8192,12,1,128,1,float16,float16,0,1.488864040374756
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,8192,12,4,128,1,float16,fp8,0,2.379724884033203
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,8192,12,4,128,1,float16,float16,0,2.7786895751953127
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,8192,12,1,128,1,float16,fp8,0,1.2219103813171386
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,8192,12,4,128,1,fp8,fp8,0,2.611262321472168
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,8192,12,1,128,1,fp8,fp8,0,1.2242927551269531
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,8192,12,2,128,1,float16,float16,0,1.3647871971130372
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,8192,12,2,128,1,fp8,fp8,0,1.3683695793151855
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,8192,12,12,128,1,float16,float16,0,0.7454256057739258
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,8192,12,12,128,1,float16,fp8,0,0.6845136165618897
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,8192,12,4,128,1,float16,float16,0,1.36038236618042
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,8192,12,12,128,1,fp8,fp8,0,0.8072015762329101
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,8192,12,4,128,1,fp8,fp8,0,1.224302387237549
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,8192,12,4,128,1,float16,fp8,0,1.397545623779297
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,8192,12,1,128,1,float16,float16,0,0.7094064235687256
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,8192,12,1,128,1,float16,fp8,0,0.6548143863677979
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,8192,12,1,128,1,fp8,fp8,0,0.6475327968597412
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,8192,12,2,128,1,float16,float16,0,0.6973840236663819
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,8192,12,2,128,1,fp8,fp8,0,0.6472095966339111
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,8192,12,2,128,1,float16,fp8,0,0.717575979232788
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,8192,12,4,128,1,float16,float16,0,0.713809585571289
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,8192,12,12,128,1,float16,float16,0,0.40417118072509767
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,8192,12,4,128,1,float16,fp8,0,0.6887551784515381
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,8192,12,4,128,1,fp8,fp8,0,0.6566368103027344
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,8192,12,12,128,1,fp8,fp8,0,0.37816638946533204
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,8192,12,1,128,1,float16,float16,0,0.38611199855804446
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,8192,12,1,128,1,float16,fp8,0,0.35922880172729493
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,8192,12,1,128,1,fp8,fp8,0,0.3814255952835083
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,8192,12,2,128,1,float16,float16,0,0.3768320083618164
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,8192,12,2,128,1,float16,fp8,0,0.3583120107650757
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,8192,12,2,128,1,fp8,fp8,0,0.378601598739624
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,8192,12,4,128,1,float16,float16,0,0.3831808090209961
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,8192,12,4,128,1,fp8,fp8,0,0.3640304088592529
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,8192,12,12,128,1,float16,float16,0,0.23237440586090088
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,8192,12,12,128,1,float16,fp8,0,0.23531360626220704
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,8192,12,12,128,1,fp8,fp8,0,0.22154879570007324
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,8192,12,1,128,1,float16,float16,0,0.2231760025024414
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,8192,12,1,128,1,fp8,fp8,0,0.2181391954421997
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,8192,12,2,128,1,float16,float16,0,0.21938560009002686
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,8192,12,2,128,1,float16,fp8,0,0.21379361152648926
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,8192,12,2,128,1,fp8,fp8,0,0.2174288034439087
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,8192,12,2,128,1,float16,fp8,0,1.2430463790893556
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,8192,12,4,128,1,float16,float16,0,0.22239038944244385
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,8192,12,4,128,1,float16,fp8,0,0.21285600662231446
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,6144,12,1,128,1,float16,fp8,0,1.434126377105713
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,6144,12,1,128,1,float16,float16,0,1.537985610961914
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,6144,12,1,128,1,fp8,fp8,0,1.4372032165527344
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,8192,12,12,128,1,float16,fp8,0,0.39730880260467527
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,8192,12,4,128,1,float16,fp8,0,0.3828191995620728
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,6144,12,2,128,1,float16,float16,0,1.5469216346740722
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,8192,12,1,128,1,float16,fp8,0,0.21233758926391602
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,6144,12,4,128,1,float16,float16,0,1.564139175415039
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,8192,12,4,128,1,fp8,fp8,0,0.21838719844818116
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,6144,12,12,128,1,float16,float16,0,0.8729663848876953
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,6144,12,4,128,1,float16,fp8,0,1.4350031852722167
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,6144,12,12,128,1,float16,fp8,0,0.8003408432006835
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,6144,12,4,128,1,fp8,fp8,0,1.5183039665222169
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,6144,12,1,128,1,float16,float16,0,0.7864511966705322
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,6144,12,1,128,1,float16,fp8,0,0.7438672065734864
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,6144,12,1,128,1,fp8,fp8,0,0.7453792095184326
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,6144,12,2,128,1,fp8,fp8,0,0.7434735774993897
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,6144,12,2,128,1,float16,fp8,0,0.7746511936187744
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,6144,12,4,128,1,float16,fp8,0,0.746284818649292
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,6144,12,4,128,1,float16,float16,0,0.8036975860595703
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,6144,12,12,128,1,float16,float16,0,0.4595088005065918
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,6144,12,4,128,1,fp8,fp8,0,0.7438255786895752
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,6144,12,12,128,1,fp8,fp8,0,0.4285007953643799
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,6144,12,1,128,1,float16,float16,0,0.4165855884552002
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,6144,12,2,128,1,float16,fp8,0,1.4959967613220215
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,6144,12,2,128,1,fp8,fp8,0,1.4374192237854004
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,6144,12,1,128,1,float16,fp8,0,0.40039520263671874
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,6144,12,1,128,1,fp8,fp8,0,0.3995455980300903
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,6144,12,2,128,1,float16,float16,0,0.4172815799713135
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,6144,12,2,128,1,float16,fp8,0,0.40106401443481443
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,6144,12,2,128,1,fp8,fp8,0,0.40112481117248533
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,6144,12,4,128,1,float16,float16,0,0.42481441497802735
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,6144,12,4,128,1,float16,fp8,0,0.4062160015106201
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,6144,12,12,128,1,float16,float16,0,0.253766393661499
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,6144,12,4,128,1,fp8,fp8,0,0.40234718322753904
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,6144,12,12,128,1,float16,fp8,0,0.24416000843048097
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,6144,12,12,128,1,fp8,fp8,0,0.24314560890197753
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,6144,12,1,128,1,float16,float16,0,0.23465280532836913
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,6144,12,1,128,1,float16,fp8,0,0.22871360778808594
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,6144,12,1,128,1,fp8,fp8,0,0.22890560626983641
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,6144,12,2,128,1,float16,float16,0,0.23292479515075684
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,6144,12,2,128,1,float16,fp8,0,0.22768640518188477
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,6144,12,2,128,1,fp8,fp8,0,0.22968800067901612
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,6144,12,4,128,1,float16,float16,0,0.23881919384002687
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,6144,12,12,128,1,fp8,fp8,0,0.8237008094787598
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,6144,12,4,128,1,float16,fp8,0,0.2285007953643799
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,6144,12,12,128,1,float16,fp8,0,0.15164799690246583
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,6144,12,4,128,1,fp8,fp8,0,0.2290191888809204
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,6144,12,12,128,1,fp8,fp8,0,0.1515231966972351
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,6144,12,1,128,1,float16,float16,0,0.14898879528045655
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,6144,12,1,128,1,fp8,fp8,0,0.14530240297317504
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,6144,12,2,128,1,float16,float16,0,0.1494032025337219
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,6144,12,2,128,1,float16,fp8,0,0.14443360567092894
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,6144,12,2,128,1,fp8,fp8,0,0.145196795463562
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,6144,12,4,128,1,float16,fp8,0,0.14461439847946167
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,6144,12,4,128,1,fp8,fp8,0,0.14438719749450685
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,6144,12,2,128,1,float16,float16,0,0.7866079807281494
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,6144,12,12,128,1,float16,fp8,0,0.4284832000732422
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,4096,12,1,128,1,float16,float16,0,1.4612560272216797
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,4096,12,1,128,1,float16,fp8,0,1.4060400009155274
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,4096,12,1,128,1,fp8,fp8,0,1.4037599563598633
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,4096,12,2,128,1,float16,float16,0,1.4527968406677245
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,4096,12,2,128,1,float16,fp8,0,1.4040592193603516
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,4096,12,4,128,1,float16,fp8,0,1.4034704208374023
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,4096,12,2,128,1,fp8,fp8,0,1.507363224029541
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,6144,12,12,128,1,float16,float16,0,0.15747519731521606
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,6144,12,1,128,1,float16,fp8,0,0.1436911940574646
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,4096,12,4,128,1,fp8,fp8,0,1.5000335693359375
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,6144,12,4,128,1,float16,float16,0,0.1514016032218933
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,4096,12,12,128,1,float16,float16,0,0.8622528076171875
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,4096,12,12,128,1,float16,fp8,0,0.793064022064209
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,4096,12,12,128,1,fp8,fp8,0,0.8276847839355469
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,4096,12,1,128,1,float16,fp8,0,0.7212287902832031
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,4096,12,1,128,1,fp8,fp8,0,0.7360896110534668
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,4096,12,1,128,1,float16,float16,0,0.7382207870483398
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,4096,12,2,128,1,float16,float16,0,0.7407968044281006
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,4096,12,2,128,1,fp8,fp8,0,0.7221615791320801
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,4096,12,4,128,1,float16,float16,0,0.7629968166351319
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,4096,12,4,128,1,float16,fp8,0,0.7219535827636718
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,4096,12,12,128,1,float16,float16,0,0.44037761688232424
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,4096,12,4,128,1,fp8,fp8,0,0.7216591835021973
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,4096,12,12,128,1,float16,fp8,0,0.4187903881072998
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,4096,12,12,128,1,fp8,fp8,0,0.44269280433654784
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,4096,12,1,128,1,float16,float16,0,0.3852351903915405
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,4096,12,1,128,1,float16,fp8,0,0.3815200090408325
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,4096,12,1,128,1,fp8,fp8,0,0.38118720054626465
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,4096,12,2,128,1,float16,float16,0,0.3858288049697876
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,4096,12,2,128,1,float16,fp8,0,0.39857280254364014
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,4096,12,4,128,1,float16,float16,0,0.3996864080429077
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,4096,12,4,128,1,float16,fp8,0,0.38202240467071535
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,4096,12,4,128,1,fp8,fp8,0,0.38346400260925295
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,4096,12,12,128,1,float16,float16,0,0.24106080532073976
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,4096,12,12,128,1,float16,fp8,0,0.23323841094970704
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,4096,12,12,128,1,fp8,fp8,0,0.23041601181030275
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,4096,12,1,128,1,float16,fp8,0,0.2134255886077881
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,4096,12,1,128,1,fp8,fp8,0,0.2122720003128052
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,4096,12,4,128,1,float16,float16,0,1.487392044067383
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,4096,12,2,128,1,float16,float16,0,0.21272640228271483
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,4096,12,2,128,1,fp8,fp8,0,0.21243040561676024
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,4096,12,4,128,1,float16,float16,0,0.21847519874572754
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,4096,12,4,128,1,float16,fp8,0,0.21158881187438966
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,4096,12,4,128,1,fp8,fp8,0,0.21403999328613282
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,4096,12,12,128,1,float16,float16,0,0.13990399837493897
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,4096,12,12,128,1,float16,fp8,0,0.1348639965057373
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,4096,12,12,128,1,fp8,fp8,0,0.13575359582901
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,4096,12,1,128,1,float16,fp8,0,0.12534559965133668
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,4096,12,1,128,1,fp8,fp8,0,0.1259536027908325
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,4096,12,2,128,1,float16,float16,0,0.12699040174484252
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,4096,12,2,128,1,fp8,fp8,0,0.12531039714813233
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,4096,12,2,128,1,float16,fp8,0,0.7225808143615723
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,4096,12,4,128,1,float16,float16,0,0.12970399856567383
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,4096,12,4,128,1,float16,fp8,0,0.12572159767150878
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,4096,12,12,128,1,float16,float16,0,0.08749279975891114
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,4096,12,4,128,1,fp8,fp8,0,0.12532960176467894
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,4096,12,12,128,1,float16,fp8,0,0.0865776002407074
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,4096,12,12,128,1,fp8,fp8,0,0.08615360260009766
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,4096,12,1,128,1,float16,fp8,0,0.08233439922332764
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,4096,12,2,128,1,float16,float16,0,0.0844655990600586
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,4096,12,2,128,1,float16,fp8,0,0.08208320140838624
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,4096,12,2,128,1,fp8,fp8,0,0.08222399950027466
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,4096,12,4,128,1,float16,float16,0,0.08499040007591248
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,4096,12,4,128,1,float16,fp8,0,0.08235039710998535
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,4096,12,4,128,1,fp8,fp8,0,0.08255040049552917
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,4096,12,1,128,1,float16,float16,0,0.21271519660949706
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,4096,12,2,128,1,float16,fp8,0,0.21322879791259766
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,3072,12,1,128,1,float16,float16,0,0.8672479629516602
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,3072,12,1,128,1,float16,fp8,0,0.8763504028320312
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,4096,12,1,128,1,float16,float16,0,0.12780959606170655
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,3072,12,1,128,1,fp8,fp8,0,0.8766639709472657
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,4096,12,2,128,1,float16,fp8,0,0.12665120363235474
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,3072,12,2,128,1,float16,float16,0,0.876153564453125
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,4096,12,1,128,1,float16,float16,0,0.08497920036315917
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,3072,12,2,128,1,fp8,fp8,0,0.8763279914855957
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,4096,12,1,128,1,fp8,fp8,0,0.081632000207901
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,3072,12,4,128,1,float16,float16,0,0.9062864303588867
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,3072,12,4,128,1,float16,fp8,0,0.8741680145263672
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,3072,12,12,128,1,float16,float16,0,0.5364848136901855
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,3072,12,12,128,1,float16,fp8,0,0.5091792106628418
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,3072,12,4,128,1,fp8,fp8,0,0.9820464134216309
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,3072,12,12,128,1,fp8,fp8,0,0.5053023815155029
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,3072,12,1,128,1,float16,float16,0,0.4442255973815918
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,4096,12,2,128,1,fp8,fp8,0,0.3828864097595215
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,3072,12,1,128,1,float16,fp8,0,0.4535024166107178
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,3072,12,1,128,1,fp8,fp8,0,0.4550367832183838
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,3072,12,2,128,1,float16,float16,0,0.44504637718200685
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,3072,12,2,128,1,fp8,fp8,0,0.4529615879058838
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,3072,12,4,128,1,float16,float16,0,0.47837119102478026
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,3072,12,12,128,1,float16,float16,0,0.27996799945831297
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,3072,12,4,128,1,float16,fp8,0,0.4559840202331543
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,3072,12,4,128,1,fp8,fp8,0,0.45389761924743655
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,3072,12,12,128,1,float16,fp8,0,0.2726671934127808
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,3072,12,12,128,1,fp8,fp8,0,0.2710864067077637
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,3072,12,1,128,1,float16,float16,0,0.24537279605865478
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,3072,12,1,128,1,float16,fp8,0,0.2449552059173584
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,3072,12,1,128,1,fp8,fp8,0,0.24549760818481445
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,3072,12,2,128,1,float16,float16,0,0.23895680904388428
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,3072,12,2,128,1,fp8,fp8,0,0.24394240379333496
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,3072,12,4,128,1,float16,float16,0,0.25358080863952637
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,3072,12,4,128,1,float16,fp8,0,0.24507360458374022
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,3072,12,12,128,1,float16,float16,0,0.1553328037261963
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,3072,12,12,128,1,float16,fp8,0,0.15276479721069336
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,3072,12,12,128,1,fp8,fp8,0,0.15746239423751832
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,3072,12,1,128,1,float16,float16,0,0.13712480068206787
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,3072,12,1,128,1,float16,fp8,0,0.13823679685592652
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,3072,12,1,128,1,fp8,fp8,0,0.13812639713287353
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,3072,12,2,128,1,float16,float16,0,0.139956796169281
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,3072,12,2,128,1,float16,fp8,0,0.1388416051864624
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,3072,12,2,128,1,fp8,fp8,0,0.13784639835357665
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,3072,12,4,128,1,float16,float16,0,0.1424239993095398
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,3072,12,4,128,1,float16,fp8,0,0.1399824023246765
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,3072,12,4,128,1,fp8,fp8,0,0.13912639617919922
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,3072,12,12,128,1,float16,float16,0,0.0959712028503418
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,3072,12,12,128,1,float16,fp8,0,0.0927839994430542
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,3072,12,12,128,1,fp8,fp8,0,0.09255679845809936
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,3072,12,1,128,1,float16,float16,0,0.08787680268287659
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,3072,12,1,128,1,float16,fp8,0,0.0864736020565033
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,3072,12,1,128,1,fp8,fp8,0,0.08604639768600464
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,3072,12,2,128,1,float16,float16,0,0.08796479701995849
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,3072,12,2,128,1,float16,fp8,0,0.08649439811706543
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,3072,12,4,128,1,float16,float16,0,0.08932160139083863
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,3072,12,4,128,1,float16,fp8,0,0.08648160099983215
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,3072,12,4,128,1,fp8,fp8,0,0.08652960062026978
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,3072,12,2,128,1,float16,fp8,0,0.8752304077148437
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,3072,12,12,128,1,float16,float16,0,0.05538880228996277
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,3072,12,12,128,1,float16,fp8,0,0.05541759729385376
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,3072,12,12,128,1,fp8,fp8,0,0.055086398124694826
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,3072,12,1,128,1,float16,fp8,0,0.05151680111885071
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,3072,12,1,128,1,fp8,fp8,0,0.051425600051879884
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,3072,12,2,128,1,float16,fp8,0,0.0514415979385376
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,3072,12,2,128,1,fp8,fp8,0,0.05143839716911316
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,3072,12,4,128,1,float16,float16,0,0.05446239709854126
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,3072,12,2,128,1,float16,float16,0,0.05307360291481018
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,3072,12,4,128,1,fp8,fp8,0,0.05141440033912659
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,3072,12,2,128,1,float16,fp8,0,0.4753392219543457
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,2048,12,1,128,1,float16,float16,0,0.8716208457946777
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,2048,12,1,128,1,float16,fp8,0,0.9072527885437012
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,3072,12,4,128,1,fp8,fp8,0,0.24510879516601564
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,2048,12,1,128,1,fp8,fp8,0,0.9488368034362793
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,2048,12,2,128,1,float16,float16,0,0.8625823974609375
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,3072,12,2,128,1,fp8,fp8,0,0.08560640215873719
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,2048,12,2,128,1,float16,fp8,0,0.9072336196899414
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,3072,12,1,128,1,float16,float16,0,0.053169602155685426
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,2048,12,2,128,1,fp8,fp8,0,0.9140352249145508
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,2048,12,4,128,1,float16,float16,0,0.9507424354553222
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,2048,12,4,128,1,float16,fp8,0,0.903932762145996
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,2048,12,4,128,1,fp8,fp8,0,0.9063615798950195
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,2048,12,12,128,1,float16,float16,0,0.5518208026885987
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,3072,12,4,128,1,float16,fp8,0,0.05144960284233093
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,2048,12,12,128,1,float16,fp8,0,0.5372479915618896
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,2048,12,1,128,1,float16,float16,0,0.43827037811279296
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,2048,12,1,128,1,float16,fp8,0,0.4661776065826416
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,2048,12,1,128,1,fp8,fp8,0,0.4660463809967041
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,2048,12,2,128,1,float16,fp8,0,0.4665823936462402
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,2048,12,2,128,1,fp8,fp8,0,0.4653679847717285
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,2048,12,4,128,1,float16,float16,0,0.46384320259094236
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,3072,12,2,128,1,float16,fp8,0,0.24506399631500245
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,2048,12,12,128,1,float16,float16,0,0.28695518970489503
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,2048,12,12,128,1,float16,fp8,0,0.2813152074813843
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,2048,12,12,128,1,fp8,fp8,0,0.28268160820007326
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,2048,12,1,128,1,float16,float16,0,0.23112640380859376
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,2048,12,1,128,1,float16,fp8,0,0.24764800071716309
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,2048,12,1,128,1,fp8,fp8,0,0.24576001167297362
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,2048,12,2,128,1,float16,float16,0,0.2331984043121338
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,2048,12,2,128,1,float16,fp8,0,0.24765920639038086
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,2048,12,2,128,1,fp8,fp8,0,0.24711999893188477
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,2048,12,4,128,1,float16,float16,0,0.2425600051879883
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,2048,12,4,128,1,float16,fp8,0,0.24764640331268312
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,2048,12,12,128,1,float16,float16,0,0.15538239479064941
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,2048,12,4,128,1,fp8,fp8,0,0.24771039485931395
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,2048,12,12,128,1,float16,fp8,0,0.15529760122299194
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,2048,12,1,128,1,float16,float16,0,0.13128000497817993
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,2048,12,12,128,1,fp8,fp8,0,0.1553104043006897
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,2048,12,1,128,1,fp8,fp8,0,0.13530080318450927
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,2048,12,2,128,1,float16,float16,0,0.13179999589920044
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,2048,12,2,128,1,float16,fp8,0,0.13578399419784545
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,2048,12,2,128,1,fp8,fp8,0,0.1357408046722412
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,2048,12,4,128,1,float16,float16,0,0.1371072053909302
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,2048,12,4,128,1,float16,fp8,0,0.13688479661941527
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,2048,12,4,128,1,fp8,fp8,0,0.13666720390319825
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,2048,12,12,128,1,float16,float16,0,0.09202399849891663
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,2048,12,12,128,1,float16,fp8,0,0.08907999992370605
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,2048,12,12,128,1,fp8,fp8,0,0.08969759941101074
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,2048,12,1,128,1,float16,float16,0,0.07927039861679078
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,2048,12,1,128,1,float16,fp8,0,0.08091840147972107
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,2048,12,1,128,1,fp8,fp8,0,0.08018400073051453
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,2048,12,2,128,1,float16,float16,0,0.07925440073013305
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,2048,12,2,128,1,float16,fp8,0,0.07995039820671082
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,2048,12,2,128,1,fp8,fp8,0,0.07970719933509826
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,2048,12,4,128,1,float16,float16,0,0.08173440098762512
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,2048,12,4,128,1,fp8,fp8,0,0.0803712010383606
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,2048,12,12,128,1,float16,fp8,0,0.057436800003051756
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,2048,12,12,128,1,fp8,fp8,0,0.05753440260887146
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,2048,12,1,128,1,float16,float16,0,0.053487998247146604
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,2048,12,12,128,1,fp8,fp8,0,0.583892822265625
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,2048,12,1,128,1,float16,fp8,0,0.05329279899597168
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,2048,12,2,128,1,float16,float16,0,0.44067840576171874
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,2048,12,2,128,1,float16,float16,0,0.05314080119132995
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,2048,12,2,128,1,float16,fp8,0,0.05336959958076477
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,2048,12,2,128,1,fp8,fp8,0,0.05322880148887634
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,2048,12,4,128,1,float16,fp8,0,0.053572797775268556
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,2048,12,4,128,1,float16,float16,0,0.05491840243339539
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,2048,12,4,128,1,fp8,fp8,0,0.052907198667526245
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,2048,12,12,128,1,float16,float16,0,0.04110879898071289
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,2048,12,12,128,1,float16,fp8,0,0.03916159868240356
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,2048,12,12,128,1,fp8,fp8,0,0.04102559983730316
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,2048,12,4,128,1,float16,fp8,0,0.4659008026123047
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,2048,12,1,128,1,float16,float16,0,0.037518399953842166
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,2048,12,4,128,1,fp8,fp8,0,0.466923189163208
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,2048,12,1,128,1,fp8,fp8,0,0.03715679943561554
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,2048,12,2,128,1,float16,float16,0,0.0377920001745224
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,2048,12,2,128,1,float16,fp8,0,0.038601601123809816
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,2048,12,2,128,1,fp8,fp8,0,0.03714079856872558
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,2048,12,4,128,1,float16,fp8,0,0.03841120004653931
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,2048,12,4,128,1,fp8,fp8,0,0.03715679943561554
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1536,12,1,128,1,float16,float16,0,0.5357312202453614
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1536,12,1,128,1,float16,fp8,0,0.5926864147186279
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,2048,12,12,128,1,float16,float16,0,0.057094401121139525
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1536,12,1,128,1,fp8,fp8,0,0.592251205444336
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1536,12,2,128,1,float16,float16,0,0.549835205078125
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,2048,12,1,128,1,fp8,fp8,0,0.05363839864730835
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1536,12,2,128,1,float16,fp8,0,0.5923632144927978
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1536,12,2,128,1,fp8,fp8,0,0.5921328067779541
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1536,12,4,128,1,float16,float16,0,0.5727024078369141
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,2048,12,1,128,1,float16,fp8,0,0.03902559876441956
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1536,12,4,128,1,float16,fp8,0,0.5925072193145752
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,2048,12,4,128,1,float16,float16,0,0.038945600390434265
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1536,12,12,128,1,float16,float16,0,0.36086559295654297
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1536,12,1,128,1,float16,float16,0,0.27946081161499026
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1536,12,12,128,1,fp8,fp8,0,0.36063199043273925
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1536,12,1,128,1,float16,fp8,0,0.3086816072463989
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,2048,12,1,128,1,float16,fp8,0,0.13623679876327516
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1536,12,1,128,1,fp8,fp8,0,0.3069216012954712
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1536,12,2,128,1,float16,float16,0,0.28091039657592776
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1536,12,2,128,1,float16,fp8,0,0.30743839740753176
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1536,12,4,128,1,float16,float16,0,0.2960383892059326
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1536,12,2,128,1,fp8,fp8,0,0.3080528020858765
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1536,12,4,128,1,fp8,fp8,0,0.30773119926452636
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1536,12,12,128,1,float16,fp8,0,0.19303519725799562
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1536,12,12,128,1,fp8,fp8,0,0.19256319999694824
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1536,12,1,128,1,float16,float16,0,0.15286400318145751
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,2048,12,4,128,1,float16,fp8,0,0.08059999942779542
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1536,12,1,128,1,fp8,fp8,0,0.16466879844665527
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1536,12,1,128,1,float16,fp8,0,0.1647968053817749
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1536,12,2,128,1,float16,float16,0,0.1536960005760193
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1536,12,2,128,1,float16,fp8,0,0.1644144058227539
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1536,12,2,128,1,fp8,fp8,0,0.16512320041656495
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1536,12,4,128,1,float16,float16,0,0.16081440448760986
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1536,12,4,128,1,float16,fp8,0,0.1658064007759094
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1536,12,4,128,1,fp8,fp8,0,0.16487840414047242
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1536,12,12,128,1,float16,float16,0,0.10647519826889038
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1536,12,12,128,1,float16,fp8,0,0.10829919576644897
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1536,12,12,128,1,fp8,fp8,0,0.10770239830017089
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1536,12,1,128,1,float16,float16,0,0.08772799968719483
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1536,12,1,128,1,float16,fp8,0,0.09298239946365357
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1536,12,1,128,1,fp8,fp8,0,0.09314720034599304
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1536,12,2,128,1,float16,float16,0,0.0893119990825653
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1536,12,2,128,1,fp8,fp8,0,0.09260159730911255
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1536,12,4,128,1,float16,float16,0,0.09336640238761902
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1536,12,4,128,1,float16,fp8,0,0.09313600063323975
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1536,12,4,128,1,fp8,fp8,0,0.0930400013923645
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1536,12,12,128,1,float16,fp8,0,0.06370880007743836
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1536,12,12,128,1,fp8,fp8,0,0.06375200152397156
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1536,12,1,128,1,float16,float16,0,0.05589119791984558
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1536,12,12,128,1,float16,fp8,0,0.361027193069458
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1536,12,1,128,1,float16,fp8,0,0.05740000009536743
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1536,12,4,128,1,fp8,fp8,0,0.5917247772216797
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1536,12,2,128,1,float16,fp8,0,0.05750880241394043
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1536,12,2,128,1,fp8,fp8,0,0.05692480206489563
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1536,12,4,128,1,float16,float16,0,0.057529598474502563
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1536,12,4,128,1,float16,fp8,0,0.05646399855613708
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1536,12,12,128,1,float16,float16,0,0.03711999952793121
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1536,12,4,128,1,fp8,fp8,0,0.05730239748954773
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1536,12,12,128,1,float16,fp8,0,0.037092798948287965
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1536,12,1,128,1,float16,float16,0,0.03388639986515045
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1536,12,1,128,1,float16,fp8,0,0.03467999994754791
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1536,12,1,128,1,fp8,fp8,0,0.03461439907550812
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1536,12,2,128,1,float16,float16,0,0.03373120129108429
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1536,12,12,128,1,float16,float16,0,0.19120800495147705
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1536,12,2,128,1,float16,fp8,0,0.03451519906520843
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1536,12,2,128,1,fp8,fp8,0,0.034750398993492124
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1536,12,4,128,1,float16,float16,0,0.03503200113773346
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1536,12,4,128,1,float16,fp8,0,0.034806400537490845
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1536,12,12,128,1,float16,float16,0,0.032971200346946714
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1536,12,4,128,1,fp8,fp8,0,0.035078400373458864
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1536,12,12,128,1,float16,fp8,0,0.03288159966468811
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1536,12,12,128,1,fp8,fp8,0,0.03304960131645203
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1536,12,1,128,1,float16,float16,0,0.03089759945869446
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1536,12,1,128,1,float16,fp8,0,0.030958399176597595
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1536,12,1,128,1,fp8,fp8,0,0.030931198596954347
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1536,12,2,128,1,float16,float16,0,0.031201601028442383
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1536,12,2,128,1,float16,fp8,0,0.030865600705146788
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1536,12,2,128,1,fp8,fp8,0,0.031004801392555237
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1536,12,4,128,1,float16,float16,0,0.03094559907913208
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1536,12,4,128,1,float16,fp8,0,0.030943998694419862
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1536,12,4,128,1,fp8,fp8,0,0.030854400992393494
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,1024,12,1,128,1,float16,float16,0,0.573203182220459
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,1024,12,1,128,1,float16,fp8,0,0.653929615020752
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1536,12,2,128,1,float16,fp8,0,0.09231359958648681
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1536,12,12,128,1,float16,float16,0,0.0640496015548706
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,1024,12,1,128,1,fp8,fp8,0,0.656331205368042
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1536,12,1,128,1,fp8,fp8,0,0.05738239884376526
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1536,12,2,128,1,float16,float16,0,0.055718398094177245
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,1024,12,2,128,1,float16,fp8,0,0.6547135829925537
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,1024,12,2,128,1,fp8,fp8,0,0.6580111980438232
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1536,12,12,128,1,fp8,fp8,0,0.03739840090274811
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,1024,12,4,128,1,float16,float16,0,0.6226943969726563
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1024,12,12,128,1,float16,float16,0,0.4030896186828613
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,1024,12,4,128,1,float16,fp8,0,0.6571663856506348
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,1024,12,4,128,1,fp8,fp8,0,0.6581888198852539
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1536,12,4,128,1,float16,fp8,0,0.3087680101394653
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1024,12,12,128,1,float16,fp8,0,0.40923519134521485
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1024,12,1,128,1,float16,float16,0,0.29594080448150634
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1024,12,12,128,1,fp8,fp8,0,0.4095183849334717
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1024,12,2,128,1,float16,float16,0,0.2965967893600464
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1024,12,1,128,1,fp8,fp8,0,0.3387664079666138
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1024,12,2,128,1,float16,fp8,0,0.3380896091461182
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1024,12,2,128,1,fp8,fp8,0,0.3377552032470703
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1024,12,4,128,1,float16,float16,0,0.3184959888458252
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1024,12,4,128,1,float16,fp8,0,0.33914239406585694
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1024,12,12,128,1,float16,float16,0,0.21059839725494384
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1024,12,4,128,1,fp8,fp8,0,0.3382047891616821
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1024,12,12,128,1,float16,fp8,0,0.2138144016265869
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1024,12,1,128,1,float16,float16,0,0.15958720445632935
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1024,12,1,128,1,float16,fp8,0,0.1793056011199951
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1024,12,1,128,1,fp8,fp8,0,0.17876319885253905
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1024,12,2,128,1,float16,float16,0,0.1593008041381836
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1024,12,2,128,1,float16,fp8,0,0.17929600477218627
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1024,12,2,128,1,fp8,fp8,0,0.17903679609298706
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1024,12,4,128,1,float16,float16,0,0.169868803024292
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1024,12,4,128,1,float16,fp8,0,0.17864639759063722
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1024,12,12,128,1,float16,float16,0,0.11495840549468994
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1024,12,12,128,1,float16,fp8,0,0.11700960397720336
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1024,12,12,128,1,fp8,fp8,0,0.11699680089950562
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1024,12,1,128,1,float16,float16,0,0.09095519781112671
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1024,12,1,128,1,float16,fp8,0,0.09791039824485778
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1024,12,1,128,1,fp8,fp8,0,0.0976639986038208
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1024,12,2,128,1,float16,float16,0,0.09135839939117432
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1024,12,2,128,1,float16,fp8,0,0.09769920110702515
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1024,12,2,128,1,fp8,fp8,0,0.09786080121994019
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1024,12,4,128,1,float16,float16,0,0.09579359889030456
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1024,12,4,128,1,float16,fp8,0,0.09848480224609375
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1024,12,4,128,1,fp8,fp8,0,0.09894559979438781
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1024,12,12,128,1,float16,float16,0,0.0668720006942749
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1024,12,12,128,1,float16,fp8,0,0.06720319986343384
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1024,12,12,128,1,fp8,fp8,0,0.06655359864234925
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1024,12,1,128,1,float16,float16,0,0.05460799932479858
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1024,12,1,128,1,float16,fp8,0,0.05823839902877807
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1024,12,1,128,1,fp8,fp8,0,0.057868802547454835
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1024,12,2,128,1,float16,float16,0,0.054604798555374146
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1024,12,2,128,1,float16,fp8,0,0.05820000171661377
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,1024,12,2,128,1,float16,float16,0,0.5739247798919678
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1024,12,4,128,1,float16,float16,0,0.05715519785881042
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1024,12,4,128,1,float16,fp8,0,0.058430397510528566
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1024,12,4,128,1,fp8,fp8,0,0.05746560096740723
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1024,12,12,128,1,float16,float16,0,0.04116640090942383
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1024,12,12,128,1,float16,fp8,0,0.04222719967365265
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1024,12,12,128,1,fp8,fp8,0,0.041912001371383664
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1024,12,1,128,1,float16,float16,0,0.03706879913806915
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1024,12,1,128,1,float16,fp8,0,0.0377344012260437
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1024,12,1,128,1,fp8,fp8,0,0.03812159895896912
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1024,12,2,128,1,float16,float16,0,0.03704479932785034
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1024,12,2,128,1,float16,fp8,0,0.038078400492668155
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1024,12,2,128,1,fp8,fp8,0,0.038134399056434634
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1024,12,4,128,1,float16,float16,0,0.03779999911785126
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1024,12,4,128,1,float16,fp8,0,0.03790720105171204
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1024,12,4,128,1,fp8,fp8,0,0.03799999952316284
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1024,12,12,128,1,float16,fp8,0,0.028806400299072266
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1024,12,12,128,1,fp8,fp8,0,0.028887999057769776
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1024,12,12,128,1,float16,float16,0,0.02709760069847107
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1024,12,1,128,1,float16,float16,0,0.025043201446533204
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1024,12,1,128,1,float16,fp8,0,0.026756799221038817
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1024,12,1,128,1,fp8,fp8,0,0.026739200949668883
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1024,12,2,128,1,float16,float16,0,0.024987199902534486
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1024,12,2,128,1,float16,fp8,0,0.026740801334381104
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1024,12,2,128,1,fp8,fp8,0,0.026872000098228453
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1024,12,4,128,1,float16,float16,0,0.026766398549079896
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1024,12,4,128,1,float16,fp8,0,0.026731199026107787
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1024,12,1,128,1,float16,fp8,0,0.3377648115158081
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1024,12,12,128,1,float16,float16,0,0.02553279995918274
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1024,12,4,128,1,fp8,fp8,0,0.026855999231338502
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1024,12,12,128,1,float16,fp8,0,0.025644800066947936
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1024,12,1,128,1,float16,float16,0,0.02481279969215393
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1024,12,1,128,1,float16,fp8,0,0.024772800505161285
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1024,12,1,128,1,fp8,fp8,0,0.024806399643421174
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1024,12,2,128,1,float16,float16,0,0.02470400035381317
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1024,12,2,128,1,float16,fp8,0,0.02483839988708496
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1024,12,2,128,1,fp8,fp8,0,0.02479040026664734
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1024,12,4,128,1,float16,float16,0,0.024803200364112855
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1024,12,4,128,1,float16,fp8,0,0.024779200553894043
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1024,12,4,128,1,fp8,fp8,0,0.0247871994972229
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1024,12,12,128,1,fp8,fp8,0,0.21455199718475343
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,512,12,1,128,1,float16,float16,0,0.43383197784423827
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,512,12,1,128,1,float16,fp8,0,0.5304512023925781
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,512,12,1,128,1,fp8,fp8,0,0.5280464172363282
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,512,12,2,128,1,float16,float16,0,0.438047981262207
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1024,12,4,128,1,fp8,fp8,0,0.1794927954673767
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,512,12,2,128,1,float16,fp8,0,0.5304207801818848
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,512,12,4,128,1,float16,float16,0,0.47743840217590333
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,512,12,2,128,1,fp8,fp8,0,0.5297760009765625
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,512,12,4,128,1,float16,fp8,0,0.5296160221099854
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,512,12,4,128,1,fp8,fp8,0,0.5309616088867187
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,512,12,12,128,1,float16,float16,0,0.32994880676269533
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,512,12,1,128,1,float16,float16,0,0.22698719501495362
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,512,12,12,128,1,float16,fp8,0,0.3397615909576416
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,512,12,12,128,1,fp8,fp8,0,0.34169280529022217
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,512,12,1,128,1,float16,fp8,0,0.27388958930969237
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1024,12,2,128,1,fp8,fp8,0,0.05804799795150757
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,512,12,2,128,1,float16,fp8,0,0.27332639694213867
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,512,12,2,128,1,fp8,fp8,0,0.271612811088562
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,512,12,4,128,1,float16,fp8,0,0.2730736017227173
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,512,12,12,128,1,float16,float16,0,0.17367199659347535
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,512,12,12,128,1,float16,fp8,0,0.17835040092468263
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,512,12,1,128,1,float16,float16,0,0.12300000190734864
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,512,12,1,128,1,float16,fp8,0,0.14377599954605103
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,512,12,1,128,1,fp8,fp8,0,0.1448591947555542
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,512,12,2,128,1,float16,float16,0,0.12367199659347534
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,512,12,2,128,1,float16,fp8,0,0.14425599575042725
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1024,12,12,128,1,fp8,fp8,0,0.026683199405670165
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,512,12,2,128,1,fp8,fp8,0,0.1441648006439209
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,512,12,4,128,1,float16,fp8,0,0.14481120109558104
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,512,12,4,128,1,fp8,fp8,0,0.14501600265502929
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,512,12,12,128,1,float16,float16,0,0.09464319944381713
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,512,12,12,128,1,float16,fp8,0,0.09810879826545715
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,512,12,12,128,1,fp8,fp8,0,0.09797120094299316
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,512,12,1,128,1,float16,float16,0,0.0705456018447876
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,512,12,1,128,1,float16,fp8,0,0.07914080023765564
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,512,12,1,128,1,fp8,fp8,0,0.07832639813423156
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,512,12,2,128,1,float16,float16,0,0.0703599989414215
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,512,12,2,128,1,float16,fp8,0,0.07906079888343812
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,512,12,2,128,1,float16,float16,0,0.2283008098602295
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,512,12,1,128,1,fp8,fp8,0,0.2730448007583618
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,512,12,2,128,1,fp8,fp8,0,0.07920479774475098
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,512,12,4,128,1,float16,float16,0,0.0758192002773285
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,512,12,4,128,1,float16,fp8,0,0.080103999376297
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,512,12,4,128,1,fp8,fp8,0,0.07980800271034241
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,512,12,12,128,1,float16,float16,0,0.05454239845275879
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,512,12,12,128,1,float16,fp8,0,0.05528640151023865
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,512,12,4,128,1,fp8,fp8,0,0.2735136032104492
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,512,12,1,128,1,float16,fp8,0,0.04525440037250519
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,512,12,1,128,1,fp8,fp8,0,0.04638560116291046
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,512,12,2,128,1,float16,float16,0,0.041193601489067075
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,512,12,2,128,1,float16,fp8,0,0.04591360092163086
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,512,12,12,128,1,fp8,fp8,0,0.17918399572372437
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,512,12,2,128,1,fp8,fp8,0,0.04549440145492554
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,512,12,4,128,1,float16,float16,0,0.043823999166488645
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,512,12,4,128,1,fp8,fp8,0,0.045798400044441225
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,512,12,12,128,1,float16,float16,0,0.03286080062389374
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,512,12,12,128,1,float16,fp8,0,0.03513439893722534
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,512,12,12,128,1,fp8,fp8,0,0.035062399506568906
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,512,12,1,128,1,float16,float16,0,0.028918400406837463
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,512,12,1,128,1,float16,fp8,0,0.030900800228118898
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,512,12,1,128,1,fp8,fp8,0,0.030987200140953065
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,512,12,2,128,1,float16,float16,0,0.0288783997297287
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,512,12,2,128,1,float16,fp8,0,0.03091840147972107
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,512,12,2,128,1,fp8,fp8,0,0.03092319965362549
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,512,12,4,128,1,float16,fp8,0,0.0308896005153656
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,512,12,4,128,1,float16,float16,0,0.02982879877090454
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,512,12,4,128,1,float16,float16,0,0.13232320547103882
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,512,12,4,128,1,fp8,fp8,0,0.030939200520515443
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,512,12,12,128,1,float16,fp8,0,0.022729599475860597
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,512,12,12,128,1,fp8,fp8,0,0.022675199806690215
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,512,12,1,128,1,float16,float16,0,0.018688000738620758
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,512,12,1,128,1,float16,fp8,0,0.02067359983921051
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,512,12,1,128,1,fp8,fp8,0,0.020708799362182617
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,512,12,2,128,1,float16,fp8,0,0.02072480022907257
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,512,12,2,128,1,fp8,fp8,0,0.020715199410915375
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,512,12,4,128,1,float16,float16,0,0.020708799362182617
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,512,12,4,128,1,float16,fp8,0,0.020737600326538087
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,512,12,4,128,1,fp8,fp8,0,0.020715199410915375
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,512,12,12,128,1,float16,float16,0,0.02004159986972809
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,512,12,12,128,1,float16,fp8,0,0.02072799950838089
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,512,12,12,128,1,fp8,fp8,0,0.020750400424003602
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,512,12,1,128,1,float16,float16,0,0.018676799535751343
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,512,12,1,128,1,float16,fp8,0,0.018691200017929076
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,512,12,1,128,1,fp8,fp8,0,0.01871200054883957
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,512,12,2,128,1,float16,float16,0,0.018713599443435668
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,512,12,2,128,1,float16,fp8,0,0.018671999871730804
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,512,12,2,128,1,fp8,fp8,0,0.01865759938955307
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,512,12,4,128,1,float16,float16,0,0.01863359957933426
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,512,12,4,128,1,float16,fp8,0,0.018700799345970152
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,512,12,4,128,1,fp8,fp8,0,0.0186256006360054
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,512,12,4,128,1,float16,float16,0,0.2482624053955078
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,512,12,12,128,1,float16,fp8,0,0.018641600012779237
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,512,12,12,128,1,fp8,fp8,0,0.0186271995306015
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,512,12,12,128,1,fp8,fp8,0,0.054983997344970705
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,512,12,1,128,1,float16,float16,0,0.01852799952030182
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,512,12,1,128,1,float16,fp8,0,0.018676799535751343
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,512,12,1,128,1,fp8,fp8,0,0.01868479996919632
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,512,12,2,128,1,float16,float16,0,0.018545599281787874
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,512,12,2,128,1,float16,fp8,0,0.01866399943828583
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,512,12,2,128,1,fp8,fp8,0,0.018673600256443025
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,512,12,4,128,1,float16,float16,0,0.018571199476718904
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,512,12,4,128,1,float16,fp8,0,0.018612800538539885
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,512,12,4,128,1,fp8,fp8,0,0.0186256006360054
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,512,12,4,128,1,float16,fp8,0,0.04580160081386566
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,256,12,1,128,1,float16,float16,0,0.18788479566574096
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,256,12,1,128,1,fp8,fp8,0,0.23615679740905762
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,256,12,1,128,1,float16,fp8,0,0.2362607955932617
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,256,12,2,128,1,float16,fp8,0,0.2359247922897339
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,256,12,2,128,1,fp8,fp8,0,0.2361504077911377
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,256,12,4,128,1,float16,float16,0,0.20943200588226318
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,512,12,12,128,1,float16,float16,0,0.020712000131607056
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,256,12,4,128,1,float16,fp8,0,0.23614718914031982
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,256,12,12,128,1,float16,float16,0,0.15300320386886596
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,256,12,4,128,1,fp8,fp8,0,0.2368175983428955
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,256,12,12,128,1,float16,fp8,0,0.16106719970703126
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,512,12,2,128,1,float16,float16,0,0.018926399946212768
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,256,12,1,128,1,float16,float16,0,0.10269919633865357
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,256,12,1,128,1,float16,fp8,0,0.12577439546585084
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,256,12,12,128,1,fp8,fp8,0,0.15992000102996826
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,256,12,1,128,1,fp8,fp8,0,0.12650400400161743
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,256,12,2,128,1,float16,float16,0,0.1035264015197754
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,256,12,2,128,1,float16,fp8,0,0.12591359615325928
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,256,12,2,128,1,fp8,fp8,0,0.1258576035499573
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,256,12,4,128,1,float16,fp8,0,0.12706880569458007
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,256,12,12,128,1,float16,fp8,0,0.08931840062141419
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,256,12,12,128,1,fp8,fp8,0,0.08889120221138
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,256,12,1,128,1,float16,float16,0,0.06039680242538452
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,256,12,1,128,1,float16,fp8,0,0.06986079812049865
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,256,12,1,128,1,fp8,fp8,0,0.06984639763832093
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,256,12,2,128,1,float16,float16,0,0.06064479947090149
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,256,12,2,128,1,float16,fp8,0,0.06985440254211425
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,256,12,2,128,1,fp8,fp8,0,0.06986240148544312
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,512,12,12,128,1,float16,float16,0,0.01870719939470291
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,512,12,1,128,1,float16,float16,0,0.04148319959640503
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,256,12,4,128,1,float16,fp8,0,0.0700111985206604
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,256,12,4,128,1,fp8,fp8,0,0.07106400132179261
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,256,12,12,128,1,float16,float16,0,0.0472927987575531
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,256,12,12,128,1,float16,fp8,0,0.04859359860420227
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,256,12,12,128,1,fp8,fp8,0,0.04761599898338318
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,256,12,1,128,1,float16,float16,0,0.034999999403953555
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,256,12,1,128,1,float16,fp8,0,0.03917439877986908
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,256,12,1,128,1,fp8,fp8,0,0.03943679928779602
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,256,12,2,128,1,float16,float16,0,0.034360000491142274
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,256,12,2,128,1,float16,fp8,0,0.03924480080604553
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,256,12,2,128,1,float16,float16,0,0.18912479877471924
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,256,12,2,128,1,fp8,fp8,0,0.039136001467704774
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,256,12,4,128,1,float16,float16,0,0.037003201246261594
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,256,12,4,128,1,float16,fp8,0,0.03967199921607971
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,256,12,4,128,1,fp8,fp8,0,0.039155200123786926
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,256,12,12,128,1,float16,float16,0,0.027399998903274537
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,256,12,12,128,1,float16,fp8,0,0.032199999690055846
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,256,12,12,128,1,fp8,fp8,0,0.030959999561309813
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,256,12,1,128,1,float16,float16,0,0.024796800315380098
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,256,12,1,128,1,float16,fp8,0,0.02738400101661682
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,256,12,1,128,1,fp8,fp8,0,0.026836800575256347
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,256,12,2,128,1,float16,fp8,0,0.02688960134983063
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,256,12,2,128,1,fp8,fp8,0,0.027423998713493346
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,256,12,4,128,1,float16,float16,0,0.02590720057487488
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,256,12,4,128,1,float16,fp8,0,0.027195200324058533
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,256,12,4,128,1,fp8,fp8,0,0.02693440020084381
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,256,12,12,128,1,float16,float16,0,0.018628799915313722
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,256,12,12,128,1,float16,fp8,0,0.020688000321388244
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,256,12,12,128,1,fp8,fp8,0,0.020684799551963805
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,256,12,1,128,1,float16,float16,0,0.01661760061979294
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,256,12,4,128,1,float16,float16,0,0.11298880577087403
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,256,12,1,128,1,fp8,fp8,0,0.01866080015897751
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,256,12,2,128,1,float16,float16,0,0.01661760061979294
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,256,12,2,128,1,float16,fp8,0,0.01857600063085556
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,256,12,2,128,1,fp8,fp8,0,0.018595199286937713
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,256,12,4,128,1,float16,float16,0,0.017745600640773775
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,256,12,4,128,1,float16,fp8,0,0.01858399957418442
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,256,12,4,128,1,fp8,fp8,0,0.01858240067958832
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,256,12,12,128,1,float16,float16,0,0.01659200042486191
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,256,12,12,128,1,float16,fp8,0,0.017238399386405943
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,256,12,12,128,1,fp8,fp8,0,0.016596800088882445
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,256,12,1,128,1,float16,float16,0,0.016279999911785126
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,256,12,1,128,1,float16,fp8,0,0.016571199893951415
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,256,12,1,128,1,fp8,fp8,0,0.01658560037612915
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,256,12,2,128,1,float16,float16,0,0.014556799829006196
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,256,12,2,128,1,float16,fp8,0,0.01661919951438904
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,256,12,2,128,1,fp8,fp8,0,0.016521599888801575
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,256,12,4,128,1,float16,float16,0,0.016598400473594666
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,256,12,4,128,1,float16,fp8,0,0.016569599509239197
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,256,12,4,128,1,fp8,fp8,0,0.01661919951438904
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,256,12,12,128,1,float16,float16,0,0.01653279960155487
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,256,12,12,128,1,float16,fp8,0,0.016547200083732606
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,256,12,1,128,1,float16,float16,0,0.01459839940071106
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,256,12,12,128,1,fp8,fp8,0,0.016553600132465363
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,256,12,1,128,1,float16,fp8,0,0.014643199741840363
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,256,12,1,128,1,fp8,fp8,0,0.014587199687957764
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,256,12,2,128,1,float16,float16,0,0.014584000408649444
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,256,12,2,128,1,float16,fp8,0,0.014537599682807923
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,256,12,4,128,1,float16,float16,0,0.0653168022632599
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,256,12,2,128,1,fp8,fp8,0,0.014587199687957764
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,256,12,4,128,1,float16,float16,0,0.014571200311183929
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,256,12,4,128,1,float16,fp8,0,0.014616000652313232
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,256,12,12,128,1,float16,float16,0,0.014903999865055084
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,256,12,12,128,1,float16,fp8,0,0.014601600170135499
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,256,12,12,128,1,fp8,fp8,0,0.015033599734306336
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,256,12,1,128,1,float16,float16,0,0.01451680064201355
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,256,12,1,128,1,float16,fp8,0,0.014571200311183929
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,256,12,2,128,1,float16,float16,0,0.014564800262451171
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,256,12,1,128,1,fp8,fp8,0,0.014542399346828461
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,256,12,2,128,1,float16,fp8,0,0.014590400457382201
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,256,12,2,128,1,fp8,fp8,0,0.014572800695896148
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,256,12,4,128,1,float16,float16,0,0.014628799259662628
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,256,12,4,128,1,float16,fp8,0,0.014584000408649444
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,256,12,4,128,1,fp8,fp8,0,0.014535999298095703
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,128,12,1,128,1,float16,float16,0,0.11722240447998047
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,128,12,1,128,1,float16,fp8,0,0.140065598487854
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,128,12,2,128,1,float16,float16,0,0.11751199960708618
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,128,12,2,128,1,float16,fp8,0,0.1398848056793213
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,256,12,2,128,1,float16,float16,0,0.024740800261497498
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,128,12,2,128,1,fp8,fp8,0,0.14024640321731568
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,128,12,4,128,1,float16,float16,0,0.1265663981437683
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,128,12,4,128,1,float16,fp8,0,0.1400928020477295
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,128,12,4,128,1,fp8,fp8,0,0.14039839506149293
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,128,12,12,128,1,float16,float16,0,0.08815839886665344
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,128,12,12,128,1,float16,fp8,0,0.0944271981716156
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,128,12,12,128,1,fp8,fp8,0,0.09438239932060241
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,128,12,1,128,1,float16,float16,0,0.06556159853935242
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,256,12,1,128,1,float16,fp8,0,0.01753759980201721
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,128,12,1,128,1,float16,fp8,0,0.07595840096473694
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,128,12,1,128,1,fp8,fp8,0,0.07515680193901061
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,256,12,12,128,1,float16,float16,0,0.08495039939880371
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,128,12,2,128,1,float16,fp8,0,0.07569440007209778
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,128,12,2,128,1,fp8,fp8,0,0.07593600153923034
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,128,12,4,128,1,float16,fp8,0,0.07596480250358581
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,128,12,4,128,1,fp8,fp8,0,0.07599840164184571
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,128,12,12,128,1,float16,float16,0,0.049572798609733584
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,128,12,12,128,1,float16,fp8,0,0.05327519774436951
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,128,12,12,128,1,fp8,fp8,0,0.052132797241210935
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,128,12,1,128,1,float16,float16,0,0.03723680078983307
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,128,12,1,128,1,fp8,fp8,0,0.04320479929447174
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,128,12,2,128,1,float16,float16,0,0.03788959980010986
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,128,12,2,128,1,float16,fp8,0,0.04320319890975952
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,128,12,2,128,1,fp8,fp8,0,0.043217599391937256
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,128,12,4,128,1,float16,float16,0,0.04042559862136841
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,128,12,4,128,1,float16,fp8,0,0.04323199987411499
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,256,12,4,128,1,fp8,fp8,0,0.016519999504089354
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,128,12,4,128,1,fp8,fp8,0,0.04323360025882721
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,128,12,12,128,1,float16,float16,0,0.026849600672721862
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,128,12,12,128,1,float16,fp8,0,0.029854398965835572
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,128,12,12,128,1,fp8,fp8,0,0.030859199166297913
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,128,12,1,128,1,float16,float16,0,0.022814400494098663
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,128,12,1,128,1,fp8,fp8,0,0.026080000400543212
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,128,12,1,128,1,float16,fp8,0,0.026124799251556398
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,128,12,2,128,1,float16,float16,0,0.022951999306678773
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,128,12,2,128,1,float16,fp8,0,0.026182401180267333
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,128,12,2,128,1,fp8,fp8,0,0.02624480128288269
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,128,12,4,128,1,float16,float16,0,0.024719999730587007
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,128,12,1,128,1,fp8,fp8,0,0.1395599961280823
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,128,12,4,128,1,float16,fp8,0,0.026535999774932862
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,128,12,4,128,1,fp8,fp8,0,0.026158401370048524
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,128,12,12,128,1,float16,float16,0,0.018643200397491455
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,128,12,12,128,1,float16,fp8,0,0.02075359970331192
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,128,12,12,128,1,fp8,fp8,0,0.02072480022907257
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,128,12,1,128,1,float16,float16,0,0.01659200042486191
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,128,12,1,128,1,float16,fp8,0,0.01863519996404648
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,128,12,1,128,1,fp8,fp8,0,0.01868479996919632
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,128,12,2,128,1,float16,float16,0,0.016638399660587312
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,128,12,2,128,1,float16,fp8,0,0.0186271995306015
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,128,12,2,128,1,fp8,fp8,0,0.01866080015897751
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,128,12,4,128,1,float16,fp8,0,0.018643200397491455
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,128,12,12,128,1,float16,float16,0,0.012535999715328216
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,128,12,12,128,1,float16,fp8,0,0.014497600495815277
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,128,12,12,128,1,fp8,fp8,0,0.01446399986743927
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,256,12,4,128,1,fp8,fp8,0,0.127019202709198
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,128,12,1,128,1,float16,fp8,0,0.012441600114107132
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,128,12,4,128,1,float16,float16,0,0.07068960070610046
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,128,12,1,128,1,fp8,fp8,0,0.012510399520397186
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,128,12,2,128,1,float16,float16,0,0.012486399710178375
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,128,12,2,128,1,float16,fp8,0,0.012444800138473511
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,128,12,2,128,1,fp8,fp8,0,0.012534399330615998
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,128,12,4,128,1,float16,float16,0,0.012545600533485413
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,128,12,4,128,1,float16,fp8,0,0.012492799758911132
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,128,12,4,128,1,fp8,fp8,0,0.012503999471664428
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,128,12,12,128,1,float16,float16,0,0.012454400211572647
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,128,12,12,128,1,float16,fp8,0,0.012451200187206269
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,128,12,1,128,1,float16,fp8,0,0.043270400166511534
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,128,12,1,128,1,float16,float16,0,0.012436799705028534
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,128,12,1,128,1,float16,fp8,0,0.012441600114107132
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,128,12,1,128,1,fp8,fp8,0,0.01242239996790886
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,128,12,2,128,1,float16,float16,0,0.012425599992275238
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,128,12,2,128,1,float16,fp8,0,0.012505599856376648
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,128,12,4,128,1,float16,float16,0,0.010952000319957734
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,128,12,4,128,1,float16,fp8,0,0.012489599734544754
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,128,12,4,128,1,fp8,fp8,0,0.012441600114107132
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,128,12,12,128,1,float16,float16,0,0.012508800625801087
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,128,12,12,128,1,float16,fp8,0,0.0124719999730587
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,128,12,12,128,1,fp8,fp8,0,0.012452799826860428
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,128,12,1,128,1,float16,float16,0,0.01185920014977455
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,128,12,1,128,1,fp8,fp8,0,0.012399999797344208
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,128,12,2,128,1,float16,float16,0,0.01045600026845932
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,128,12,2,128,1,float16,fp8,0,0.011470399796962738
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,128,12,2,128,1,fp8,fp8,0,0.011406400054693223
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,128,12,4,128,1,float16,float16,0,0.010462400317192078
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,128,12,4,128,1,float16,fp8,0,0.011428800225257874
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,128,12,4,128,1,fp8,fp8,0,0.011427199840545655
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,128,12,12,128,1,float16,float16,0,0.011334399878978729
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,128,12,12,128,1,float16,fp8,0,0.010529600083827972
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,128,12,12,128,1,fp8,fp8,0,0.011531200259923935
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,128,12,1,128,1,float16,float16,0,0.01109120026230812
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,128,12,4,128,1,float16,float16,0,0.017451199889183044
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,128,12,4,128,1,fp8,fp8,0,0.018649600446224213
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,128,12,2,128,1,float16,float16,0,0.010520000010728836
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,128,12,2,128,1,float16,fp8,0,0.011495999991893768
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,128,12,2,128,1,fp8,fp8,0,0.011452800035476685
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,128,12,4,128,1,float16,float16,0,0.010758399963378906
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,128,12,2,128,1,float16,float16,0,0.065830397605896
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,128,12,4,128,1,fp8,fp8,0,0.01045600026845932
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,64,12,1,128,1,float16,float16,0,0.09100800156593322
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,64,12,1,128,1,float16,fp8,0,0.10086560249328613
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,64,12,1,128,1,fp8,fp8,0,0.10088479518890381
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,128,12,12,128,1,fp8,fp8,0,0.01252799928188324
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,64,12,2,128,1,float16,float16,0,0.09186879992485046
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,64,12,2,128,1,float16,fp8,0,0.10099040269851685
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,128,12,2,128,1,fp8,fp8,0,0.012441600114107132
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,64,12,2,128,1,fp8,fp8,0,0.10100159645080567
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,64,12,4,128,1,float16,float16,0,0.09580320119857788
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,64,12,4,128,1,float16,fp8,0,0.10102880001068115
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,64,12,12,128,1,float16,float16,0,0.061964797973632815
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,64,12,4,128,1,fp8,fp8,0,0.10167039632797241
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,64,12,12,128,1,float16,fp8,0,0.06368640065193176
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,64,12,12,128,1,fp8,fp8,0,0.06419199705123901
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,64,12,1,128,1,float16,float16,0,0.04969759881496429
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,64,12,1,128,1,float16,fp8,0,0.05554400086402893
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,64,12,1,128,1,fp8,fp8,0,0.054923200607299806
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,64,12,2,128,1,float16,float16,0,0.05070880055427551
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,128,12,1,128,1,float16,fp8,0,0.012454400211572647
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,64,12,2,128,1,float16,fp8,0,0.05512319803237915
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,64,12,2,128,1,fp8,fp8,0,0.0555184006690979
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,64,12,4,128,1,float16,float16,0,0.053166401386260984
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,64,12,4,128,1,float16,fp8,0,0.055225598812103274
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,64,12,4,128,1,fp8,fp8,0,0.0555840015411377
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,64,12,12,128,1,float16,float16,0,0.03348160088062287
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,64,12,12,128,1,float16,fp8,0,0.03705919981002807
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,64,12,1,128,1,float16,float16,0,0.03035840094089508
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,64,12,1,128,1,fp8,fp8,0,0.0329039990901947
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,64,12,1,128,1,float16,fp8,0,0.032876798510551454
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,64,12,2,128,1,float16,float16,0,0.03048959970474243
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,64,12,2,128,1,float16,fp8,0,0.03300639986991882
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,64,12,2,128,1,fp8,fp8,0,0.032897600531578065
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,64,12,4,128,1,float16,float16,0,0.030958399176597595
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,64,12,4,128,1,float16,fp8,0,0.03289600014686585
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,64,12,12,128,1,float16,float16,0,0.020691199600696562
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,128,12,1,128,1,float16,fp8,0,0.010542400181293488
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,128,12,1,128,1,fp8,fp8,0,0.010571199655532836
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,64,12,12,128,1,fp8,fp8,0,0.022763200104236603
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,64,12,1,128,1,float16,float16,0,0.01868640035390854
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,64,12,1,128,1,float16,fp8,0,0.020694400370121
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,64,12,2,128,1,float16,float16,0,0.01873600035905838
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,64,12,1,128,1,fp8,fp8,0,0.020608000457286835
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,64,12,2,128,1,float16,fp8,0,0.020659199357032774
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,64,12,2,128,1,fp8,fp8,0,0.02072640061378479
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,64,12,4,128,1,float16,float16,0,0.020904000103473663
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,64,12,4,128,1,float16,fp8,0,0.020623999834060668
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,64,12,4,128,1,fp8,fp8,0,0.020627200603485107
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,64,12,12,128,1,float16,float16,0,0.01462559998035431
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,64,12,12,128,1,float16,fp8,0,0.016553600132465363
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,64,12,12,128,1,fp8,fp8,0,0.016649599373340606
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,64,12,1,128,1,float16,fp8,0,0.014564800262451171
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,64,12,1,128,1,fp8,fp8,0,0.014548799395561219
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,64,12,2,128,1,float16,float16,0,0.014548799395561219
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,64,12,2,128,1,float16,fp8,0,0.014575999975204468
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,64,12,2,128,1,fp8,fp8,0,0.014528000354766845
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,64,12,4,128,1,float16,float16,0,0.014545600116252898
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,64,12,4,128,1,float16,fp8,0,0.015072000026702882
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,64,12,4,128,1,fp8,fp8,0,0.015270400047302245
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,64,12,12,128,1,float16,float16,0,0.012414400279521943
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,64,12,12,128,1,float16,fp8,0,0.012401600182056428
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,64,12,12,128,1,fp8,fp8,0,0.012520000338554382
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,64,12,1,128,1,float16,float16,0,0.010363200306892395
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,64,12,1,128,1,float16,fp8,0,0.010572800040245056
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,64,12,1,128,1,fp8,fp8,0,0.01043040007352829
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,64,12,2,128,1,float16,float16,0,0.010539200156927109
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,64,12,2,128,1,fp8,fp8,0,0.010486400127410889
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,64,12,2,128,1,float16,fp8,0,0.010470400005578995
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,64,12,4,128,1,float16,float16,0,0.01045600026845932
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,64,12,4,128,1,float16,fp8,0,0.010555200278759003
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,64,12,4,128,1,fp8,fp8,0,0.010441599786281586
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,64,12,12,128,1,float16,float16,0,0.010576000064611435
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,64,12,12,128,1,float16,fp8,0,0.01045759990811348
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,64,12,12,128,1,fp8,fp8,0,0.011072000116109848
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,64,12,1,128,1,float16,float16,0,0.010424000024795533
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,64,12,1,128,1,float16,fp8,0,0.010449600219726563
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,64,12,1,128,1,fp8,fp8,0,0.010414399951696397
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,64,12,2,128,1,float16,float16,0,0.010388799756765366
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,64,12,2,128,1,float16,fp8,0,0.010422399640083313
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,64,12,2,128,1,fp8,fp8,0,0.01045759990811348
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,64,12,4,128,1,float16,fp8,0,0.01043199971318245
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,64,12,4,128,1,float16,float16,0,0.010452800244092942
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,64,12,4,128,1,fp8,fp8,0,0.010425599664449692
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,64,12,12,128,1,float16,float16,0,0.01043040007352829
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,64,12,12,128,1,float16,fp8,0,0.010476800054311753
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,64,12,12,128,1,fp8,fp8,0,0.010475199669599533
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,64,12,1,128,1,float16,float16,0,0.010384000092744827
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,64,12,4,128,1,fp8,fp8,0,0.033048000931739804
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,64,12,1,128,1,float16,fp8,0,0.01043360009789467
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,64,12,1,128,1,fp8,fp8,0,0.010361599922180175
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,64,12,2,128,1,float16,fp8,0,0.010496000200510025
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,64,12,2,128,1,fp8,fp8,0,0.01043040007352829
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,64,12,12,128,1,float16,fp8,0,0.022745600342750548
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,64,12,4,128,1,float16,float16,0,0.01045759990811348
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,64,12,4,128,1,float16,fp8,0,0.010395199805498124
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,64,12,12,128,1,float16,float16,0,0.010468800365924836
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,64,12,12,128,1,float16,fp8,0,0.010372799634933472
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,64,12,12,128,1,fp8,fp8,0,0.010460799932479859
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,64,12,1,128,1,float16,float16,0,0.01037919968366623
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,64,12,1,128,1,float16,fp8,0,0.010382399708032609
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,64,12,2,128,1,float16,float16,0,0.010452800244092942
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,64,12,1,128,1,fp8,fp8,0,0.010367999970912933
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,128,12,1,128,1,float16,float16,0,0.0124719999730587
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,64,12,2,128,1,float16,fp8,0,0.010454399883747101
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,64,12,2,128,1,fp8,fp8,0,0.010375999659299851
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,64,12,4,128,1,float16,float16,0,0.010473600029945374
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,64,12,4,128,1,float16,fp8,0,0.010375999659299851
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,64,12,4,128,1,fp8,fp8,0,0.010372799634933472
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,32,12,1,128,1,float16,float16,0,0.07723519802093506
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,64,12,1,128,1,float16,float16,0,0.014539200067520141
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,32,12,1,128,1,fp8,fp8,0,0.080731201171875
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,32,12,1,128,1,float16,fp8,0,0.08148800134658814
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,32,12,2,128,1,float16,fp8,0,0.08140640258789063
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,32,12,2,128,1,fp8,fp8,0,0.08088319897651672
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,32,12,4,128,1,float16,float16,0,0.08091999888420105
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,32,12,4,128,1,float16,fp8,0,0.0808080017566681
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,32,12,4,128,1,fp8,fp8,0,0.08203039765357971
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,32,12,12,128,1,float16,float16,0,0.04690879881381989
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,32,12,12,128,1,float16,fp8,0,0.04975839853286743
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,32,12,12,128,1,fp8,fp8,0,0.049377599358558656
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,32,12,1,128,1,float16,float16,0,0.04383200109004974
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,32,12,1,128,1,float16,fp8,0,0.045291200280189514
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,32,12,1,128,1,fp8,fp8,0,0.045291200280189514
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,32,12,2,128,1,float16,float16,0,0.04334079921245575
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,32,12,2,128,1,float16,fp8,0,0.04531840085983276
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,32,12,2,128,1,fp8,fp8,0,0.04529759883880615
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,32,12,4,128,1,float16,float16,0,0.04517599940299988
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,32,12,4,128,1,float16,fp8,0,0.04528799951076508
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,32,12,4,128,1,fp8,fp8,0,0.04529759883880615
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,32,12,12,128,1,float16,float16,0,0.028808000683784484
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,32,12,12,128,1,float16,fp8,0,0.029204800724983215
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,32,12,12,128,1,fp8,fp8,0,0.029963201284408568
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,32,12,1,128,1,float16,float16,0,0.02688319981098175
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,32,12,1,128,1,float16,fp8,0,0.02693600058555603
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,32,12,1,128,1,fp8,fp8,0,0.027246400713920593
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,32,12,2,128,1,float16,fp8,0,0.028167998790740965
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,64,12,12,128,1,fp8,fp8,0,0.037062400579452516
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,32,12,2,128,1,fp8,fp8,0,0.02820639908313751
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,32,12,4,128,1,float16,float16,0,0.027527999877929688
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,32,12,4,128,1,float16,fp8,0,0.028347200155258177
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,32,12,4,128,1,fp8,fp8,0,0.027489599585533143
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,32,12,12,128,1,float16,float16,0,0.01865279972553253
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,32,12,12,128,1,float16,fp8,0,0.01865600049495697
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,32,12,1,128,1,float16,float16,0,0.016659200191497803
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,32,12,12,128,1,fp8,fp8,0,0.018665599822998046
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,64,12,2,128,1,float16,float16,0,0.01045759990811348
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,32,12,1,128,1,fp8,fp8,0,0.018638400733470915
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,32,12,2,128,1,float16,float16,0,0.017107200622558594
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,32,12,2,128,1,float16,fp8,0,0.018612800538539885
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,32,12,2,128,1,fp8,fp8,0,0.018561600148677825
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,64,12,4,128,1,fp8,fp8,0,0.010480000078678131
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,32,12,4,128,1,float16,float16,0,0.018083199858665466
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,32,12,4,128,1,float16,fp8,0,0.018559999763965607
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,32,12,12,128,1,float16,float16,0,0.014476799964904785
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,32,12,12,128,1,float16,fp8,0,0.01459839940071106
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,32,12,4,128,1,fp8,fp8,0,0.01846559941768646
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,32,12,12,128,1,fp8,fp8,0,0.01449120044708252
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,32,12,1,128,1,float16,float16,0,0.013438400626182557
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,32,12,1,128,1,float16,fp8,0,0.014443199336528777
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,32,12,1,128,1,fp8,fp8,0,0.01404159963130951
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,32,12,2,128,1,float16,float16,0,0.013846400380134582
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,32,12,2,128,1,float16,fp8,0,0.014534400403499603
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,32,12,2,128,1,fp8,fp8,0,0.01419519931077957
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,32,12,4,128,1,float16,float16,0,0.01353919953107834
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,32,12,4,128,1,float16,fp8,0,0.014504000544548035
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,32,12,4,128,1,fp8,fp8,0,0.014289599657058717
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,32,12,12,128,1,float16,float16,0,0.010515200346708298
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,32,12,12,128,1,float16,fp8,0,0.010440000146627427
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,32,12,12,128,1,fp8,fp8,0,0.010449600219726563
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,32,12,1,128,1,float16,fp8,0,0.010398399829864503
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,32,12,2,128,1,float16,float16,0,0.07786880135536194
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,32,12,1,128,1,fp8,fp8,0,0.010454399883747101
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,32,12,2,128,1,float16,float16,0,0.010385599732398988
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,32,12,2,128,1,float16,fp8,0,0.010478399693965912
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,32,12,2,128,1,fp8,fp8,0,0.010443200170993806
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,32,12,4,128,1,float16,fp8,0,0.0104592002928257
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,32,12,4,128,1,fp8,fp8,0,0.010496000200510025
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,32,12,12,128,1,float16,float16,0,0.010486400127410889
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,32,12,12,128,1,float16,fp8,0,0.010486400127410889
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,32,12,12,128,1,fp8,fp8,0,0.010417599976062775
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,32,12,1,128,1,float16,float16,0,0.010417599976062775
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,32,12,1,128,1,float16,fp8,0,0.010454399883747101
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,32,12,1,128,1,fp8,fp8,0,0.010428799688816071
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,32,12,2,128,1,float16,float16,0,0.010428799688816071
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,32,12,2,128,1,fp8,fp8,0,0.01048320010304451
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,32,12,2,128,1,float16,fp8,0,0.010468800365924836
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,32,12,4,128,1,float16,float16,0,0.010491199791431427
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,32,12,4,128,1,float16,fp8,0,0.010467199981212616
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,32,12,4,128,1,fp8,fp8,0,0.010417599976062775
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,32,12,12,128,1,float16,float16,0,0.01045759990811348
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,32,12,12,128,1,float16,fp8,0,0.010427200049161912
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,32,12,12,128,1,fp8,fp8,0,0.010460799932479859
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,32,12,1,128,1,float16,float16,0,0.00952640026807785
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,32,12,1,128,1,float16,fp8,0,0.010470400005578995
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,32,12,2,128,1,float16,float16,0,0.010468800365924836
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,32,12,1,128,1,fp8,fp8,0,0.009452799707651139
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,32,12,2,128,1,float16,fp8,0,0.0105103999376297
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,32,12,2,128,1,fp8,fp8,0,0.010446400195360184
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,32,12,4,128,1,float16,float16,0,0.010444799810647965
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,32,12,4,128,1,float16,fp8,0,0.010424000024795533
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,32,12,4,128,1,fp8,fp8,0,0.010440000146627427
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,32,12,12,128,1,float16,float16,0,0.010425599664449692
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,32,12,2,128,1,float16,float16,0,0.026843199133872987
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,32,12,12,128,1,fp8,fp8,0,0.010441599786281586
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,32,12,1,128,1,float16,float16,0,0.008928000181913375
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,32,12,1,128,1,float16,fp8,0,0.008510400354862214
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,32,12,1,128,1,fp8,fp8,0,0.009044799953699112
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,32,12,2,128,1,float16,fp8,0,0.010361599922180175
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,32,12,2,128,1,fp8,fp8,0,0.01037440001964569
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,32,12,4,128,1,float16,fp8,0,0.010360000282526016
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,32,12,1,128,1,float16,fp8,0,0.01773280054330826
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,32,12,4,128,1,fp8,fp8,0,0.010384000092744827
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,16,12,1,128,1,float16,fp8,0,0.07252799868583679
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,16,12,1,128,1,float16,float16,0,0.07463679909706115
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,16,12,1,128,1,fp8,fp8,0,0.072707200050354
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,16,12,2,128,1,float16,float16,0,0.07457919716835022
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,128,12,4,128,1,float16,fp8,0,0.011899200081825257
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,16,12,2,128,1,float16,fp8,0,0.07238079905509949
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,16,12,2,128,1,fp8,fp8,0,0.07230240106582642
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,16,12,4,128,1,float16,float16,0,0.07561759948730469
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,16,12,4,128,1,float16,fp8,0,0.07242239713668823
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,32,12,1,128,1,float16,float16,0,0.01043199971318245
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,16,12,4,128,1,fp8,fp8,0,0.07264479994773865
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,16,12,12,128,1,float16,float16,0,0.043932801485061644
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,32,12,4,128,1,float16,float16,0,0.010353600233793258
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,16,12,12,128,1,float16,fp8,0,0.043115198612213135
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,16,12,1,128,1,float16,float16,0,0.042247998714447024
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,16,12,1,128,1,float16,fp8,0,0.041231998801231386
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,16,12,2,128,1,float16,float16,0,0.0424591988325119
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,16,12,2,128,1,fp8,fp8,0,0.04116159975528717
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,16,12,4,128,1,float16,float16,0,0.04313279986381531
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,16,12,4,128,1,float16,fp8,0,0.041223999857902524
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,16,12,4,128,1,fp8,fp8,0,0.041280001401901245
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,16,12,12,128,1,float16,float16,0,0.02686080038547516
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,16,12,12,128,1,float16,fp8,0,0.026846399903297423
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,16,12,12,128,1,fp8,fp8,0,0.026807999610900878
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,16,12,1,128,1,float16,float16,0,0.026830399036407472
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,16,12,1,128,1,float16,fp8,0,0.025028800964355467
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,16,12,1,128,1,fp8,fp8,0,0.025143998861312866
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,16,12,2,128,1,float16,float16,0,0.026870399713516235
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,32,12,12,128,1,float16,fp8,0,0.010467199981212616
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,16,12,2,128,1,float16,fp8,0,0.024777600169181825
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,16,12,2,128,1,fp8,fp8,0,0.025172799825668335
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,16,12,4,128,1,float16,float16,0,0.026824000477790832
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,32,12,2,128,1,float16,float16,0,0.008430399745702744
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,16,12,4,128,1,float16,fp8,0,0.025172799825668335
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,16,12,4,128,1,fp8,fp8,0,0.02489279955625534
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,16,12,12,128,1,float16,float16,0,0.01658080071210861
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,16,12,12,128,1,float16,fp8,0,0.016676799952983858
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,16,12,12,128,1,fp8,fp8,0,0.016646400094032288
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,32,12,4,128,1,float16,float16,0,0.008795200288295746
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,16,12,1,128,1,float16,float16,0,0.016579200327396394
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,16,12,1,128,1,float16,fp8,0,0.016625599563121797
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,16,12,2,128,1,float16,float16,0,0.016572800278663636
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,16,12,2,128,1,float16,fp8,0,0.01658080071210861
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,16,12,2,128,1,fp8,fp8,0,0.016654400527477263
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,16,12,4,128,1,float16,fp8,0,0.016510400176048278
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,16,12,4,128,1,fp8,fp8,0,0.01658560037612915
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,16,12,12,128,1,float16,float16,0,0.013073599338531494
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,16,12,12,128,1,fp8,fp8,0,0.012505599856376648
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,16,12,12,128,1,float16,fp8,0,0.012488000094890594
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,16,12,1,128,1,float16,float16,0,0.012873600423336028
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,16,12,1,128,1,float16,fp8,0,0.01249760016798973
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,16,12,1,128,1,fp8,fp8,0,0.012476799637079239
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,16,12,2,128,1,float16,float16,0,0.012510399520397186
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,16,12,2,128,1,float16,fp8,0,0.012508800625801087
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,16,12,2,128,1,fp8,fp8,0,0.012476799637079239
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,16,12,4,128,1,float16,fp8,0,0.01252480000257492
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,16,12,4,128,1,fp8,fp8,0,0.012544000148773193
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,16,12,12,128,1,float16,float16,0,0.010438399761915207
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,16,12,12,128,1,float16,fp8,0,0.01048479974269867
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,16,12,12,128,1,fp8,fp8,0,0.04334239959716797
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,16,12,12,128,1,fp8,fp8,0,0.01043199971318245
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,16,12,1,128,1,float16,float16,0,0.010467199981212616
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,16,12,1,128,1,fp8,fp8,0,0.041124799847602846
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,16,12,1,128,1,fp8,fp8,0,0.010451199859380722
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,16,12,2,128,1,float16,float16,0,0.010492800176143647
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,16,12,2,128,1,float16,fp8,0,0.04118399918079376
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,16,12,2,128,1,float16,fp8,0,0.010470400005578995
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,16,12,4,128,1,float16,float16,0,0.010358399897813796
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,16,12,4,128,1,float16,fp8,0,0.010435199737548828
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,16,12,4,128,1,fp8,fp8,0,0.01037919968366623
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16,12,12,128,1,float16,float16,0,0.010480000078678131
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16,12,12,128,1,float16,fp8,0,0.009657599776983262
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16,12,12,128,1,fp8,fp8,0,0.010424000024795533
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16,12,1,128,1,float16,float16,0,0.010411199927330018
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16,12,1,128,1,float16,fp8,0,0.009628800302743911
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16,12,1,128,1,fp8,fp8,0,0.010412800312042236
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16,12,2,128,1,float16,float16,0,0.010366400331258773
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16,12,2,128,1,float16,fp8,0,0.009801600128412247
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16,12,2,128,1,fp8,fp8,0,0.010411199927330018
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16,12,4,128,1,float16,float16,0,0.010369600355625152
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16,12,4,128,1,float16,fp8,0,0.010414399951696397
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16,12,12,128,1,float16,float16,0,0.01043040007352829
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16,12,4,128,1,fp8,fp8,0,0.010420800000429154
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16,12,12,128,1,float16,fp8,0,0.010441599786281586
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16,12,1,128,1,float16,float16,0,0.010326399654150008
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16,12,1,128,1,float16,fp8,0,0.010438399761915207
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16,12,1,128,1,fp8,fp8,0,0.008367999643087386
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16,12,2,128,1,float16,float16,0,0.010440000146627427
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16,12,2,128,1,float16,fp8,0,0.008419200032949447
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16,12,2,128,1,fp8,fp8,0,0.010475199669599533
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16,12,4,128,1,float16,float16,0,0.008870399743318557
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16,12,4,128,1,float16,fp8,0,0.008727999776601792
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16,12,4,128,1,fp8,fp8,0,0.008406399935483932
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16,12,12,128,1,float16,float16,0,0.010417599976062775
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16,12,12,128,1,float16,fp8,0,0.010132800042629241
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16,12,12,128,1,fp8,fp8,0,0.008593600243330002
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16,12,1,128,1,float16,float16,0,0.010344000160694122
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16,12,1,128,1,float16,fp8,0,0.008422400057315826
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16,12,1,128,1,fp8,fp8,0,0.009347199648618697
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16,12,2,128,1,float16,float16,0,0.00881119966506958
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16,12,2,128,1,float16,fp8,0,0.009355200082063675
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16,12,2,128,1,fp8,fp8,0,0.008417599648237229
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,16,12,1,128,1,fp8,fp8,0,0.016628800332546233
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16,12,4,128,1,float16,float16,0,0.009009599685668945
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16,12,4,128,1,float16,fp8,0,0.008428800106048583
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,16,12,4,128,1,float16,float16,0,0.016564799845218657
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16,12,4,128,1,fp8,fp8,0,0.00846560001373291
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,1,12,1,128,1,float16,float16,0,0.0725600004196167
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,1,12,1,128,1,float16,fp8,0,0.06799520254135132
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,1,12,1,128,1,fp8,fp8,0,0.06828160285949707
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,1,12,2,128,1,float16,float16,0,0.07265440225601197
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,1,12,2,128,1,float16,fp8,0,0.06839519739151001
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,1,12,2,128,1,fp8,fp8,0,0.06800159811973572
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,1,12,4,128,1,float16,float16,0,0.07303839921951294
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,1,12,4,128,1,float16,fp8,0,0.06852480173110961
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,1,12,4,128,1,fp8,fp8,0,0.06820319890975952
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,1,12,12,128,1,float16,float16,0,0.04249120056629181
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,1,12,12,128,1,float16,fp8,0,0.039208000898361205
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,1,12,1,128,1,float16,float16,0,0.041678398847579956
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,16,12,4,128,1,float16,float16,0,0.012676799297332763
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,1,12,1,128,1,fp8,fp8,0,0.03911199867725372
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,1,12,2,128,1,float16,float16,0,0.042075198888778684
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,1,12,2,128,1,float16,fp8,0,0.03917919993400574
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,1,12,2,128,1,fp8,fp8,0,0.039192000031471254
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,1,12,4,128,1,float16,fp8,0,0.039131200313568114
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,1,12,4,128,1,float16,float16,0,0.041524800658226016
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,1,12,4,128,1,fp8,fp8,0,0.039087998867034915
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1,12,12,128,1,float16,float16,0,0.02627359926700592
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1,12,12,128,1,float16,fp8,0,0.02479359954595566
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1,12,12,128,1,fp8,fp8,0,0.02476480007171631
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,16,12,2,128,1,fp8,fp8,0,0.010499200224876404
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1,12,1,128,1,float16,float16,0,0.024934400618076325
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1,12,1,128,1,float16,fp8,0,0.02481600046157837
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1,12,1,128,1,fp8,fp8,0,0.02480800002813339
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1,12,2,128,1,float16,fp8,0,0.02479040026664734
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1,12,2,128,1,fp8,fp8,0,0.02470560073852539
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1,12,4,128,1,float16,float16,0,0.025600001215934753
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1,12,4,128,1,float16,fp8,0,0.02451840043067932
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1,12,4,128,1,fp8,fp8,0,0.024740800261497498
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1,12,12,128,1,float16,float16,0,0.016702400147914888
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1,12,12,128,1,float16,fp8,0,0.016603200137615202
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1,12,12,128,1,fp8,fp8,0,0.016064000129699708
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1,12,1,128,1,float16,float16,0,0.016601599752902985
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1,12,1,128,1,float16,fp8,0,0.015174399316310882
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1,12,1,128,1,fp8,fp8,0,0.01659359931945801
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1,12,2,128,1,float16,float16,0,0.016598400473594666
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1,12,2,128,1,float16,fp8,0,0.016524800658226015
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1,12,2,128,1,fp8,fp8,0,0.015646399557590486
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16,12,12,128,1,fp8,fp8,0,0.009254399687051773
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1,12,4,128,1,float16,float16,0,0.016590400040149687
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1,12,4,128,1,float16,fp8,0,0.015569600462913512
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1,12,4,128,1,fp8,fp8,0,0.015907199680805208
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1,12,12,128,1,float16,float16,0,0.014590400457382201
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1,12,12,128,1,float16,fp8,0,0.012495999783277511
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1,12,12,128,1,fp8,fp8,0,0.0124208003282547
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1,12,1,128,1,float16,float16,0,0.01252480000257492
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1,12,1,128,1,float16,fp8,0,0.012540799379348756
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1,12,1,128,1,fp8,fp8,0,0.01242400035262108
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1,12,2,128,1,float16,float16,0,0.013208000361919403
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1,12,2,128,1,float16,fp8,0,0.012508800625801087
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1,12,2,128,1,fp8,fp8,0,0.012503999471664428
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1,12,4,128,1,float16,float16,0,0.014511999487876893
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1,12,4,128,1,fp8,fp8,0,0.01250240057706833
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1,12,4,128,1,float16,fp8,0,0.012564800679683685
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1,12,12,128,1,float16,fp8,0,0.010388799756765366
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1,12,12,128,1,fp8,fp8,0,0.010390400141477584
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1,12,12,128,1,float16,float16,0,0.01045759990811348
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1,12,1,128,1,float16,float16,0,0.010358399897813796
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1,12,1,128,1,fp8,fp8,0,0.010471999645233154
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1,12,2,128,1,fp8,fp8,0,0.010372799634933472
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1,12,2,128,1,float16,fp8,0,0.010396800190210342
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1,12,4,128,1,float16,float16,0,0.010491199791431427
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1,12,4,128,1,float16,fp8,0,0.010345599800348281
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1,12,4,128,1,fp8,fp8,0,0.01040000021457672
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1,12,12,128,1,float16,float16,0,0.01051200032234192
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1,12,12,128,1,float16,fp8,0,0.00974240005016327
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1,12,12,128,1,fp8,fp8,0,0.01040000021457672
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1,12,1,128,1,float16,float16,0,0.010284800082445145
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1,12,1,128,1,float16,fp8,0,0.010385599732398988
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1,12,1,128,1,fp8,fp8,0,0.009057600051164627
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1,12,2,128,1,float16,float16,0,0.010425599664449692
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1,12,2,128,1,float16,fp8,0,0.00947519987821579
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1,12,2,128,1,fp8,fp8,0,0.010673599690198899
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,1,12,12,128,1,fp8,fp8,0,0.03914400041103363
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1,12,4,128,1,float16,fp8,0,0.010311999917030334
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1,12,4,128,1,fp8,fp8,0,0.01037919968366623
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,1,12,1,128,1,float16,fp8,0,0.03919520080089569
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1,12,12,128,1,float16,float16,0,0.010480000078678131
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1,12,12,128,1,float16,fp8,0,0.010372799634933472
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1,12,12,128,1,fp8,fp8,0,0.008372800052165985
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1,12,1,128,1,float16,float16,0,0.009297599643468856
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1,12,1,128,1,float16,fp8,0,0.009884800016880035
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1,12,1,128,1,fp8,fp8,0,0.008423999696969987
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1,12,2,128,1,float16,fp8,0,0.010363200306892395
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1,12,2,128,1,fp8,fp8,0,0.008470399677753449
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1,12,4,128,1,float16,float16,0,0.009398400038480758
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1,12,4,128,1,fp8,fp8,0,0.008908800035715102
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1,12,4,128,1,float16,fp8,0,0.008417599648237229
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1,12,12,128,1,float16,float16,0,0.010388799756765366
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1,12,12,128,1,float16,fp8,0,0.008904000371694564
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1,12,12,128,1,fp8,fp8,0,0.008406399935483932
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1,12,1,128,1,float16,float16,0,0.009403199702501298
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1,12,1,128,1,float16,fp8,0,0.008508799970149994
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1,12,2,128,1,float16,float16,0,0.025067201256752013
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1,12,1,128,1,fp8,fp8,0,0.008819200098514557
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1,12,2,128,1,float16,float16,0,0.0083856001496315
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1,12,2,128,1,float16,fp8,0,0.008780799806118011
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1,12,2,128,1,fp8,fp8,0,0.008376000076532364
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1,12,4,128,1,float16,float16,0,0.009206400066614152
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1,12,4,128,1,float16,fp8,0,0.00841120034456253
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1,12,4,128,1,fp8,fp8,0,0.008814399689435959
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1,12,1,128,1,float16,fp8,0,0.010480000078678131
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1,12,2,128,1,float16,float16,0,0.01040000021457672
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1,12,4,128,1,float16,float16,0,0.01045600026845932
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1,12,2,128,1,float16,float16,0,0.009228800237178803
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,16,12,1,128,1,float16,fp8,0,0.010467199981212616
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,16384,8,1,128,1,float16,fp8,0,2.9107872009277345
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,16384,8,1,128,1,float16,float16,0,3.4798511505126952
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,16384,8,1,128,1,fp8,fp8,0,2.9067968368530273
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,16384,8,2,128,1,float16,float16,0,3.554118347167969
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,16384,8,2,128,1,float16,fp8,0,2.915782356262207
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,16384,8,2,128,1,fp8,fp8,0,2.9030384063720702
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,16384,8,4,128,1,float16,fp8,0,2.912723159790039
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16384,8,8,128,1,float16,float16,0,1.7726160049438477
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,16384,8,4,128,1,float16,float16,0,3.511249542236328
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,16384,8,4,128,1,fp8,fp8,0,3.101587104797363
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16384,8,8,128,1,float16,fp8,0,1.7896480560302734
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16384,8,8,128,1,fp8,fp8,0,1.5249072074890138
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16384,8,1,128,1,float16,float16,0,1.6785232543945312
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16384,8,1,128,1,float16,fp8,0,1.5152079582214355
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16384,8,1,128,1,fp8,fp8,0,1.5359007835388183
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16384,8,2,128,1,float16,float16,0,1.805388832092285
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16384,8,2,128,1,fp8,fp8,0,1.5225791931152344
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16384,8,2,128,1,float16,fp8,0,1.7618303298950195
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16384,8,4,128,1,float16,fp8,0,1.5233535766601562
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16384,8,4,128,1,float16,float16,0,1.7363199234008788
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16384,8,4,128,1,fp8,fp8,0,1.656011199951172
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16384,8,8,128,1,fp8,fp8,0,0.9724944114685059
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16384,8,1,128,1,float16,float16,0,0.9037008285522461
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16384,8,1,128,1,float16,fp8,0,0.8492624282836914
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16384,8,1,128,1,fp8,fp8,0,0.8240143775939941
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16384,8,2,128,1,float16,float16,0,0.9771856307983399
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16384,8,2,128,1,float16,fp8,0,0.8461872100830078
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16384,8,2,128,1,fp8,fp8,0,0.9404911994934082
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16384,8,4,128,1,float16,float16,0,0.9511407852172852
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16384,8,4,128,1,fp8,fp8,0,0.8254976272583008
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16384,8,8,128,1,float16,float16,0,0.5759024143218994
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16384,8,8,128,1,float16,fp8,0,0.49043998718261717
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16384,8,8,128,1,fp8,fp8,0,0.4848959922790527
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16384,8,1,128,1,float16,fp8,0,0.47499680519104004
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16384,8,1,128,1,fp8,fp8,0,0.514958381652832
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16384,8,2,128,1,float16,float16,0,0.5101280212402344
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16384,8,1,128,1,float16,float16,0,0.5229328155517579
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16384,8,2,128,1,float16,fp8,0,0.48271999359130857
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16384,8,4,128,1,fp8,fp8,0,0.4935999870300293
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16384,8,4,128,1,float16,fp8,0,0.47498559951782227
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16384,8,4,128,1,float16,float16,0,0.5304368019104004
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16384,8,8,128,1,float16,float16,0,0.9335503578186035
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16384,8,8,128,1,float16,fp8,0,0.8292799949645996
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,12288,8,1,128,1,float16,float16,0,1.956827163696289
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,12288,8,1,128,1,fp8,fp8,0,1.7259840011596679
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,12288,8,1,128,1,float16,fp8,0,1.7590351104736328
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,12288,8,2,128,1,float16,float16,0,1.9211599349975585
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,12288,8,2,128,1,float16,fp8,0,1.720368003845215
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,12288,8,2,128,1,fp8,fp8,0,1.7233247756958008
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,12288,8,8,128,1,float16,fp8,0,0.9111552238464355
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,12288,8,8,128,1,float16,float16,0,1.1602144241333008
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,12288,8,8,128,1,fp8,fp8,0,0.9126015663146972
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,12288,8,4,128,1,float16,fp8,0,1.7261680603027343
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16384,8,2,128,1,fp8,fp8,0,0.47442078590393066
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,12288,8,4,128,1,float16,float16,0,2.0800880432128905
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,12288,8,1,128,1,float16,float16,0,0.9915328025817871
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,12288,8,1,128,1,float16,fp8,0,1.0658063888549805
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,12288,8,2,128,1,float16,fp8,0,0.9111807823181153
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,12288,8,2,128,1,float16,float16,0,0.9904848098754883
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,12288,8,2,128,1,fp8,fp8,0,1.1118080139160156
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,12288,8,4,128,1,float16,float16,0,1.0305472373962403
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,12288,8,8,128,1,float16,float16,0,0.5624303817749023
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,12288,8,4,128,1,float16,fp8,0,0.9736895561218262
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,12288,8,8,128,1,float16,fp8,0,0.6110415935516358
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,12288,8,4,128,1,fp8,fp8,0,0.9128288269042969
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,12288,8,8,128,1,fp8,fp8,0,0.549067211151123
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16384,8,4,128,1,float16,fp8,0,0.8383760452270508
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,12288,8,1,128,1,float16,float16,0,0.5724400043487549
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,12288,8,1,128,1,float16,fp8,0,0.5151088237762451
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,12288,8,1,128,1,fp8,fp8,0,0.5154560089111329
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,12288,8,2,128,1,float16,float16,0,0.5347184181213379
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,12288,8,2,128,1,fp8,fp8,0,0.5037536144256591
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,12288,8,4,128,1,float16,fp8,0,0.5077455997467041
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,12288,8,8,128,1,float16,float16,0,0.32575359344482424
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,12288,8,4,128,1,fp8,fp8,0,0.5109983921051026
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,12288,8,8,128,1,float16,fp8,0,0.29834880828857424
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,12288,8,8,128,1,fp8,fp8,0,0.29936800003051756
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,12288,8,1,128,1,float16,float16,0,0.31083359718322756
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,12288,8,1,128,1,float16,fp8,0,0.2984911918640137
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,12288,8,1,128,1,fp8,fp8,0,0.29817121028900145
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,12288,8,2,128,1,float16,fp8,0,0.29865279197692873
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,12288,8,2,128,1,fp8,fp8,0,0.2996848106384277
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,12288,8,4,128,1,float16,float16,0,0.3165168046951294
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,12288,8,4,128,1,float16,fp8,0,0.29794399738311766
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,12288,8,4,128,1,fp8,fp8,0,0.2988111972808838
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,12288,8,1,128,1,fp8,fp8,0,0.9154064178466796
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,10240,8,1,128,1,float16,fp8,0,1.2471391677856445
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,10240,8,1,128,1,float16,float16,0,1.4005087852478026
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,10240,8,1,128,1,fp8,fp8,0,1.2461423873901367
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,12288,8,2,128,1,float16,fp8,0,0.5396959781646729
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,12288,8,4,128,1,float16,float16,0,0.5692319869995117
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,10240,8,2,128,1,float16,float16,0,1.3495823860168457
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,10240,8,2,128,1,float16,fp8,0,1.3997232437133789
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,12288,8,4,128,1,fp8,fp8,0,1.7270751953125
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,10240,8,2,128,1,fp8,fp8,0,1.2455360412597656
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,12288,8,2,128,1,float16,float16,0,0.3109407901763916
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,10240,8,8,128,1,float16,float16,0,0.7729119777679443
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,10240,8,4,128,1,float16,float16,0,1.3918800354003906
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,10240,8,8,128,1,float16,fp8,0,0.6664495944976807
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,10240,8,8,128,1,fp8,fp8,0,0.6664175987243652
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,10240,8,1,128,1,float16,float16,0,0.7137839794158936
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,10240,8,4,128,1,fp8,fp8,0,1.4449888229370118
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,10240,8,1,128,1,float16,fp8,0,0.699348783493042
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,10240,8,1,128,1,fp8,fp8,0,0.7560976028442383
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,10240,8,2,128,1,float16,fp8,0,0.6638688087463379
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,10240,8,2,128,1,fp8,fp8,0,0.6646336078643799
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,10240,8,4,128,1,float16,float16,0,0.7588751792907715
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,10240,8,8,128,1,float16,float16,0,0.4129648208618164
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,10240,8,8,128,1,float16,fp8,0,0.37660319805145265
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,10240,8,4,128,1,float16,fp8,0,0.6692287921905518
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,10240,8,4,128,1,fp8,fp8,0,0.6939023971557617
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,10240,8,8,128,1,fp8,fp8,0,0.39750559329986573
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,10240,8,1,128,1,float16,float16,0,0.41201119422912597
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,10240,8,1,128,1,float16,fp8,0,0.3728496074676514
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,10240,8,1,128,1,fp8,fp8,0,0.37160799503326414
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,10240,8,2,128,1,float16,fp8,0,0.38283839225769045
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,10240,8,2,128,1,fp8,fp8,0,0.3970655918121338
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,10240,8,4,128,1,float16,float16,0,0.42371358871459963
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,10240,8,8,128,1,float16,float16,0,0.25703840255737304
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,10240,8,4,128,1,fp8,fp8,0,0.37305440902709963
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,10240,8,8,128,1,float16,fp8,0,0.24890239238739015
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,10240,8,8,128,1,fp8,fp8,0,0.24694559574127198
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,10240,8,1,128,1,float16,float16,0,0.2466223955154419
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,10240,8,1,128,1,float16,fp8,0,0.22985761165618895
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,10240,8,1,128,1,fp8,fp8,0,0.2483520030975342
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,10240,8,2,128,1,float16,float16,0,0.252508807182312
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,10240,8,2,128,1,fp8,fp8,0,0.22997119426727294
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,10240,8,4,128,1,float16,fp8,0,0.23809280395507812
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,10240,8,4,128,1,float16,float16,0,0.24550559520721435
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,10240,8,4,128,1,fp8,fp8,0,0.22992000579833985
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,10240,8,4,128,1,float16,fp8,0,1.2475551605224608
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,10240,8,2,128,1,float16,float16,0,0.7170415878295898
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,8192,8,1,128,1,float16,fp8,0,1.6219087600708009
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,8192,8,1,128,1,float16,float16,0,1.7561887741088866
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,8192,8,1,128,1,fp8,fp8,0,1.612785530090332
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,8192,8,2,128,1,float16,float16,0,1.744865608215332
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,10240,8,2,128,1,float16,float16,0,0.39624800682067873
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,10240,8,4,128,1,float16,fp8,0,0.3738303899765015
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,8192,8,2,128,1,float16,fp8,0,1.617193603515625
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,10240,8,2,128,1,float16,fp8,0,0.2348720073699951
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,8192,8,8,128,1,float16,float16,0,0.9845647811889648
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,8192,8,2,128,1,fp8,fp8,0,1.6189727783203125
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,8192,8,4,128,1,float16,fp8,0,1.612723159790039
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,8192,8,8,128,1,float16,fp8,0,0.8444656372070313
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,8192,8,8,128,1,fp8,fp8,0,0.8452303886413575
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,8192,8,4,128,1,float16,float16,0,1.9561344146728517
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,8192,8,4,128,1,fp8,fp8,0,1.8077999114990235
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,8192,8,1,128,1,float16,float16,0,0.8991168022155762
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,8192,8,1,128,1,fp8,fp8,0,0.8434672355651855
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,8192,8,1,128,1,float16,fp8,0,0.9086319923400878
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,8192,8,2,128,1,float16,float16,0,0.9013615608215332
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,8192,8,2,128,1,float16,fp8,0,0.8418815612792969
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,8192,8,4,128,1,float16,fp8,0,0.867255973815918
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,8192,8,8,128,1,float16,float16,0,0.5091311931610107
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,8192,8,4,128,1,float16,float16,0,1.0379504203796386
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,8192,8,8,128,1,fp8,fp8,0,0.45742239952087405
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,8192,8,1,128,1,float16,float16,0,0.47773280143737795
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,8192,8,4,128,1,fp8,fp8,0,0.8990336418151855
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,8192,8,1,128,1,float16,fp8,0,0.46012320518493655
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,8192,8,1,128,1,fp8,fp8,0,0.4810160160064697
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,8192,8,2,128,1,float16,fp8,0,0.4561408042907715
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,8192,8,2,128,1,float16,float16,0,0.5586575984954834
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,8192,8,2,128,1,fp8,fp8,0,0.457041597366333
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,8192,8,4,128,1,float16,float16,0,0.49503521919250487
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,8192,8,4,128,1,float16,fp8,0,0.46175518035888674
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,8192,8,8,128,1,float16,float16,0,0.304803204536438
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,8192,8,8,128,1,fp8,fp8,0,0.26486399173736574
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,8192,8,1,128,1,float16,float16,0,0.27377440929412844
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,8192,8,1,128,1,float16,fp8,0,0.26360158920288085
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,8192,8,1,128,1,fp8,fp8,0,0.2697567939758301
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,8192,8,2,128,1,float16,fp8,0,0.2669903993606567
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,8192,8,2,128,1,fp8,fp8,0,0.2628943920135498
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,8192,8,4,128,1,float16,float16,0,0.28244800567626954
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,8192,8,4,128,1,float16,fp8,0,0.26363520622253417
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,8192,8,4,128,1,fp8,fp8,0,0.26308000087738037
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,8192,8,8,128,1,float16,fp8,0,0.16791199445724486
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,8192,8,8,128,1,fp8,fp8,0,0.16781760454177858
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,8192,8,1,128,1,float16,float16,0,0.17337919473648072
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,8192,8,1,128,1,float16,fp8,0,0.16762080192565917
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,8192,8,1,128,1,fp8,fp8,0,0.16694719791412355
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,8192,8,2,128,1,float16,float16,0,0.173579204082489
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,8192,8,2,128,1,float16,fp8,0,0.16698399782180787
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,8192,8,2,128,1,fp8,fp8,0,0.1681839942932129
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,8192,8,2,128,1,fp8,fp8,0,0.8429967880249023
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,8192,8,4,128,1,float16,float16,0,0.1755743980407715
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,8192,8,4,128,1,float16,fp8,0,0.16755839586257934
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,8192,8,8,128,1,float16,fp8,0,0.47643041610717773
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,6144,8,1,128,1,float16,float16,0,1.0217040061950684
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,6144,8,1,128,1,float16,fp8,0,0.978604793548584
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,8192,8,8,128,1,float16,fp8,0,0.30945920944213867
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,8192,8,4,128,1,fp8,fp8,0,0.4819664001464844
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,8192,8,2,128,1,float16,float16,0,0.2699007987976074
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,8192,8,8,128,1,float16,float16,0,0.18595839738845826
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,6144,8,2,128,1,float16,fp8,0,0.9773103713989257
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,6144,8,2,128,1,float16,float16,0,1.0469296455383301
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,6144,8,2,128,1,fp8,fp8,0,0.9804927825927734
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,6144,8,4,128,1,float16,float16,0,1.0710576057434082
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,6144,8,8,128,1,float16,fp8,0,0.5190383911132812
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,6144,8,8,128,1,float16,float16,0,0.5924448013305664
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,6144,8,4,128,1,float16,fp8,0,0.9768239974975585
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,6144,8,4,128,1,fp8,fp8,0,0.9781824111938476
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,6144,8,8,128,1,fp8,fp8,0,0.5181727886199952
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,6144,8,1,128,1,float16,float16,0,0.529318380355835
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,6144,8,1,128,1,float16,fp8,0,0.5715087890625
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,6144,8,1,128,1,fp8,fp8,0,0.5151968002319336
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,6144,8,2,128,1,float16,fp8,0,0.5164512157440185
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,6144,8,2,128,1,fp8,fp8,0,0.518393611907959
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,6144,8,4,128,1,float16,float16,0,0.5659711837768555
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,6144,8,4,128,1,float16,fp8,0,0.5163680076599121
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,6144,8,8,128,1,float16,float16,0,0.31411840915679934
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,6144,8,4,128,1,fp8,fp8,0,0.5311552047729492
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,6144,8,8,128,1,float16,fp8,0,0.30504000186920166
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,6144,8,8,128,1,fp8,fp8,0,0.28679039478302004
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,6144,8,1,128,1,float16,float16,0,0.28953120708465574
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,6144,8,1,128,1,float16,fp8,0,0.28796000480651857
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,6144,8,1,128,1,fp8,fp8,0,0.2836047887802124
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,6144,8,2,128,1,float16,fp8,0,0.28444159030914307
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,6144,8,2,128,1,fp8,fp8,0,0.28647360801696775
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,6144,8,4,128,1,float16,float16,0,0.30174078941345217
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,6144,8,4,128,1,float16,fp8,0,0.28543519973754883
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,6144,8,8,128,1,float16,float16,0,0.18468480110168456
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,6144,8,8,128,1,float16,fp8,0,0.17089920043945311
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,6144,8,4,128,1,fp8,fp8,0,0.28569440841674804
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,6144,8,8,128,1,fp8,fp8,0,0.16893600225448607
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,6144,8,1,128,1,fp8,fp8,0,1.0204352378845214
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,6144,8,1,128,1,float16,float16,0,0.1704192042350769
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,6144,8,1,128,1,float16,fp8,0,0.17019519805908204
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,6144,8,1,128,1,fp8,fp8,0,0.1687440037727356
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,6144,8,2,128,1,float16,float16,0,0.17057119607925414
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,6144,8,2,128,1,float16,fp8,0,0.167849600315094
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,6144,8,2,128,1,fp8,fp8,0,0.16969759464263917
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,6144,8,4,128,1,float16,fp8,0,0.168121600151062
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,6144,8,4,128,1,fp8,fp8,0,0.16875840425491334
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,6144,8,8,128,1,float16,float16,0,0.11829279661178589
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,6144,8,8,128,1,float16,fp8,0,0.11127519607543945
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,6144,8,8,128,1,fp8,fp8,0,0.11048799753189087
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,6144,8,1,128,1,float16,float16,0,0.11314560174942016
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,6144,8,1,128,1,float16,fp8,0,0.1105679988861084
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,6144,8,1,128,1,fp8,fp8,0,0.11051839590072632
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,6144,8,2,128,1,float16,float16,0,0.11335519552230836
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,6144,8,2,128,1,float16,fp8,0,0.1093567967414856
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,6144,8,4,128,1,float16,float16,0,0.11530719995498658
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,6144,8,2,128,1,fp8,fp8,0,0.11070079803466797
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,6144,8,4,128,1,float16,fp8,0,0.10979199409484863
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,6144,8,4,128,1,fp8,fp8,0,0.1094864010810852
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,8192,8,4,128,1,fp8,fp8,0,0.16660959720611573
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,6144,8,2,128,1,float16,float16,0,0.5350207805633544
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,4096,8,1,128,1,float16,float16,0,0.9552016258239746
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,4096,8,1,128,1,fp8,fp8,0,0.9530752182006836
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,4096,8,2,128,1,float16,float16,0,0.9728143692016602
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,4096,8,2,128,1,float16,fp8,0,0.9573936462402344
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,4096,8,2,128,1,fp8,fp8,0,0.9544976234436036
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,6144,8,2,128,1,float16,float16,0,0.30284318923950193
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,4096,8,8,128,1,float16,float16,0,0.5654367923736572
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,4096,8,4,128,1,float16,float16,0,1.0068016052246094
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,4096,8,4,128,1,float16,fp8,0,0.9535584449768066
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,4096,8,4,128,1,fp8,fp8,0,1.0603008270263672
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,4096,8,8,128,1,float16,fp8,0,0.49898557662963866
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,4096,8,8,128,1,fp8,fp8,0,0.49925599098205564
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,4096,8,1,128,1,float16,float16,0,0.49573278427124023
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,4096,8,1,128,1,float16,fp8,0,0.5715871810913086
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,4096,8,1,128,1,fp8,fp8,0,0.49596800804138186
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,4096,8,2,128,1,float16,float16,0,0.5040512084960938
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,4096,8,2,128,1,float16,fp8,0,0.4982448101043701
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,6144,8,4,128,1,float16,float16,0,0.17602720260620117
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,4096,8,2,128,1,fp8,fp8,0,0.49777917861938475
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,4096,8,4,128,1,float16,float16,0,0.5241968154907226
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,4096,8,8,128,1,float16,fp8,0,0.27139360904693605
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,4096,8,1,128,1,float16,float16,0,0.2655663967132568
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,4096,8,8,128,1,fp8,fp8,0,0.2707887887954712
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,4096,8,8,128,1,float16,float16,0,0.3013056039810181
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,4096,8,4,128,1,fp8,fp8,0,0.4970384120941162
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,4096,8,1,128,1,float16,fp8,0,0.2981199979782104
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,4096,8,1,128,1,fp8,fp8,0,0.26942241191864014
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,4096,8,2,128,1,float16,float16,0,0.26711359024047854
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,4096,8,2,128,1,float16,fp8,0,0.26986720561981203
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,4096,8,2,128,1,fp8,fp8,0,0.2744751930236816
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,4096,8,4,128,1,float16,float16,0,0.2754143953323364
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,4096,8,8,128,1,float16,float16,0,0.17001919746398925
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,4096,8,4,128,1,fp8,fp8,0,0.27020480632781985
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,4096,8,8,128,1,float16,fp8,0,0.15694559812545777
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,4096,8,8,128,1,fp8,fp8,0,0.1585039973258972
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,4096,8,1,128,1,float16,float16,0,0.1514623999595642
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,4096,8,1,128,1,float16,fp8,0,0.1551967978477478
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,4096,8,1,128,1,fp8,fp8,0,0.15447200536727906
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,4096,8,2,128,1,float16,fp8,0,0.15491679906845093
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,4096,8,2,128,1,fp8,fp8,0,0.1542080044746399
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,4096,8,4,128,1,float16,float16,0,0.16004480123519899
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,4096,8,4,128,1,float16,fp8,0,0.15495519638061522
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,4096,8,4,128,1,fp8,fp8,0,0.1544319987297058
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,4096,8,1,128,1,float16,fp8,0,0.9561552047729492
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,4096,8,8,128,1,float16,fp8,0,0.0986303985118866
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,4096,8,8,128,1,fp8,fp8,0,0.09868159890174866
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,4096,8,1,128,1,float16,float16,0,0.09926239848136902
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,4096,8,1,128,1,fp8,fp8,0,0.0978384017944336
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,4096,8,2,128,1,float16,float16,0,0.09937599897384644
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,4096,8,2,128,1,fp8,fp8,0,0.09821119904518127
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,4096,8,4,128,1,float16,float16,0,0.10205119848251343
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,4096,8,4,128,1,float16,fp8,0,0.09855999946594238
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,4096,8,4,128,1,fp8,fp8,0,0.09885759949684143
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,4096,8,8,128,1,float16,float16,0,0.06587520241737366
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,4096,8,8,128,1,float16,fp8,0,0.06166560053825378
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,4096,8,8,128,1,fp8,fp8,0,0.06169760227203369
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,4096,8,1,128,1,float16,float16,0,0.062377601861953735
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,4096,8,4,128,1,float16,fp8,0,0.539031982421875
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,4096,8,1,128,1,float16,fp8,0,0.06194239854812622
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,4096,8,1,128,1,fp8,fp8,0,0.061689597368240354
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,4096,8,2,128,1,float16,float16,0,0.06284319758415222
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,4096,8,2,128,1,float16,fp8,0,0.06165120005607605
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,4096,8,2,128,1,fp8,fp8,0,0.06166239976882935
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,4096,8,4,128,1,float16,float16,0,0.06372320055961608
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,4096,8,4,128,1,float16,fp8,0,0.061692798137664796
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,4096,8,4,128,1,float16,fp8,0,0.30006558895111085
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,4096,8,4,128,1,fp8,fp8,0,0.06185439825057983
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,4096,8,2,128,1,float16,float16,0,0.15378719568252563
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,4096,8,8,128,1,float16,float16,0,0.10787520408630372
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,3072,8,1,128,1,float16,float16,0,0.5765552043914794
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,3072,8,1,128,1,float16,fp8,0,0.5996384143829345
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,3072,8,1,128,1,fp8,fp8,0,0.5992544174194336
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,4096,8,1,128,1,float16,fp8,0,0.09852799773216248
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,3072,8,2,128,1,float16,float16,0,0.5871647834777832
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,4096,8,2,128,1,float16,fp8,0,0.09763360023498535
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,3072,8,2,128,1,float16,fp8,0,0.5986735820770264
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,3072,8,2,128,1,fp8,fp8,0,0.6000736236572266
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,3072,8,8,128,1,float16,float16,0,0.35273919105529783
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,3072,8,4,128,1,float16,float16,0,0.6181759834289551
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,3072,8,4,128,1,float16,fp8,0,0.5999839782714844
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,3072,8,8,128,1,float16,fp8,0,0.3162735939025879
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,3072,8,4,128,1,fp8,fp8,0,0.5965792179107666
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,3072,8,1,128,1,float16,float16,0,0.30009920597076417
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,3072,8,1,128,1,float16,fp8,0,0.31628639698028566
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,3072,8,2,128,1,float16,float16,0,0.3019887924194336
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,3072,8,2,128,1,float16,fp8,0,0.31548960208892823
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,3072,8,2,128,1,fp8,fp8,0,0.31472480297088623
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,3072,8,4,128,1,float16,float16,0,0.31812961101531984
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,3072,8,8,128,1,float16,float16,0,0.19007999897003175
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,3072,8,4,128,1,fp8,fp8,0,0.31774239540100097
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,3072,8,8,128,1,float16,fp8,0,0.17531039714813232
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,3072,8,4,128,1,float16,fp8,0,0.31680319309234617
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,3072,8,8,128,1,fp8,fp8,0,0.1772160053253174
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,3072,8,1,128,1,float16,float16,0,0.16796959638595582
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,3072,8,1,128,1,float16,fp8,0,0.1734048008918762
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,3072,8,2,128,1,float16,float16,0,0.1684399962425232
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,3072,8,1,128,1,fp8,fp8,0,0.17411199808120728
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,3072,8,2,128,1,fp8,fp8,0,0.1739776015281677
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,3072,8,4,128,1,float16,float16,0,0.1762928009033203
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,3072,8,8,128,1,float16,float16,0,0.11436799764633179
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,3072,8,4,128,1,fp8,fp8,0,0.17405439615249635
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,3072,8,8,128,1,float16,fp8,0,0.10343359708786011
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,3072,8,8,128,1,fp8,fp8,0,0.10401439666748047
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,3072,8,1,128,1,float16,float16,0,0.1010815978050232
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,3072,8,1,128,1,float16,fp8,0,0.10240319967269898
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,3072,8,1,128,1,fp8,fp8,0,0.10326240062713624
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,3072,8,2,128,1,float16,float16,0,0.10065439939498902
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,3072,8,2,128,1,float16,fp8,0,0.10345280170440674
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,3072,8,2,128,1,fp8,fp8,0,0.10232800245285034
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,3072,8,4,128,1,float16,float16,0,0.10511679649353027
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,3072,8,4,128,1,float16,fp8,0,0.10309920310974122
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,3072,8,8,128,1,float16,float16,0,0.07219840288162231
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,3072,8,4,128,1,fp8,fp8,0,0.10372639894485473
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,3072,8,8,128,1,float16,fp8,0,0.06717119812965393
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,3072,8,8,128,1,fp8,fp8,0,0.06736159920692444
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,3072,8,1,128,1,float16,float16,0,0.06712160110473633
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,3072,8,1,128,1,float16,fp8,0,0.06600000262260437
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,3072,8,1,128,1,fp8,fp8,0,0.06683520078659058
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,3072,8,2,128,1,float16,float16,0,0.06696959733963012
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,3072,8,2,128,1,fp8,fp8,0,0.06604959964752197
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,3072,8,2,128,1,float16,fp8,0,0.06690239906311035
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,3072,8,4,128,1,float16,float16,0,0.06864799857139588
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,3072,8,4,128,1,fp8,fp8,0,0.06620799899101257
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,3072,8,8,128,1,float16,float16,0,0.05145919919013977
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,3072,8,1,128,1,float16,float16,0,0.04936479926109314
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,3072,8,8,128,1,float16,fp8,0,0.0493120014667511
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,3072,8,1,128,1,float16,fp8,0,0.04853599965572357
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,3072,8,1,128,1,fp8,fp8,0,0.04936319887638092
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,3072,8,2,128,1,float16,float16,0,0.049414399266242984
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,3072,8,2,128,1,float16,fp8,0,0.04930399954319
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,3072,8,2,128,1,fp8,fp8,0,0.0488400012254715
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,3072,8,4,128,1,float16,float16,0,0.05031520128250122
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,3072,8,4,128,1,float16,fp8,0,0.049307200312614444
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,3072,8,4,128,1,fp8,fp8,0,0.0491344004869461
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,3072,8,8,128,1,fp8,fp8,0,0.31758880615234375
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,2048,8,1,128,1,float16,float16,0,0.5679008007049561
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,2048,8,1,128,1,float16,fp8,0,0.6197648048400879
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,3072,8,1,128,1,fp8,fp8,0,0.3162847995758057
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,2048,8,1,128,1,fp8,fp8,0,0.6190336227416993
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,2048,8,2,128,1,float16,fp8,0,0.6196144104003907
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,3072,8,2,128,1,float16,fp8,0,0.17448320388793945
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,3072,8,4,128,1,float16,fp8,0,0.17437440156936646
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,2048,8,2,128,1,fp8,fp8,0,0.6191455841064453
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,2048,8,4,128,1,float16,float16,0,0.6164735794067383
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,2048,8,4,128,1,float16,fp8,0,0.6194352149963379
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,2048,8,8,128,1,float16,float16,0,0.3632335901260376
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,2048,8,4,128,1,fp8,fp8,0,0.6170976161956787
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,2048,8,8,128,1,float16,fp8,0,0.3279007911682129
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,2048,8,8,128,1,fp8,fp8,0,0.32409279346466063
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,3072,8,4,128,1,float16,fp8,0,0.06664320230484008
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,2048,8,1,128,1,float16,float16,0,0.29505600929260256
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,2048,8,1,128,1,float16,fp8,0,0.3211280107498169
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,3072,8,8,128,1,fp8,fp8,0,0.049200001358985904
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,2048,8,1,128,1,fp8,fp8,0,0.3457312107086182
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,2048,8,2,128,1,float16,float16,0,0.2959743976593018
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,2048,8,2,128,1,float16,fp8,0,0.3222928047180176
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,2048,8,2,128,1,fp8,fp8,0,0.3222304105758667
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,2048,8,8,128,1,float16,float16,0,0.19045759439468385
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,2048,8,4,128,1,float16,float16,0,0.3158384084701538
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,2048,8,4,128,1,fp8,fp8,0,0.33285439014434814
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,2048,8,8,128,1,float16,fp8,0,0.17670719623565673
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,2048,8,8,128,1,fp8,fp8,0,0.17568639516830445
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,2048,8,1,128,1,float16,float16,0,0.1594591975212097
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,2048,8,1,128,1,float16,fp8,0,0.17381759881973266
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,2048,8,2,128,1,float16,float16,0,0.1606927990913391
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,2048,8,1,128,1,fp8,fp8,0,0.17938239574432374
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,2048,8,2,128,1,float16,fp8,0,0.1737663984298706
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,2048,8,2,128,1,fp8,fp8,0,0.17395999431610107
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,2048,8,4,128,1,float16,float16,0,0.1704192042350769
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,2048,8,4,128,1,fp8,fp8,0,0.18082239627838134
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,2048,8,8,128,1,float16,fp8,0,0.10068960189819336
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,2048,8,8,128,1,fp8,fp8,0,0.10127999782562255
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,2048,8,1,128,1,float16,float16,0,0.09671040177345276
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,2048,8,1,128,1,float16,fp8,0,0.09895679950714112
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,2048,8,1,128,1,fp8,fp8,0,0.09855520129203796
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,2048,8,2,128,1,float16,float16,0,0.09804959893226624
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,2048,8,2,128,1,float16,fp8,0,0.09911519885063172
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,2048,8,2,128,1,fp8,fp8,0,0.09904159903526306
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,2048,8,4,128,1,float16,float16,0,0.10135040283203126
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,2048,8,4,128,1,float16,fp8,0,0.0992576003074646
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,2048,8,4,128,1,fp8,fp8,0,0.09891359806060791
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,2048,8,2,128,1,float16,float16,0,0.5773119926452637
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,2048,8,8,128,1,float16,fp8,0,0.0616703987121582
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,2048,8,8,128,1,fp8,fp8,0,0.061699199676513675
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,2048,8,1,128,1,float16,float16,0,0.059724801778793336
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,2048,8,1,128,1,float16,fp8,0,0.06155359745025635
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,2048,8,1,128,1,fp8,fp8,0,0.061534398794174196
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,2048,8,2,128,1,float16,fp8,0,0.061641597747802736
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,2048,8,2,128,1,fp8,fp8,0,0.061286401748657224
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,2048,8,4,128,1,float16,float16,0,0.06250560283660889
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,2048,8,4,128,1,fp8,fp8,0,0.061667197942733766
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,2048,8,8,128,1,float16,float16,0,0.04119519889354706
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,2048,8,8,128,1,float16,fp8,0,0.03912320137023926
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,2048,8,8,128,1,fp8,fp8,0,0.039024001359939574
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,2048,8,1,128,1,float16,float16,0,0.038332799077034
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,2048,8,1,128,1,float16,fp8,0,0.039048001170158386
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,2048,8,1,128,1,fp8,fp8,0,0.039103999733924866
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,2048,8,2,128,1,float16,float16,0,0.037780800461769105
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,2048,8,2,128,1,float16,fp8,0,0.03915199935436249
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,2048,8,2,128,1,fp8,fp8,0,0.03906559944152832
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,2048,8,4,128,1,float16,float16,0,0.03911679983139038
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,2048,8,4,128,1,float16,fp8,0,0.039103999733924866
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,2048,8,4,128,1,fp8,fp8,0,0.039208000898361205
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,2048,8,8,128,1,float16,float16,0,0.03696799874305725
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,2048,8,8,128,1,float16,fp8,0,0.03531680107116699
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,2048,8,8,128,1,fp8,fp8,0,0.03502880036830902
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,2048,8,4,128,1,float16,fp8,0,0.3223695993423462
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,2048,8,1,128,1,float16,float16,0,0.03585439920425415
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,2048,8,1,128,1,float16,fp8,0,0.03503040075302124
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,2048,8,2,128,1,float16,float16,0,0.03597599864006042
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,2048,8,1,128,1,fp8,fp8,0,0.03504959940910339
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,2048,8,2,128,1,fp8,fp8,0,0.0350847989320755
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,2048,8,4,128,1,float16,float16,0,0.03671999871730804
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,2048,8,4,128,1,float16,fp8,0,0.035067200660705566
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,2048,8,4,128,1,fp8,fp8,0,0.03508960008621216
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,2048,8,8,128,1,float16,float16,0,0.10979039669036865
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,2048,8,4,128,1,float16,fp8,0,0.1758128046989441
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1536,8,1,128,1,float16,float16,0,0.35583679676055907
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1536,8,1,128,1,float16,fp8,0,0.4069024085998535
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1536,8,1,128,1,fp8,fp8,0,0.405620813369751
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1536,8,2,128,1,float16,float16,0,0.35823678970336914
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1536,8,2,128,1,float16,fp8,0,0.40819358825683594
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1536,8,2,128,1,fp8,fp8,0,0.4071311950683594
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,2048,8,8,128,1,float16,float16,0,0.06904000043869019
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1536,8,4,128,1,float16,float16,0,0.3965888023376465
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1536,8,4,128,1,float16,fp8,0,0.4086160182952881
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,2048,8,2,128,1,float16,float16,0,0.05963519811630249
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1536,8,8,128,1,float16,float16,0,0.23537120819091797
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1536,8,8,128,1,fp8,fp8,0,0.2161407947540283
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1536,8,8,128,1,float16,fp8,0,0.2151087999343872
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1536,8,4,128,1,fp8,fp8,0,0.40756001472473147
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1536,8,1,128,1,float16,float16,0,0.18776479959487916
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,2048,8,4,128,1,float16,fp8,0,0.0612559974193573
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1536,8,2,128,1,float16,float16,0,0.19096959829330445
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1536,8,2,128,1,fp8,fp8,0,0.21476640701293945
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1536,8,4,128,1,float16,float16,0,0.20448000431060792
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1536,8,1,128,1,fp8,fp8,0,0.21407039165496827
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1536,8,4,128,1,float16,fp8,0,0.21367199420928956
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1536,8,4,128,1,fp8,fp8,0,0.21485440731048583
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1536,8,8,128,1,float16,fp8,0,0.1197808027267456
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1536,8,8,128,1,fp8,fp8,0,0.11949119567871094
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1536,8,1,128,1,float16,float16,0,0.10731840133666992
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1536,8,1,128,1,float16,fp8,0,0.11636799573898315
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1536,8,1,128,1,fp8,fp8,0,0.11720000505447388
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1536,8,2,128,1,float16,float16,0,0.10815680027008057
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1536,8,2,128,1,float16,fp8,0,0.1174496054649353
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1536,8,2,128,1,fp8,fp8,0,0.11756479740142822
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1536,8,4,128,1,float16,float16,0,0.11510560512542725
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1536,8,4,128,1,float16,fp8,0,0.11810719966888428
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1536,8,4,128,1,fp8,fp8,0,0.11895040273666382
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1536,8,8,128,1,float16,float16,0,0.07653599977493286
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1536,8,8,128,1,float16,fp8,0,0.06900479793548583
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1536,8,8,128,1,fp8,fp8,0,0.06915199756622314
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1536,8,1,128,1,float16,float16,0,0.06366559863090515
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1536,8,1,128,1,float16,fp8,0,0.06795520186424256
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1536,8,1,128,1,fp8,fp8,0,0.0685536026954651
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1536,8,2,128,1,float16,float16,0,0.06400799751281738
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1536,8,2,128,1,float16,fp8,0,0.06824960112571717
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,2048,8,2,128,1,float16,fp8,0,0.035041600465774536
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1536,8,2,128,1,fp8,fp8,0,0.06834880113601685
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1536,8,4,128,1,float16,float16,0,0.06813759803771972
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1536,8,4,128,1,float16,fp8,0,0.0684112012386322
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1536,8,4,128,1,fp8,fp8,0,0.06865760087966918
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1536,8,8,128,1,float16,fp8,0,0.04327679872512817
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1536,8,8,128,1,fp8,fp8,0,0.043505600094795226
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1536,8,1,128,1,float16,float16,0,0.04300960004329681
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1536,8,1,128,1,float16,fp8,0,0.04323360025882721
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1536,8,1,128,1,fp8,fp8,0,0.04324640035629272
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1536,8,2,128,1,float16,float16,0,0.042694398760795595
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1536,8,2,128,1,float16,fp8,0,0.0432671993970871
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1536,8,2,128,1,fp8,fp8,0,0.04324159920215607
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1536,8,4,128,1,float16,fp8,0,0.04323840141296387
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1536,8,4,128,1,float16,float16,0,0.04442879855632782
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1536,8,8,128,1,float16,float16,0,0.0330592006444931
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1536,8,8,128,1,float16,fp8,0,0.030950400233268737
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1536,8,4,128,1,fp8,fp8,0,0.04322560131549835
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1536,8,8,128,1,fp8,fp8,0,0.030905601382255555
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1536,8,1,128,1,float16,float16,0,0.030827200412750243
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1536,8,1,128,1,float16,fp8,0,0.030955201387405394
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1536,8,1,128,1,fp8,fp8,0,0.030976000428199767
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1536,8,2,128,1,float16,float16,0,0.030929601192474364
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1536,8,2,128,1,float16,fp8,0,0.030899199843406677
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1536,8,2,128,1,fp8,fp8,0,0.03094080090522766
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1536,8,4,128,1,float16,float16,0,0.03089439868927002
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1536,8,4,128,1,float16,fp8,0,0.03091520071029663
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1536,8,4,128,1,fp8,fp8,0,0.030947199463844298
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1536,8,8,128,1,float16,float16,0,0.030846399068832398
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1536,8,8,128,1,float16,fp8,0,0.028896000981330872
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1536,8,8,128,1,fp8,fp8,0,0.02892000079154968
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1536,8,1,128,1,float16,float16,0,0.028915199637413024
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1536,8,1,128,1,float16,fp8,0,0.028860801458358766
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1536,8,1,128,1,fp8,fp8,0,0.02887679934501648
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1536,8,2,128,1,float16,float16,0,0.028907200694084166
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1536,8,2,128,1,fp8,fp8,0,0.028911998867988585
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1536,8,4,128,1,float16,float16,0,0.02890239953994751
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1536,8,4,128,1,float16,fp8,0,0.02892000079154968
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1536,8,4,128,1,fp8,fp8,0,0.02889760136604309
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1536,8,1,128,1,float16,fp8,0,0.21261920928955078
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1536,8,2,128,1,float16,fp8,0,0.21399199962615967
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,1024,8,1,128,1,float16,float16,0,0.3734224081039429
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,1024,8,1,128,1,float16,fp8,0,0.4497663974761963
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1536,8,8,128,1,float16,float16,0,0.1295024037361145
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,1024,8,2,128,1,float16,float16,0,0.37523839473724363
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,1024,8,2,128,1,float16,fp8,0,0.4492815971374512
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,1024,8,2,128,1,fp8,fp8,0,0.45041918754577637
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,1024,8,4,128,1,float16,float16,0,0.4205935955047607
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1024,8,8,128,1,float16,float16,0,0.2602751970291138
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1024,8,8,128,1,float16,fp8,0,0.23554880619049073
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,1024,8,4,128,1,fp8,fp8,0,0.4476463794708252
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1024,8,8,128,1,fp8,fp8,0,0.23669440746307374
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1024,8,1,128,1,float16,float16,0,0.1958847999572754
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1024,8,2,128,1,float16,float16,0,0.19640640020370484
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1024,8,1,128,1,fp8,fp8,0,0.23299360275268555
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1536,8,8,128,1,float16,float16,0,0.04733439981937408
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1024,8,2,128,1,float16,fp8,0,0.23392000198364257
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1024,8,2,128,1,fp8,fp8,0,0.23504319190979003
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1024,8,4,128,1,float16,fp8,0,0.23458878993988036
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1024,8,8,128,1,float16,float16,0,0.1381600022315979
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1024,8,8,128,1,float16,fp8,0,0.1275167942047119
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1024,8,8,128,1,fp8,fp8,0,0.1266543984413147
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1024,8,1,128,1,float16,float16,0,0.10756640434265137
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1024,8,1,128,1,float16,fp8,0,0.12464799880981445
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1024,8,1,128,1,fp8,fp8,0,0.12533919811248778
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1024,8,2,128,1,float16,fp8,0,0.1260159969329834
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1536,8,2,128,1,float16,fp8,0,0.028857600688934327
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1024,8,2,128,1,fp8,fp8,0,0.12524319887161256
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1024,8,4,128,1,float16,float16,0,0.11842399835586548
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1024,8,4,128,1,float16,fp8,0,0.12586719989776612
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1024,8,4,128,1,fp8,fp8,0,0.12683360576629638
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1024,8,8,128,1,float16,fp8,0,0.07258560061454773
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1024,8,8,128,1,float16,float16,0,0.07911520004272461
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,1024,8,1,128,1,fp8,fp8,0,0.44703521728515627
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1024,8,1,128,1,float16,float16,0,0.06228799819946289
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1024,8,1,128,1,float16,fp8,0,0.06964480280876159
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1024,8,1,128,1,fp8,fp8,0,0.06991199851036071
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1024,8,2,128,1,float16,float16,0,0.06352639794349671
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1024,8,2,128,1,float16,fp8,0,0.0702895998954773
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1024,8,2,128,1,fp8,fp8,0,0.06975039839744568
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1024,8,4,128,1,float16,float16,0,0.06931520104408265
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1024,8,4,128,1,float16,fp8,0,0.070551997423172
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,1024,8,4,128,1,float16,fp8,0,0.4504223823547363
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1024,8,8,128,1,float16,float16,0,0.04745599925518036
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1024,8,8,128,1,float16,fp8,0,0.04321439862251282
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1024,8,1,128,1,float16,fp8,0,0.23298559188842774
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1024,8,8,128,1,fp8,fp8,0,0.04330720007419586
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1024,8,1,128,1,float16,float16,0,0.03972960114479065
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1024,8,1,128,1,float16,fp8,0,0.04324640035629272
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1024,8,1,128,1,fp8,fp8,0,0.043017598986625674
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1024,8,4,128,1,float16,float16,0,0.21621599197387695
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1024,8,2,128,1,float16,float16,0,0.04006240069866181
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1024,8,2,128,1,float16,fp8,0,0.04315040111541748
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1024,8,2,128,1,fp8,fp8,0,0.04325760006904602
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1024,8,4,128,1,float16,float16,0,0.04261919856071472
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1024,8,4,128,1,fp8,fp8,0,0.043289598822593686
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1024,8,8,128,1,float16,float16,0,0.028908801078796387
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1024,8,8,128,1,float16,fp8,0,0.02688319981098175
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1024,8,8,128,1,fp8,fp8,0,0.02682720124721527
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1024,8,1,128,1,float16,float16,0,0.02481440007686615
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1024,8,1,128,1,float16,fp8,0,0.026873600482940675
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1024,8,1,128,1,fp8,fp8,0,0.02688960134983063
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1024,8,2,128,1,float16,float16,0,0.02479359954595566
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1024,8,2,128,1,float16,fp8,0,0.026878398656845093
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1024,8,2,128,1,float16,float16,0,0.10818560123443603
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1024,8,2,128,1,fp8,fp8,0,0.027249601483345032
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1024,8,4,128,1,float16,float16,0,0.026086398959159852
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1024,8,4,128,1,float16,fp8,0,0.026848000288009644
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1024,8,4,128,1,fp8,fp8,0,0.026848000288009644
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1024,8,8,128,1,float16,float16,0,0.02479359954595566
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1024,8,8,128,1,fp8,fp8,0,0.024055999517440797
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1024,8,1,128,1,float16,float16,0,0.022777600586414336
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1024,8,1,128,1,float16,fp8,0,0.023177599906921385
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1024,8,1,128,1,fp8,fp8,0,0.024673600494861603
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1024,8,2,128,1,float16,float16,0,0.022732800245285033
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1024,8,2,128,1,float16,fp8,0,0.02481119930744171
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1024,8,2,128,1,fp8,fp8,0,0.023600000143051147
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1024,8,4,128,1,float16,float16,0,0.024743999540805816
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1024,8,4,128,1,float16,fp8,0,0.023895999789237975
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1024,8,4,128,1,fp8,fp8,0,0.024711999297142028
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1024,8,8,128,1,float16,float16,0,0.022728000581264497
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1024,8,8,128,1,float16,fp8,0,0.0227743998169899
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1024,8,8,128,1,fp8,fp8,0,0.02276480048894882
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1024,8,8,128,1,fp8,fp8,0,0.07238720059394836
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1024,8,1,128,1,float16,float16,0,0.022779199481010436
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1024,8,1,128,1,float16,fp8,0,0.02271360009908676
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1024,8,2,128,1,float16,float16,0,0.022731199860572815
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1024,8,2,128,1,float16,fp8,0,0.022755199670791627
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1024,8,2,128,1,fp8,fp8,0,0.022761599719524385
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1024,8,4,128,1,float16,float16,0,0.022708800435066224
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1024,8,4,128,1,float16,fp8,0,0.022759999334812164
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1024,8,4,128,1,fp8,fp8,0,0.022776000201702118
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,512,8,1,128,1,float16,float16,0,0.27593920230865476
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1024,8,4,128,1,fp8,fp8,0,0.07058240175247192
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,512,8,1,128,1,float16,fp8,0,0.3634095907211304
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,512,8,2,128,1,float16,float16,0,0.28090879917144773
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,512,8,1,128,1,fp8,fp8,0,0.3619647979736328
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,512,8,2,128,1,float16,fp8,0,0.3632256031036377
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,512,8,2,128,1,fp8,fp8,0,0.3608975887298584
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,512,8,4,128,1,float16,float16,0,0.3226655960083008
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,512,8,4,128,1,float16,fp8,0,0.36185760498046876
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,512,8,8,128,1,float16,float16,0,0.20957601070404053
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,512,8,4,128,1,fp8,fp8,0,0.36335680484771726
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,512,8,8,128,1,float16,fp8,0,0.19077919721603392
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,512,8,8,128,1,fp8,fp8,0,0.18902239799499512
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,512,8,1,128,1,float16,float16,0,0.1472272038459778
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,512,8,1,128,1,float16,fp8,0,0.18892960548400878
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,512,8,1,128,1,fp8,fp8,0,0.18918559551239014
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,512,8,2,128,1,float16,fp8,0,0.18736799955368041
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,512,8,4,128,1,float16,float16,0,0.16769759654998778
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,512,8,4,128,1,float16,fp8,0,0.1875712037086487
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,512,8,4,128,1,fp8,fp8,0,0.18914239406585692
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,512,8,8,128,1,float16,float16,0,0.11142079830169678
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,512,8,8,128,1,float16,fp8,0,0.10243840217590332
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,512,8,8,128,1,fp8,fp8,0,0.10179840326309204
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,512,8,1,128,1,float16,float16,0,0.082505601644516
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,512,8,1,128,1,float16,fp8,0,0.1000864028930664
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,512,8,1,128,1,fp8,fp8,0,0.10128159523010254
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,512,8,2,128,1,float16,float16,0,0.08286399841308593
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1024,8,1,128,1,fp8,fp8,0,0.02274080067873001
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,512,8,2,128,1,float16,fp8,0,0.10147839784622192
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,512,8,2,128,1,fp8,fp8,0,0.10084320306777954
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,512,8,4,128,1,float16,float16,0,0.09226400256156922
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,512,8,4,128,1,float16,fp8,0,0.10205279588699341
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,512,8,8,128,1,float16,float16,0,0.06377120018005371
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,512,8,4,128,1,fp8,fp8,0,0.10180799961090088
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,512,8,8,128,1,float16,fp8,0,0.0575007975101471
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,512,8,8,128,1,fp8,fp8,0,0.057545602321624756
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,512,8,1,128,1,float16,float16,0,0.046561598777770996
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,512,8,1,128,1,float16,fp8,0,0.05551999807357788
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,512,8,1,128,1,fp8,fp8,0,0.05550240278244019
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,512,8,2,128,1,float16,float16,0,0.04755040109157562
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,512,8,2,128,1,float16,fp8,0,0.055511999130249026
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,512,8,4,128,1,float16,float16,0,0.05348960161209106
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,512,8,2,128,1,fp8,fp8,0,0.05559520125389099
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,512,8,8,128,1,float16,float16,0,0.038473600149154664
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,512,8,8,128,1,float16,fp8,0,0.03311040103435516
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,512,8,8,128,1,fp8,fp8,0,0.0330592006444931
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,512,8,1,128,1,float16,float16,0,0.02956480085849762
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,512,8,1,128,1,float16,fp8,0,0.033030399680137636
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1024,8,4,128,1,fp8,fp8,0,0.23419039249420165
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,512,8,1,128,1,fp8,fp8,0,0.03332479894161224
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,512,8,2,128,1,float16,float16,0,0.029528000950813295
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,512,8,2,128,1,float16,fp8,0,0.0330592006444931
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,512,8,2,128,1,fp8,fp8,0,0.03298240005970001
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,512,8,2,128,1,float16,float16,0,0.1485919952392578
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,512,8,4,128,1,float16,float16,0,0.03163360059261322
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,512,8,4,128,1,float16,fp8,0,0.0330128014087677
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,512,8,4,128,1,fp8,fp8,0,0.03299840092658997
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,512,8,8,128,1,float16,float16,0,0.021692800521850585
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,512,8,2,128,1,fp8,fp8,0,0.18880159854888917
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,512,8,8,128,1,float16,fp8,0,0.020712000131607056
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,512,8,8,128,1,fp8,fp8,0,0.020755200088024138
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,512,8,1,128,1,float16,float16,0,0.01870719939470291
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,512,8,1,128,1,float16,fp8,0,0.02067520022392273
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,512,8,1,128,1,fp8,fp8,0,0.020761600136756896
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,512,8,2,128,1,float16,float16,0,0.01868959963321686
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,512,8,2,128,1,float16,fp8,0,0.020718400180339814
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,512,8,2,128,1,fp8,fp8,0,0.02075680047273636
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,512,8,4,128,1,float16,float16,0,0.020755200088024138
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,512,8,4,128,1,float16,fp8,0,0.020720000565052032
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,512,8,4,128,1,fp8,fp8,0,0.020686399936676026
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,512,8,8,128,1,float16,float16,0,0.018646399676799773
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,512,8,8,128,1,float16,fp8,0,0.018667200207710268
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,512,8,8,128,1,fp8,fp8,0,0.018675200641155243
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,512,8,1,128,1,float16,float16,0,0.016606399416923524
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,512,8,1,128,1,fp8,fp8,0,0.018713599443435668
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,512,8,2,128,1,float16,float16,0,0.016704000532627106
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,512,8,1,128,1,float16,fp8,0,0.018644799292087556
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,512,8,2,128,1,float16,fp8,0,0.018745599687099455
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,512,8,2,128,1,fp8,fp8,0,0.018723200261592864
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,512,8,4,128,1,float16,fp8,0,0.01867839992046356
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,512,8,4,128,1,fp8,fp8,0,0.018619200587272643
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,512,8,8,128,1,float16,float16,0,0.01660960018634796
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,512,8,8,128,1,float16,fp8,0,0.016568000614643096
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,512,8,1,128,1,float16,float16,0,0.016550399363040924
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,512,8,8,128,1,fp8,fp8,0,0.016633599996566772
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,512,8,1,128,1,float16,fp8,0,0.01661120057106018
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,512,8,1,128,1,fp8,fp8,0,0.016601599752902985
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,512,8,2,128,1,float16,fp8,0,0.016651199758052827
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,512,8,2,128,1,float16,float16,0,0.01661120057106018
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,512,8,2,128,1,fp8,fp8,0,0.016659200191497803
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,512,8,4,128,1,float16,float16,0,0.016631999611854555
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,512,8,4,128,1,float16,fp8,0,0.01669120043516159
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,512,8,4,128,1,fp8,fp8,0,0.016630400717258454
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,512,8,8,128,1,float16,float16,0,0.01661120057106018
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,512,8,8,128,1,float16,fp8,0,0.016624000668525696
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,512,8,4,128,1,float16,fp8,0,0.055567997694015506
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,512,8,8,128,1,fp8,fp8,0,0.016620799899101257
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,512,8,1,128,1,float16,float16,0,0.016521599888801575
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,512,8,1,128,1,float16,fp8,0,0.016590400040149687
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,512,8,4,128,1,fp8,fp8,0,0.05552639961242676
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,512,8,2,128,1,float16,fp8,0,0.01652639955282211
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,512,8,2,128,1,fp8,fp8,0,0.016505600512027742
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,512,8,4,128,1,float16,float16,0,0.01653279960155487
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,512,8,4,128,1,float16,fp8,0,0.01653279960155487
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,512,8,4,128,1,fp8,fp8,0,0.016606399416923524
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,256,8,1,128,1,float16,float16,0,0.12140640020370483
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1024,8,4,128,1,float16,fp8,0,0.04323840141296387
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,256,8,1,128,1,float16,fp8,0,0.16386239528656005
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,256,8,2,128,1,float16,float16,0,0.12280479669570923
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,256,8,1,128,1,fp8,fp8,0,0.16237759590148926
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,256,8,2,128,1,float16,fp8,0,0.1645535945892334
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,256,8,2,128,1,fp8,fp8,0,0.16463680267333985
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,256,8,4,128,1,float16,float16,0,0.14202719926834106
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,256,8,4,128,1,float16,fp8,0,0.16446720361709594
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,256,8,4,128,1,fp8,fp8,0,0.1644255995750427
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,256,8,8,128,1,float16,fp8,0,0.08806880116462708
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,256,8,1,128,1,float16,float16,0,0.06729440093040466
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,256,8,1,128,1,float16,fp8,0,0.08621280193328858
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,256,8,1,128,1,fp8,fp8,0,0.086217600107193
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,256,8,2,128,1,float16,float16,0,0.06805920004844665
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,256,8,2,128,1,float16,fp8,0,0.08652960062026978
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,256,8,2,128,1,fp8,fp8,0,0.08671680092811584
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,256,8,4,128,1,float16,float16,0,0.07714560031890869
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,256,8,4,128,1,float16,fp8,0,0.08715839982032776
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,256,8,4,128,1,fp8,fp8,0,0.08697760105133057
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,512,8,4,128,1,float16,float16,0,0.017723199725151063
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,256,8,8,128,1,float16,float16,0,0.05618559718132019
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,256,8,8,128,1,float16,fp8,0,0.05119199752807617
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,256,8,8,128,1,fp8,fp8,0,0.05084159970283508
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,256,8,1,128,1,float16,float16,0,0.039192000031471254
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,256,8,1,128,1,float16,fp8,0,0.048681598901748654
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,256,8,1,128,1,fp8,fp8,0,0.04888960123062134
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,256,8,2,128,1,float16,float16,0,0.03997600078582764
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,256,8,2,128,1,float16,fp8,0,0.049185600876808164
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,256,8,2,128,1,fp8,fp8,0,0.04933440089225769
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,256,8,4,128,1,float16,float16,0,0.046235200762748715
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,256,8,4,128,1,float16,fp8,0,0.049307200312614444
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,256,8,4,128,1,fp8,fp8,0,0.04938879907131195
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,256,8,8,128,1,float16,float16,0,0.032660800218582156
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,256,8,8,128,1,fp8,fp8,0,0.028896000981330872
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,256,8,8,128,1,float16,fp8,0,0.02884320020675659
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,256,8,1,128,1,float16,float16,0,0.02327679991722107
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,256,8,1,128,1,float16,fp8,0,0.028401601314544677
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,256,8,1,128,1,fp8,fp8,0,0.028756800293922424
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,256,8,2,128,1,float16,float16,0,0.023758399486541747
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,256,8,2,128,1,float16,fp8,0,0.028361600637435914
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,256,8,2,128,1,fp8,fp8,0,0.028782400488853454
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,256,8,4,128,1,float16,float16,0,0.026420798897743226
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,256,8,4,128,1,fp8,fp8,0,0.028859201073646545
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,512,8,1,128,1,fp8,fp8,0,0.016519999504089354
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,512,8,2,128,1,float16,float16,0,0.016596800088882445
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,256,8,8,128,1,float16,float16,0,0.018636800348758698
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,256,8,8,128,1,float16,fp8,0,0.018667200207710268
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,256,8,8,128,1,fp8,fp8,0,0.018641600012779237
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,256,8,1,128,1,float16,float16,0,0.016484799981117248
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,256,8,1,128,1,float16,fp8,0,0.018559999763965607
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,256,8,2,128,1,float16,float16,0,0.01650400012731552
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,256,8,1,128,1,fp8,fp8,0,0.018665599822998046
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,256,8,2,128,1,float16,fp8,0,0.018667200207710268
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,256,8,4,128,1,float16,float16,0,0.016574400663375854
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,256,8,4,128,1,float16,fp8,0,0.01870719939470291
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,256,8,4,128,1,fp8,fp8,0,0.018606400489807128
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,256,8,8,128,1,float16,float16,0,0.01592320054769516
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,256,8,8,128,1,float16,fp8,0,0.015241600573062897
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,256,8,8,128,1,fp8,fp8,0,0.014580799639225006
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,256,8,1,128,1,float16,float16,0,0.014542399346828461
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,256,8,1,128,1,float16,fp8,0,0.014564800262451171
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,256,8,1,128,1,fp8,fp8,0,0.01505119949579239
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,256,8,2,128,1,float16,float16,0,0.014560000598430633
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,256,8,2,128,1,float16,fp8,0,0.014577600359916686
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,256,8,4,128,1,float16,float16,0,0.014555199444293976
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,256,8,2,128,1,fp8,fp8,0,0.015268799662590028
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,256,8,8,128,1,float16,float16,0,0.09795039892196655
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,256,8,4,128,1,float16,fp8,0,0.014545600116252898
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,256,8,4,128,1,fp8,fp8,0,0.015500800311565399
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,256,8,8,128,1,float16,float16,0,0.014528000354766845
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1024,8,8,128,1,float16,fp8,0,0.024324800074100494
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,256,8,8,128,1,float16,fp8,0,0.014552000164985656
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,256,8,1,128,1,float16,fp8,0,0.014441600441932679
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,256,8,1,128,1,fp8,fp8,0,0.014577600359916686
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,256,8,2,128,1,float16,float16,0,0.012532800436019897
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,256,8,2,128,1,float16,fp8,0,0.014529600739479065
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,256,8,2,128,1,fp8,fp8,0,0.01451680064201355
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,256,8,4,128,1,float16,float16,0,0.014534400403499603
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,256,8,4,128,1,fp8,fp8,0,0.014571200311183929
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,256,8,4,128,1,float16,fp8,0,0.014596800506114959
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,256,8,8,128,1,float16,float16,0,0.013313600420951843
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,256,8,8,128,1,float16,fp8,0,0.012507200241088867
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,256,8,8,128,1,fp8,fp8,0,0.012492799758911132
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,256,8,1,128,1,float16,float16,0,0.012539200484752655
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,256,8,1,128,1,float16,fp8,0,0.012511999905109405
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,256,8,1,128,1,fp8,fp8,0,0.012547199428081513
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,256,8,2,128,1,float16,float16,0,0.0125231996178627
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,256,8,2,128,1,float16,fp8,0,0.013097600638866424
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,256,8,2,128,1,fp8,fp8,0,0.012604799866676331
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,256,8,4,128,1,float16,fp8,0,0.01252480000257492
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,256,8,4,128,1,fp8,fp8,0,0.012665599584579468
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,256,8,8,128,1,float16,float16,0,0.01255200058221817
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,256,8,8,128,1,float16,fp8,0,0.01271360069513321
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,256,8,8,128,1,fp8,fp8,0,0.012537600100040435
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,256,8,1,128,1,float16,float16,0,0.012736000120639801
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,256,8,4,128,1,float16,fp8,0,0.028622400760650635
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,256,8,1,128,1,float16,fp8,0,0.01281599998474121
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,256,8,1,128,1,fp8,fp8,0,0.012567999958992004
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,256,8,2,128,1,float16,float16,0,0.012571200728416443
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,256,8,2,128,1,float16,fp8,0,0.012494400143623352
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,256,8,2,128,1,fp8,fp8,0,0.012503999471664428
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,256,8,4,128,1,float16,float16,0,0.012520000338554382
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,256,8,4,128,1,float16,fp8,0,0.012620800733566284
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,256,8,4,128,1,fp8,fp8,0,0.012569600343704223
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,128,8,1,128,1,float16,fp8,0,0.09723359942436219
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,256,8,2,128,1,fp8,fp8,0,0.018719999492168425
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,128,8,1,128,1,float16,float16,0,0.07713279724121094
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,128,8,2,128,1,float16,float16,0,0.0779263973236084
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,128,8,2,128,1,float16,fp8,0,0.09783999919891358
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,128,8,2,128,1,fp8,fp8,0,0.09736480116844178
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,128,8,4,128,1,float16,float16,0,0.08627039790153504
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,128,8,4,128,1,float16,fp8,0,0.09851359724998474
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,128,8,8,128,1,float16,float16,0,0.05730080008506775
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,128,8,4,128,1,fp8,fp8,0,0.09848160147666932
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,128,8,8,128,1,float16,fp8,0,0.053478401899337766
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,128,8,8,128,1,fp8,fp8,0,0.0534608006477356
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,128,8,1,128,1,float16,fp8,0,0.0514303982257843
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,128,8,1,128,1,fp8,fp8,0,0.05140640139579773
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,128,8,2,128,1,float16,float16,0,0.04326080083847046
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,128,8,2,128,1,float16,fp8,0,0.051425600051879884
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,128,8,2,128,1,fp8,fp8,0,0.05135999917984009
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,256,8,8,128,1,fp8,fp8,0,0.01456640064716339
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,256,8,1,128,1,float16,float16,0,0.012492799758911132
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,128,8,4,128,1,float16,float16,0,0.04779680073261261
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,128,8,4,128,1,float16,fp8,0,0.051507198810577394
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,128,8,8,128,1,float16,float16,0,0.034755200147628784
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,128,8,4,128,1,fp8,fp8,0,0.0516543984413147
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,128,8,8,128,1,float16,fp8,0,0.03096800148487091
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,128,8,8,128,1,fp8,fp8,0,0.030931198596954347
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,128,8,1,128,1,float16,float16,0,0.026263999938964843
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,128,8,1,128,1,float16,fp8,0,0.030939200520515443
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,128,8,1,128,1,fp8,fp8,0,0.030881598591804504
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,128,8,2,128,1,float16,float16,0,0.026505601406097413
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,128,8,2,128,1,fp8,fp8,0,0.03094240128993988
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,128,8,2,128,1,float16,fp8,0,0.030928000807762146
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,128,8,4,128,1,float16,float16,0,0.028911998867988585
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,128,8,4,128,1,float16,fp8,0,0.030929601192474364
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,128,8,8,128,1,float16,float16,0,0.02032800018787384
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,128,8,4,128,1,fp8,fp8,0,0.03096800148487091
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,128,8,8,128,1,float16,fp8,0,0.018649600446224213
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,128,8,1,128,1,float16,float16,0,0.01656000018119812
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,128,8,8,128,1,fp8,fp8,0,0.018702399730682374
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,128,8,1,128,1,float16,fp8,0,0.018671999871730804
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,128,8,1,128,1,fp8,fp8,0,0.018667200207710268
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,128,8,2,128,1,float16,float16,0,0.01653279960155487
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,128,8,2,128,1,float16,fp8,0,0.018676799535751343
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,128,8,2,128,1,fp8,fp8,0,0.018667200207710268
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,128,8,4,128,1,float16,float16,0,0.016579200327396394
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,256,8,4,128,1,float16,float16,0,0.012492799758911132
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,128,8,8,128,1,float16,float16,0,0.012535999715328216
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,128,8,8,128,1,float16,fp8,0,0.012567999958992004
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,128,8,8,128,1,fp8,fp8,0,0.012491200119256973
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,128,8,1,128,1,float16,float16,0,0.012508800625801087
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,128,8,1,128,1,fp8,fp8,0,0.012516799569129943
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,128,8,2,128,1,float16,float16,0,0.012515200674533844
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,128,8,2,128,1,float16,fp8,0,0.012462399899959564
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,128,8,2,128,1,fp8,fp8,0,0.012505599856376648
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,128,8,4,128,1,float16,float16,0,0.01250240057706833
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,128,8,4,128,1,float16,fp8,0,0.012510399520397186
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,128,8,4,128,1,fp8,fp8,0,0.012555199861526489
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,128,8,8,128,1,float16,float16,0,0.01249919980764389
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,128,8,8,128,1,float16,fp8,0,0.012377600371837615
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,128,8,8,128,1,fp8,fp8,0,0.012505599856376648
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,128,8,1,128,1,fp8,fp8,0,0.09760640263557434
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,128,8,1,128,1,float16,float16,0,0.010393600165843963
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,128,8,1,128,1,float16,fp8,0,0.010502400249242783
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,128,8,1,128,1,fp8,fp8,0,0.010452800244092942
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,128,8,2,128,1,float16,float16,0,0.010487999767065048
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,128,8,2,128,1,float16,fp8,0,0.010470400005578995
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,128,8,2,128,1,fp8,fp8,0,0.010454399883747101
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,128,8,4,128,1,float16,float16,0,0.010449600219726563
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,128,8,4,128,1,float16,fp8,0,0.01053439974784851
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,128,8,8,128,1,float16,float16,0,0.010406400263309478
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,128,8,8,128,1,float16,fp8,0,0.010420800000429154
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,128,8,8,128,1,fp8,fp8,0,0.01048320010304451
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,256,8,8,128,1,fp8,fp8,0,0.08781440258026123
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,128,8,1,128,1,float16,fp8,0,0.010503999888896942
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,128,8,1,128,1,fp8,fp8,0,0.01043040007352829
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,128,8,2,128,1,float16,float16,0,0.010555200278759003
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,128,8,2,128,1,float16,fp8,0,0.010478399693965912
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,128,8,2,128,1,fp8,fp8,0,0.010491199791431427
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,128,8,4,128,1,float16,float16,0,0.010492800176143647
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,128,8,4,128,1,float16,fp8,0,0.010492800176143647
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,128,8,4,128,1,fp8,fp8,0,0.010475199669599533
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,128,8,4,128,1,float16,fp8,0,0.018680000305175783
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,128,8,4,128,1,fp8,fp8,0,0.01865919977426529
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,128,8,8,128,1,float16,float16,0,0.0104592002928257
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,128,8,8,128,1,float16,fp8,0,0.010505600273609162
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,128,8,1,128,1,float16,float16,0,0.010411199927330018
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,128,8,1,128,1,float16,fp8,0,0.010449600219726563
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,128,8,1,128,1,float16,fp8,0,0.012507200241088867
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,128,8,1,128,1,fp8,fp8,0,0.010468800365924836
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,128,8,2,128,1,float16,float16,0,0.010428799688816071
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,128,8,2,128,1,float16,fp8,0,0.010465600341558457
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,128,8,4,128,1,float16,float16,0,0.01048159971833229
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,128,8,4,128,1,float16,fp8,0,0.01048159971833229
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,128,8,4,128,1,fp8,fp8,0,0.010480000078678131
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,128,8,8,128,1,float16,float16,0,0.010366400331258773
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,128,8,8,128,1,float16,fp8,0,0.010444799810647965
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,128,8,8,128,1,fp8,fp8,0,0.010486400127410889
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,128,8,1,128,1,float16,fp8,0,0.010480000078678131
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,128,8,1,128,1,float16,float16,0,0.010815999656915664
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,128,8,1,128,1,fp8,fp8,0,0.01040479987859726
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,128,8,2,128,1,float16,float16,0,0.010449600219726563
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,128,8,2,128,1,float16,fp8,0,0.010371199995279311
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,128,8,2,128,1,fp8,fp8,0,0.010444799810647965
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,128,8,4,128,1,float16,float16,0,0.01037919968366623
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,128,8,4,128,1,float16,fp8,0,0.010486400127410889
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,128,8,4,128,1,fp8,fp8,0,0.010428799688816071
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,64,8,1,128,1,float16,fp8,0,0.07020320296287537
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,64,8,1,128,1,fp8,fp8,0,0.07027040123939514
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,64,8,2,128,1,float16,float16,0,0.06403840184211732
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,128,8,4,128,1,fp8,fp8,0,0.010518400371074677
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,64,8,2,128,1,float16,fp8,0,0.07022719979286193
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,128,8,1,128,1,float16,float16,0,0.042084801197052005
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,64,8,2,128,1,fp8,fp8,0,0.07131680250167846
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,64,8,4,128,1,float16,float16,0,0.06893759965896606
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,64,8,4,128,1,float16,fp8,0,0.07174720168113709
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,64,8,8,128,1,float16,float16,0,0.043171200156211856
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,64,8,4,128,1,fp8,fp8,0,0.0718671977519989
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,64,8,8,128,1,float16,fp8,0,0.039108800888061526
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,64,8,1,128,1,float16,float16,0,0.035076799988746646
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,64,8,1,128,1,float16,fp8,0,0.03911679983139038
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,64,8,8,128,1,fp8,fp8,0,0.03914720118045807
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,64,8,1,128,1,fp8,fp8,0,0.03909600079059601
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,64,8,2,128,1,float16,float16,0,0.035339200496673585
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,64,8,2,128,1,float16,fp8,0,0.039190399646759036
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,64,8,2,128,1,fp8,fp8,0,0.039129599928855896
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,64,8,4,128,1,float16,float16,0,0.037115201354026794
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,64,8,4,128,1,float16,fp8,0,0.039131200313568114
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,64,8,4,128,1,fp8,fp8,0,0.0391184002161026
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,64,8,8,128,1,float16,float16,0,0.02486560046672821
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,64,8,8,128,1,float16,fp8,0,0.02476000040769577
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,64,8,8,128,1,fp8,fp8,0,0.024799999594688416
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,64,8,1,128,1,float16,float16,0,0.022776000201702118
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,64,8,1,128,1,fp8,fp8,0,0.02481440007686615
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,64,8,2,128,1,float16,float16,0,0.022761599719524385
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,64,8,2,128,1,float16,fp8,0,0.02475679963827133
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,64,8,2,128,1,fp8,fp8,0,0.024771200120449068
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,64,8,4,128,1,float16,float16,0,0.02276639938354492
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,64,8,4,128,1,float16,fp8,0,0.02479359954595566
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,64,8,8,128,1,float16,float16,0,0.01655679941177368
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,64,8,4,128,1,fp8,fp8,0,0.02481919974088669
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,64,8,8,128,1,float16,fp8,0,0.016505600512027742
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,64,8,8,128,1,fp8,fp8,0,0.016625599563121797
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,64,8,1,128,1,float16,fp8,0,0.016631999611854555
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,64,8,1,128,1,float16,float16,0,0.014500799775123595
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,64,8,1,128,1,fp8,fp8,0,0.016577599942684172
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,64,8,2,128,1,float16,float16,0,0.01456640064716339
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,128,8,8,128,1,fp8,fp8,0,0.010444799810647965
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,64,8,2,128,1,float16,fp8,0,0.016624000668525696
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,64,8,2,128,1,fp8,fp8,0,0.016590400040149687
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,64,8,4,128,1,float16,float16,0,0.014596800506114959
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,64,8,4,128,1,fp8,fp8,0,0.016606399416923524
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,64,8,8,128,1,float16,float16,0,0.012403199821710587
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,128,8,2,128,1,fp8,fp8,0,0.01048320010304451
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,64,8,4,128,1,float16,fp8,0,0.016631999611854555
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,64,8,8,128,1,float16,fp8,0,0.010731200128793717
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,64,8,8,128,1,fp8,fp8,0,0.010856000334024429
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,64,8,1,128,1,float16,float16,0,0.010523200035095215
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,64,8,1,128,1,float16,fp8,0,0.012176000326871873
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,64,8,1,128,1,fp8,fp8,0,0.010547199845314026
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,64,8,2,128,1,float16,float16,0,0.010465600341558457
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,64,8,2,128,1,fp8,fp8,0,0.010443200170993806
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,64,8,4,128,1,float16,fp8,0,0.012399999797344208
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,64,8,4,128,1,fp8,fp8,0,0.01048320010304451
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,64,8,8,128,1,float16,float16,0,0.010435199737548828
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,64,8,8,128,1,float16,fp8,0,0.010422399640083313
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,64,8,8,128,1,fp8,fp8,0,0.010465600341558457
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,64,8,1,128,1,float16,float16,0,0.01043040007352829
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,64,8,1,128,1,float16,fp8,0,0.010407999902963639
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,64,8,1,128,1,fp8,fp8,0,0.01048479974269867
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,64,8,2,128,1,float16,float16,0,0.010351999849081039
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,64,8,2,128,1,float16,fp8,0,0.010467199981212616
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,64,8,2,128,1,fp8,fp8,0,0.010356800258159637
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,64,8,4,128,1,float16,float16,0,0.010473600029945374
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,64,8,1,128,1,float16,float16,0,0.06230400204658508
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,64,8,4,128,1,float16,fp8,0,0.010478399693965912
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,64,8,4,128,1,fp8,fp8,0,0.010463999956846238
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,64,8,8,128,1,float16,float16,0,0.010468800365924836
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,64,8,8,128,1,float16,fp8,0,0.010447999835014344
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,64,8,8,128,1,fp8,fp8,0,0.01045600026845932
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,64,8,1,128,1,float16,float16,0,0.010387200117111205
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,64,8,1,128,1,float16,fp8,0,0.010384000092744827
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,64,8,1,128,1,fp8,fp8,0,0.010372799634933472
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,64,8,2,128,1,float16,fp8,0,0.01037919968366623
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,64,8,2,128,1,fp8,fp8,0,0.010465600341558457
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,64,8,4,128,1,float16,fp8,0,0.01051200032234192
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,64,8,4,128,1,fp8,fp8,0,0.010497599840164185
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,64,8,8,128,1,float16,float16,0,0.010462400317192078
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,64,8,8,128,1,float16,fp8,0,0.01037919968366623
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,128,8,1,128,1,float16,float16,0,0.010497599840164185
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,64,8,1,128,1,float16,float16,0,0.010436800122261048
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,64,8,1,128,1,float16,fp8,0,0.010356800258159637
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,64,8,1,128,1,fp8,fp8,0,0.010427200049161912
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,64,8,2,128,1,float16,float16,0,0.010467199981212616
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,64,8,1,128,1,float16,fp8,0,0.024697600305080412
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,64,8,2,128,1,float16,fp8,0,0.010440000146627427
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,64,8,2,128,1,fp8,fp8,0,0.01053600013256073
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,64,8,4,128,1,float16,fp8,0,0.010385599732398988
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,64,8,4,128,1,fp8,fp8,0,0.010486400127410889
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,64,8,8,128,1,float16,float16,0,0.010467199981212616
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,64,8,8,128,1,float16,fp8,0,0.010440000146627427
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,64,8,8,128,1,fp8,fp8,0,0.010425599664449692
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,64,8,1,128,1,float16,float16,0,0.010427200049161912
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,64,8,1,128,1,float16,fp8,0,0.010449600219726563
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,64,8,1,128,1,fp8,fp8,0,0.010470400005578995
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,64,8,4,128,1,float16,float16,0,0.010468800365924836
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,64,8,2,128,1,float16,float16,0,0.010428799688816071
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,64,8,2,128,1,float16,fp8,0,0.010388799756765366
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,64,8,2,128,1,fp8,fp8,0,0.010478399693965912
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,64,8,4,128,1,float16,float16,0,0.01037440001964569
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,64,8,4,128,1,float16,fp8,0,0.010440000146627427
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,64,8,4,128,1,fp8,fp8,0,0.010366400331258773
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,32,8,1,128,1,float16,float16,0,0.05532479882240295
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,32,8,1,128,1,float16,fp8,0,0.05826240181922913
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,32,8,1,128,1,fp8,fp8,0,0.057817602157592775
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,32,8,2,128,1,float16,float16,0,0.05609920024871826
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,32,8,2,128,1,float16,fp8,0,0.05768960118293762
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,32,8,2,128,1,fp8,fp8,0,0.05798879861831665
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,32,8,4,128,1,float16,float16,0,0.057467198371887206
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,64,8,2,128,1,float16,float16,0,0.010473600029945374
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,32,8,4,128,1,fp8,fp8,0,0.058580797910690305
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,32,8,8,128,1,float16,float16,0,0.035017600655555724
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,64,8,4,128,1,float16,float16,0,0.010363200306892395
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,32,8,8,128,1,float16,fp8,0,0.033025598526000975
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,32,8,8,128,1,fp8,fp8,0,0.033046400547027587
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,32,8,1,128,1,float16,float16,0,0.031483200192451474
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,32,8,1,128,1,float16,fp8,0,0.03298240005970001
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,32,8,2,128,1,float16,float16,0,0.03275200128555298
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,32,8,2,128,1,float16,fp8,0,0.0329008013010025
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,32,8,2,128,1,fp8,fp8,0,0.03289600014686585
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,32,8,4,128,1,float16,float16,0,0.03298879861831665
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,32,8,4,128,1,float16,fp8,0,0.0329584002494812
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,32,8,4,128,1,fp8,fp8,0,0.032948800921440126
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,32,8,8,128,1,float16,float16,0,0.02264000028371811
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,32,8,8,128,1,float16,fp8,0,0.020692799985408784
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,32,8,8,128,1,fp8,fp8,0,0.020734399557113647
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,32,8,1,128,1,float16,float16,0,0.020678399503231047
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,32,8,1,128,1,float16,fp8,0,0.020708799362182617
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,64,8,8,128,1,fp8,fp8,0,0.010407999902963639
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,32,8,1,128,1,fp8,fp8,0,0.020707200467586517
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,32,8,2,128,1,float16,float16,0,0.020715199410915375
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,32,8,2,128,1,float16,fp8,0,0.02074880003929138
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,32,8,2,128,1,fp8,fp8,0,0.020718400180339814
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,32,8,4,128,1,float16,float16,0,0.020788800716400147
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,32,8,4,128,1,float16,fp8,0,0.02070239931344986
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,32,8,4,128,1,fp8,fp8,0,0.020708799362182617
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,32,8,8,128,1,float16,fp8,0,0.014486399292945863
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,32,8,8,128,1,fp8,fp8,0,0.014577600359916686
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,32,8,1,128,1,float16,float16,0,0.014151999354362487
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,32,8,1,128,1,float16,fp8,0,0.014579200744628906
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,32,8,2,128,1,float16,float16,0,0.014478400349617004
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,32,8,1,128,1,fp8,fp8,0,0.014451199769973755
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,64,8,4,128,1,float16,float16,0,0.010443200170993806
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,32,8,2,128,1,float16,fp8,0,0.014535999298095703
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,32,8,2,128,1,fp8,fp8,0,0.014478400349617004
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,32,8,4,128,1,float16,float16,0,0.01451680064201355
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,32,8,4,128,1,float16,fp8,0,0.01454399973154068
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,32,8,4,128,1,fp8,fp8,0,0.014483200013637542
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,32,8,8,128,1,float16,float16,0,0.010440000146627427
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,32,8,8,128,1,float16,fp8,0,0.010440000146627427
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,32,8,8,128,1,fp8,fp8,0,0.010467199981212616
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,32,8,1,128,1,float16,float16,0,0.010440000146627427
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,32,8,1,128,1,float16,fp8,0,0.010416000336408614
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,32,8,1,128,1,fp8,fp8,0,0.01037440001964569
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,32,8,2,128,1,float16,float16,0,0.01043040007352829
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,32,8,2,128,1,float16,fp8,0,0.010467199981212616
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,32,8,2,128,1,fp8,fp8,0,0.010524799674749374
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,32,8,4,128,1,float16,float16,0,0.010497599840164185
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,32,8,4,128,1,float16,fp8,0,0.010441599786281586
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,32,8,4,128,1,fp8,fp8,0,0.010412800312042236
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,32,8,8,128,1,float16,float16,0,0.0104032002389431
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,32,8,8,128,1,float16,fp8,0,0.010411199927330018
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,32,8,8,128,1,fp8,fp8,0,0.010342399775981902
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,32,8,1,128,1,float16,float16,0,0.010422399640083313
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,32,8,1,128,1,float16,fp8,0,0.010281600058078766
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,32,8,1,128,1,fp8,fp8,0,0.01045600026845932
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,32,8,2,128,1,float16,float16,0,0.010406400263309478
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,32,8,2,128,1,float16,fp8,0,0.0104032002389431
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,32,8,2,128,1,fp8,fp8,0,0.010473600029945374
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,32,8,4,128,1,float16,float16,0,0.01040479987859726
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,32,8,4,128,1,fp8,fp8,0,0.010407999902963639
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,64,8,2,128,1,float16,fp8,0,0.01252640038728714
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,32,8,4,128,1,float16,fp8,0,0.010447999835014344
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,32,8,8,128,1,float16,fp8,0,0.010465600341558457
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,32,8,8,128,1,fp8,fp8,0,0.010382399708032609
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,32,8,1,128,1,float16,float16,0,0.009340800344944
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,32,8,1,128,1,float16,fp8,0,0.010311999917030334
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,32,8,1,128,1,fp8,fp8,0,0.010494399815797806
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,32,8,2,128,1,float16,float16,0,0.010385599732398988
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,32,8,2,128,1,float16,fp8,0,0.009747199714183807
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,32,8,2,128,1,fp8,fp8,0,0.010420800000429154
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,32,8,4,128,1,float16,float16,0,0.010467199981212616
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,32,8,4,128,1,float16,fp8,0,0.009830400347709656
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,32,8,4,128,1,fp8,fp8,0,0.010276799649000167
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,32,8,8,128,1,float16,float16,0,0.010449600219726563
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,32,8,8,128,1,float16,fp8,0,0.008900800347328186
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,32,8,8,128,1,fp8,fp8,0,0.009548799693584442
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,32,8,1,128,1,float16,float16,0,0.010449600219726563
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,32,8,1,128,1,fp8,fp8,0,0.00851999968290329
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,32,8,2,128,1,float16,float16,0,0.009393599629402161
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,32,8,1,128,1,float16,fp8,0,0.00876000002026558
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,32,8,2,128,1,float16,fp8,0,0.009177599847316743
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,32,8,2,128,1,fp8,fp8,0,0.00880960002541542
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,32,8,4,128,1,float16,float16,0,0.009703999757766724
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,32,8,4,128,1,float16,fp8,0,0.010480000078678131
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,32,8,4,128,1,fp8,fp8,0,0.008524800091981888
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,32,8,8,128,1,float16,float16,0,0.009363199770450591
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,32,8,4,128,1,float16,fp8,0,0.057897597551345825
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,32,8,8,128,1,float16,fp8,0,0.010390400141477584
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,32,8,8,128,1,fp8,fp8,0,0.00846560001373291
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,32,8,1,128,1,float16,float16,0,0.009062399715185165
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,32,8,1,128,1,float16,fp8,0,0.008526399731636047
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,32,8,1,128,1,fp8,fp8,0,0.010214400291442872
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,32,8,2,128,1,float16,float16,0,0.00945120006799698
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,32,8,2,128,1,float16,fp8,0,0.008617600053548813
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,32,8,2,128,1,fp8,fp8,0,0.00878399983048439
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,32,8,4,128,1,float16,float16,0,0.010380800068378448
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,32,8,4,128,1,float16,fp8,0,0.008448000252246856
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,32,8,1,128,1,fp8,fp8,0,0.03307200074195862
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,16,8,1,128,1,float16,fp8,0,0.05278400182723999
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,16,8,1,128,1,float16,float16,0,0.05410720109939575
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,16,8,1,128,1,fp8,fp8,0,0.05304480195045471
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,16,8,2,128,1,float16,fp8,0,0.05321279764175415
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,16,8,2,128,1,fp8,fp8,0,0.053255999088287355
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,16,8,4,128,1,float16,float16,0,0.05527840256690979
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,16,8,4,128,1,float16,fp8,0,0.05336800217628479
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,16,8,4,128,1,fp8,fp8,0,0.0531328022480011
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,16,8,8,128,1,float16,float16,0,0.032995200157165526
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,32,8,8,128,1,float16,float16,0,0.01451839953660965
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,16,8,8,128,1,fp8,fp8,0,0.03105120062828064
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,16,8,1,128,1,float16,fp8,0,0.030937600135803222
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,16,8,2,128,1,float16,float16,0,0.03094879984855652
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,16,8,2,128,1,float16,fp8,0,0.030811199545860292
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,16,8,2,128,1,fp8,fp8,0,0.030987200140953065
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,16,8,4,128,1,float16,float16,0,0.03142400085926056
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,16,8,4,128,1,float16,fp8,0,0.030870398879051207
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,16,8,4,128,1,fp8,fp8,0,0.030806401371955873
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,16,8,8,128,1,float16,float16,0,0.02067359983921051
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,16,8,8,128,1,float16,fp8,0,0.019836799800395967
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,16,8,8,128,1,fp8,fp8,0,0.02070239931344986
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,16,8,1,128,1,float16,float16,0,0.020633600652217865
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,16,8,1,128,1,float16,fp8,0,0.020633600652217865
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,32,8,8,128,1,float16,float16,0,0.009393599629402161
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,16,8,1,128,1,fp8,fp8,0,0.020347200334072113
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,16,8,2,128,1,float16,float16,0,0.020644800364971162
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,16,8,2,128,1,float16,fp8,0,0.02051199972629547
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,16,8,2,128,1,fp8,fp8,0,0.01977439969778061
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,16,8,4,128,1,float16,fp8,0,0.019753600656986236
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,16,8,4,128,1,fp8,fp8,0,0.01958079934120178
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,16,8,8,128,1,float16,float16,0,0.014532800018787383
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,16,8,8,128,1,float16,fp8,0,0.013211199641227722
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,16,8,8,128,1,fp8,fp8,0,0.013283200562000275
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,32,8,4,128,1,fp8,fp8,0,0.01005920022726059
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,16,8,1,128,1,float16,fp8,0,0.013011200726032257
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,16,8,1,128,1,fp8,fp8,0,0.012931199371814727
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,16,8,2,128,1,float16,float16,0,0.05402399897575379
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,16,8,2,128,1,float16,float16,0,0.014299200475215912
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,16,8,2,128,1,float16,fp8,0,0.013980799913406372
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,16,8,2,128,1,fp8,fp8,0,0.012542399764060973
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,16,8,4,128,1,float16,float16,0,0.014552000164985656
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,16,8,4,128,1,float16,fp8,0,0.012489599734544754
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,16,8,8,128,1,float16,fp8,0,0.030964800715446474
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,16,8,8,128,1,float16,float16,0,0.01037440001964569
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,16,8,1,128,1,float16,float16,0,0.031241598725318908
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,16,8,8,128,1,float16,fp8,0,0.010462400317192078
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,16,8,8,128,1,fp8,fp8,0,0.010366400331258773
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,16,8,1,128,1,float16,float16,0,0.010478399693965912
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,16,8,1,128,1,fp8,fp8,0,0.031017601490020752
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,16,8,1,128,1,fp8,fp8,0,0.010363200306892395
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,16,8,2,128,1,float16,float16,0,0.010470400005578995
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,16,8,2,128,1,float16,fp8,0,0.010446400195360184
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,16,8,4,128,1,float16,float16,0,0.010470400005578995
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,16,8,4,128,1,float16,fp8,0,0.010441599786281586
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,16,8,4,128,1,fp8,fp8,0,0.010435199737548828
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,16,8,8,128,1,float16,float16,0,0.010473600029945374
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,16,8,8,128,1,float16,fp8,0,0.010302399843931198
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,16,8,8,128,1,fp8,fp8,0,0.010454399883747101
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,16,8,1,128,1,float16,float16,0,0.010326399654150008
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,16,8,1,128,1,float16,fp8,0,0.010497599840164185
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,16,8,1,128,1,fp8,fp8,0,0.010454399883747101
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,16,8,2,128,1,float16,float16,0,0.01048159971833229
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,16,8,2,128,1,float16,fp8,0,0.01045600026845932
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,16,8,2,128,1,fp8,fp8,0,0.010500799864530563
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,16,8,4,128,1,float16,float16,0,0.010468800365924836
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,16,8,4,128,1,float16,fp8,0,0.010460799932479859
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16,8,8,128,1,float16,float16,0,0.010480000078678131
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16,8,8,128,1,float16,fp8,0,0.010478399693965912
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16,8,8,128,1,fp8,fp8,0,0.010390400141477584
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16,8,1,128,1,float16,float16,0,0.01003199964761734
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16,8,1,128,1,float16,fp8,0,0.009033600240945816
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16,8,1,128,1,fp8,fp8,0,0.008959999680519104
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16,8,2,128,1,float16,float16,0,0.010132800042629241
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,16,8,4,128,1,float16,float16,0,0.020608000457286835
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16,8,2,128,1,fp8,fp8,0,0.009172800183296203
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16,8,4,128,1,float16,float16,0,0.009939199686050415
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16,8,4,128,1,float16,fp8,0,0.008844800293445587
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16,8,4,128,1,fp8,fp8,0,0.01037440001964569
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16,8,8,128,1,float16,float16,0,0.010367999970912933
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16,8,8,128,1,float16,fp8,0,0.010388799756765366
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,16,8,1,128,1,float16,float16,0,0.014534400403499603
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16,8,8,128,1,fp8,fp8,0,0.010371199995279311
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16,8,1,128,1,float16,float16,0,0.009404800087213516
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16,8,1,128,1,float16,fp8,0,0.009960000216960908
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16,8,1,128,1,fp8,fp8,0,0.009321600198745728
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16,8,2,128,1,float16,float16,0,0.010372799634933472
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16,8,2,128,1,float16,fp8,0,0.00910080000758171
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16,8,2,128,1,fp8,fp8,0,0.01016160026192665
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16,8,4,128,1,float16,float16,0,0.010438399761915207
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16,8,4,128,1,float16,fp8,0,0.010372799634933472
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16,8,4,128,1,fp8,fp8,0,0.009132800251245498
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16,8,8,128,1,float16,float16,0,0.01043199971318245
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16,8,8,128,1,float16,fp8,0,0.008396799862384795
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,16,8,4,128,1,fp8,fp8,0,0.014392000436782838
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16,8,8,128,1,fp8,fp8,0,0.00923520028591156
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16,8,1,128,1,float16,float16,0,0.009233599901199341
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16,8,1,128,1,fp8,fp8,0,0.009155199676752091
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16,8,2,128,1,float16,float16,0,0.008406399935483932
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,16,8,1,128,1,float16,fp8,0,0.010416000336408614
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16,8,2,128,1,float16,fp8,0,0.008408000320196151
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,16,8,2,128,1,fp8,fp8,0,0.010356800258159637
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16,8,4,128,1,float16,fp8,0,0.008463999629020691
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,1,8,1,128,1,float16,fp8,0,0.048651200532913205
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16,8,4,128,1,fp8,fp8,0,0.008473599702119828
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,1,8,1,128,1,float16,float16,0,0.05252640247344971
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,1,8,1,128,1,fp8,fp8,0,0.048014399409294126
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,1,8,2,128,1,float16,float16,0,0.052211201190948485
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,1,8,2,128,1,float16,fp8,0,0.04896160066127777
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,1,8,2,128,1,fp8,fp8,0,0.0485040009021759
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,1,8,4,128,1,float16,float16,0,0.0521232008934021
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,1,8,4,128,1,float16,fp8,0,0.048974400758743285
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,1,8,4,128,1,fp8,fp8,0,0.04858559966087341
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,1,8,8,128,1,float16,float16,0,0.030687999725341798
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,1,8,8,128,1,float16,fp8,0,0.02877599895000458
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,1,8,8,128,1,fp8,fp8,0,0.02832320034503937
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,1,8,1,128,1,float16,float16,0,0.030833598971366883
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,1,8,1,128,1,float16,fp8,0,0.02879520058631897
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,1,8,1,128,1,fp8,fp8,0,0.028887999057769776
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,1,8,2,128,1,float16,float16,0,0.03079040050506592
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,1,8,2,128,1,float16,fp8,0,0.028958401083946227
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,1,8,2,128,1,fp8,fp8,0,0.028806400299072266
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,1,8,4,128,1,float16,float16,0,0.030697599053382874
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,1,8,4,128,1,float16,fp8,0,0.02858720123767853
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,1,8,4,128,1,fp8,fp8,0,0.0286080002784729
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1,8,8,128,1,float16,fp8,0,0.0186271995306015
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1,8,8,128,1,fp8,fp8,0,0.01871040016412735
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1,8,1,128,1,float16,float16,0,0.020180800557136537
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1,8,1,128,1,float16,fp8,0,0.018617600202560425
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1,8,1,128,1,fp8,fp8,0,0.018632000684738158
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1,8,2,128,1,float16,float16,0,0.019270400702953338
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1,8,2,128,1,float16,fp8,0,0.01866080015897751
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1,8,2,128,1,fp8,fp8,0,0.018617600202560425
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1,8,4,128,1,float16,float16,0,0.02038400024175644
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1,8,4,128,1,float16,fp8,0,0.018606400489807128
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1,8,4,128,1,fp8,fp8,0,0.018619200587272643
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1,8,8,128,1,float16,float16,0,0.014500799775123595
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1,8,8,128,1,float16,fp8,0,0.012873600423336028
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1,8,8,128,1,fp8,fp8,0,0.012577599287033081
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16,8,1,128,1,float16,fp8,0,0.008403199911117553
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1,8,1,128,1,float16,float16,0,0.01456640064716339
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1,8,1,128,1,float16,fp8,0,0.0125231996178627
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1,8,1,128,1,fp8,fp8,0,0.012510399520397186
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1,8,2,128,1,float16,float16,0,0.014531199634075165
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1,8,2,128,1,float16,fp8,0,0.012540799379348756
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1,8,2,128,1,fp8,fp8,0,0.01252799928188324
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1,8,4,128,1,float16,float16,0,0.01343040019273758
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16,8,2,128,1,fp8,fp8,0,0.008451200276613235
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1,8,4,128,1,float16,fp8,0,0.012521600723266602
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1,8,8,128,1,float16,fp8,0,0.010384000092744827
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1,8,8,128,1,float16,float16,0,0.010503999888896942
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1,8,8,128,1,fp8,fp8,0,0.010367999970912933
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1,8,1,128,1,float16,float16,0,0.010507199913263321
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1,8,1,128,1,float16,fp8,0,0.010412800312042236
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1,8,1,128,1,fp8,fp8,0,0.010385599732398988
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1,8,2,128,1,float16,float16,0,0.010452800244092942
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1,8,2,128,1,float16,fp8,0,0.01051200032234192
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1,8,2,128,1,fp8,fp8,0,0.010444799810647965
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,16,8,4,128,1,fp8,fp8,0,0.010462400317192078
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1,8,4,128,1,float16,float16,0,0.010555200278759003
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1,8,4,128,1,fp8,fp8,0,0.010428799688816071
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1,8,8,128,1,float16,float16,0,0.010526400059461594
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1,8,4,128,1,float16,fp8,0,0.010387200117111205
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1,8,8,128,1,fp8,fp8,0,0.009558399766683578
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1,8,1,128,1,float16,float16,0,0.010440000146627427
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1,8,1,128,1,float16,fp8,0,0.010460799932479859
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1,8,1,128,1,fp8,fp8,0,0.010372799634933472
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1,8,2,128,1,float16,float16,0,0.010480000078678131
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1,8,2,128,1,float16,fp8,0,0.01034879982471466
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1,8,2,128,1,fp8,fp8,0,0.010419200360774993
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1,8,4,128,1,float16,float16,0,0.01043360009789467
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1,8,4,128,1,float16,fp8,0,0.010473600029945374
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1,8,8,128,1,float16,float16,0,0.018750399351119995
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1,8,4,128,1,fp8,fp8,0,0.010147199779748917
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1,8,8,128,1,float16,float16,0,0.01043360009789467
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1,8,8,128,1,float16,fp8,0,0.009214399755001068
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1,8,8,128,1,fp8,fp8,0,0.009033600240945816
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16,8,2,128,1,float16,fp8,0,0.009440000355243682
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1,8,1,128,1,float16,fp8,0,0.009046400338411332
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1,8,1,128,1,fp8,fp8,0,0.008419200032949447
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1,8,2,128,1,float16,float16,0,0.010224000364542008
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1,8,2,128,1,float16,fp8,0,0.008635199815034866
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1,8,2,128,1,fp8,fp8,0,0.008900800347328186
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1,8,4,128,1,float16,float16,0,0.010460799932479859
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1,8,4,128,1,float16,fp8,0,0.008894400298595428
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1,8,4,128,1,fp8,fp8,0,0.010364799946546554
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1,8,8,128,1,float16,float16,0,0.01048479974269867
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1,8,8,128,1,float16,fp8,0,0.008432000130414962
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1,8,8,128,1,fp8,fp8,0,0.009415999799966813
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1,8,1,128,1,float16,float16,0,0.010441599786281586
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1,8,1,128,1,float16,fp8,0,0.008388800173997879
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1,8,1,128,1,fp8,fp8,0,0.009759999811649323
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1,8,2,128,1,float16,float16,0,0.010796800255775452
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1,8,2,128,1,float16,fp8,0,0.009641599655151368
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1,8,2,128,1,fp8,fp8,0,0.00952799990773201
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1,8,4,128,1,float16,float16,0,0.010451199859380722
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1,8,4,128,1,float16,fp8,0,0.009436800330877303
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1,8,4,128,1,fp8,fp8,0,0.00838399976491928
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1,8,8,128,1,float16,float16,0,0.010475199669599533
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1,8,8,128,1,fp8,fp8,0,0.008395200222730636
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1,8,1,128,1,float16,float16,0,0.010467199981212616
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1,8,8,128,1,float16,fp8,0,0.009140799939632415
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1,8,1,128,1,float16,fp8,0,0.008441600203514098
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1,8,1,128,1,fp8,fp8,0,0.00904799997806549
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1,8,2,128,1,float16,float16,0,0.01037440001964569
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1,8,2,128,1,fp8,fp8,0,0.00942239984869957
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1,8,4,128,1,float16,float16,0,0.010331200063228607
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1,8,2,128,1,float16,fp8,0,0.008392000198364257
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1,8,4,128,1,fp8,fp8,0,0.009008000046014786
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1,8,4,128,1,float16,fp8,0,0.009307199716567993
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16,8,4,128,1,float16,float16,0,0.008460800349712371
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1,8,4,128,1,fp8,fp8,0,0.012491200119256973
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1,8,8,128,1,float16,fp8,0,0.009019199758768082
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,16384,4,1,128,1,float16,fp8,0,1.5843119621276855
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,16384,4,1,128,1,float16,float16,0,1.7357263565063477
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,16384,4,1,128,1,fp8,fp8,0,1.5912015914916993
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1,8,1,128,1,float16,float16,0,0.010467199981212616
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,16384,4,2,128,1,float16,fp8,0,1.5910256385803223
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,16384,4,2,128,1,float16,float16,0,1.7235040664672852
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16384,4,4,128,1,float16,fp8,0,0.8570528030395508
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16384,4,1,128,1,float16,float16,0,0.9046751976013183
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16384,4,4,128,1,float16,float16,0,0.9454719543457031
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,16384,4,2,128,1,fp8,fp8,0,1.5920399665832519
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16384,4,1,128,1,float16,fp8,0,0.8558671951293946
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16384,4,1,128,1,fp8,fp8,0,0.856719970703125
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16384,4,2,128,1,float16,float16,0,0.9249936103820801
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16384,4,2,128,1,float16,fp8,0,0.8557855606079101
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16384,4,4,128,1,float16,float16,0,0.5230239868164063
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16384,4,2,128,1,fp8,fp8,0,0.9208127975463867
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16384,4,4,128,1,float16,fp8,0,0.49366397857666017
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16384,4,4,128,1,fp8,fp8,0,0.49306402206420896
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16384,4,1,128,1,float16,fp8,0,0.48998398780822755
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16384,4,1,128,1,fp8,fp8,0,0.4920959949493408
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16384,4,1,128,1,float16,float16,0,0.575222396850586
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16384,4,2,128,1,float16,float16,0,0.5207888126373291
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16384,4,2,128,1,float16,fp8,0,0.4901792049407959
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16384,4,4,128,1,float16,float16,0,0.32447841167449953
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16384,4,4,128,1,float16,fp8,0,0.30994720458984376
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16384,4,4,128,1,fp8,fp8,0,0.3556848049163818
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16384,4,1,128,1,float16,float16,0,0.32596960067749026
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16384,4,1,128,1,fp8,fp8,0,0.308351993560791
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16384,4,1,128,1,float16,fp8,0,0.3081183910369873
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16384,4,2,128,1,float16,float16,0,0.3189824104309082
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16384,4,2,128,1,float16,fp8,0,0.3283312082290649
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16384,4,2,128,1,fp8,fp8,0,0.31548800468444826
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,12288,4,1,128,1,float16,fp8,0,0.9607184410095215
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,12288,4,1,128,1,float16,float16,0,1.0051648139953613
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,12288,4,1,128,1,fp8,fp8,0,0.9650464057922363
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,12288,4,2,128,1,float16,float16,0,1.0190896034240722
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,12288,4,2,128,1,float16,fp8,0,0.9673744201660156
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16384,4,4,128,1,fp8,fp8,0,0.9567343711853027
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,12288,4,4,128,1,float16,float16,0,0.5635151863098145
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,12288,4,4,128,1,fp8,fp8,0,0.549564790725708
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,12288,4,1,128,1,float16,float16,0,0.542844820022583
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,12288,4,1,128,1,float16,fp8,0,0.5299583911895752
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,12288,4,1,128,1,fp8,fp8,0,0.5499951839447021
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,12288,4,2,128,1,float16,float16,0,0.5519440174102783
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,12288,4,2,128,1,float16,fp8,0,0.5394911766052246
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,12288,4,2,128,1,fp8,fp8,0,0.5311439990997314
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,12288,4,4,128,1,float16,float16,0,0.33128321170806885
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,12288,4,4,128,1,float16,fp8,0,0.3125008106231689
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,12288,4,4,128,1,fp8,fp8,0,0.31624319553375246
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,12288,4,1,128,1,float16,float16,0,0.313369607925415
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,12288,4,1,128,1,float16,fp8,0,0.3146048069000244
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,12288,4,1,128,1,fp8,fp8,0,0.31084480285644533
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,12288,4,2,128,1,float16,float16,0,0.316921591758728
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,12288,4,2,128,1,float16,fp8,0,0.31013600826263427
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,12288,4,4,128,1,float16,float16,0,0.20773439407348632
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,12288,4,2,128,1,fp8,fp8,0,0.31184000968933107
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,12288,4,4,128,1,float16,fp8,0,0.2022576093673706
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,12288,4,4,128,1,fp8,fp8,0,0.19976320266723632
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,12288,4,1,128,1,float16,float16,0,0.20283041000366211
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,12288,4,1,128,1,float16,fp8,0,0.19995039701461792
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,12288,4,1,128,1,fp8,fp8,0,0.20107998847961425
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,12288,4,2,128,1,float16,float16,0,0.20626399517059327
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,12288,4,2,128,1,float16,fp8,0,0.19950079917907715
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,12288,4,2,128,1,fp8,fp8,0,0.1990399956703186
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,10240,4,1,128,1,float16,float16,0,0.7265423774719239
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,10240,4,1,128,1,float16,fp8,0,0.7112624168395996
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,10240,4,1,128,1,fp8,fp8,0,0.7081567764282226
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,10240,4,2,128,1,float16,float16,0,0.7283455848693847
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,10240,4,2,128,1,float16,fp8,0,0.7089632034301758
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,10240,4,4,128,1,float16,float16,0,0.41329278945922854
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,10240,4,4,128,1,float16,fp8,0,0.3973072052001953
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,12288,4,4,128,1,float16,fp8,0,0.5321631908416748
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,10240,4,4,128,1,fp8,fp8,0,0.39792640209198
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,10240,4,1,128,1,float16,float16,0,0.4002208232879639
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,10240,4,2,128,1,fp8,fp8,0,0.7088607788085938
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,12288,4,2,128,1,fp8,fp8,0,0.9621055603027344
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,10240,4,1,128,1,float16,fp8,0,0.39391839504241943
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,10240,4,4,128,1,float16,float16,0,0.2507359981536865
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,10240,4,2,128,1,float16,float16,0,0.39772961139678953
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,10240,4,1,128,1,fp8,fp8,0,0.3940975904464722
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,10240,4,2,128,1,fp8,fp8,0,0.3963887929916382
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,10240,4,4,128,1,fp8,fp8,0,0.2406752109527588
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,10240,4,1,128,1,float16,float16,0,0.24229118824005128
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,10240,4,1,128,1,float16,fp8,0,0.24202880859375
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,10240,4,1,128,1,fp8,fp8,0,0.24166080951690674
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,10240,4,2,128,1,float16,float16,0,0.25335519313812255
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,10240,4,2,128,1,float16,fp8,0,0.24026880264282227
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,10240,4,2,128,1,fp8,fp8,0,0.2414911985397339
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,10240,4,4,128,1,fp8,fp8,0,0.14833920001983641
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,10240,4,1,128,1,float16,float16,0,0.14923520088195802
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,10240,4,1,128,1,float16,fp8,0,0.14679360389709473
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,10240,4,1,128,1,fp8,fp8,0,0.1476863980293274
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16384,4,2,128,1,fp8,fp8,0,0.49015040397644044
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,10240,4,2,128,1,float16,fp8,0,0.14690239429473878
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,10240,4,2,128,1,fp8,fp8,0,0.14849120378494263
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,8192,4,1,128,1,float16,float16,0,0.9162015914916992
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,8192,4,1,128,1,float16,fp8,0,0.9124208450317383
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,8192,4,1,128,1,fp8,fp8,0,0.9143183708190918
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,8192,4,2,128,1,float16,float16,0,0.9213999748229981
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,8192,4,2,128,1,float16,fp8,0,0.9129072189331054
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,8192,4,2,128,1,fp8,fp8,0,0.9152511596679688
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,10240,4,2,128,1,float16,fp8,0,0.3954463958740234
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,8192,4,4,128,1,float16,float16,0,0.5109839916229248
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,8192,4,4,128,1,fp8,fp8,0,0.49338560104370116
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,10240,4,4,128,1,float16,float16,0,0.15258400440216063
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,10240,4,4,128,1,float16,fp8,0,0.150491201877594
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,8192,4,1,128,1,fp8,fp8,0,0.49228482246398925
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,10240,4,2,128,1,float16,float16,0,0.14983999729156494
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,8192,4,2,128,1,float16,float16,0,0.4861839771270752
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,8192,4,2,128,1,float16,fp8,0,0.490718412399292
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,8192,4,2,128,1,fp8,fp8,0,0.49231839179992676
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,8192,4,4,128,1,float16,fp8,0,0.28248000144958496
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,8192,4,4,128,1,fp8,fp8,0,0.2822688102722168
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,8192,4,1,128,1,float16,float16,0,0.27569119930267333
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,8192,4,1,128,1,float16,fp8,0,0.279801607131958
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,8192,4,1,128,1,fp8,fp8,0,0.2805216073989868
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,8192,4,2,128,1,float16,fp8,0,0.280019211769104
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,8192,4,2,128,1,fp8,fp8,0,0.2803791999816895
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,8192,4,4,128,1,float16,float16,0,0.18155039548873902
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,8192,4,4,128,1,float16,fp8,0,0.4920095920562744
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,10240,4,4,128,1,float16,fp8,0,0.24079680442810059
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,8192,4,4,128,1,float16,fp8,0,0.17685920000076294
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,8192,4,4,128,1,fp8,fp8,0,0.17632800340652466
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,8192,4,1,128,1,float16,float16,0,0.1756592035293579
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,8192,4,1,128,1,float16,fp8,0,0.17593439817428588
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,8192,4,1,128,1,float16,float16,0,0.4879631996154785
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,8192,4,1,128,1,float16,fp8,0,0.4912384033203125
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,8192,4,2,128,1,float16,float16,0,0.1756351947784424
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,8192,4,1,128,1,fp8,fp8,0,0.1767248034477234
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,8192,4,2,128,1,float16,fp8,0,0.17568800449371338
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,8192,4,4,128,1,float16,float16,0,0.1140239953994751
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,8192,4,4,128,1,float16,fp8,0,0.11138399839401245
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,8192,4,4,128,1,fp8,fp8,0,0.11197600364685059
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,8192,4,1,128,1,float16,float16,0,0.11251519918441773
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,8192,4,1,128,1,float16,fp8,0,0.11159360408782959
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,8192,4,1,128,1,fp8,fp8,0,0.11104639768600463
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,8192,4,2,128,1,float16,float16,0,0.11263999938964844
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,8192,4,2,128,1,float16,fp8,0,0.11098239421844483
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,8192,4,2,128,1,fp8,fp8,0,0.11131520271301269
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,8192,4,4,128,1,float16,float16,0,0.28745760917663576
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,6144,4,1,128,1,float16,float16,0,0.5472271919250489
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,6144,4,1,128,1,float16,fp8,0,0.5668272018432617
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,6144,4,2,128,1,fp8,fp8,0,0.5670063972473145
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,6144,4,4,128,1,float16,float16,0,0.315447998046875
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,6144,4,4,128,1,float16,fp8,0,0.3123807907104492
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,8192,4,2,128,1,float16,float16,0,0.2760015964508057
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,6144,4,1,128,1,float16,float16,0,0.2969712018966675
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,6144,4,1,128,1,float16,fp8,0,0.3109568119049072
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,6144,4,1,128,1,fp8,fp8,0,0.3099168062210083
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,6144,4,2,128,1,float16,float16,0,0.29818239212036135
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,6144,4,2,128,1,float16,fp8,0,0.31106560230255126
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,6144,4,4,128,1,float16,float16,0,0.1846959948539734
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,6144,4,2,128,1,fp8,fp8,0,0.3123696088790894
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,6144,4,4,128,1,float16,fp8,0,0.18153760433197022
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,6144,4,4,128,1,fp8,fp8,0,0.18219679594039917
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,6144,4,1,128,1,float16,fp8,0,0.18081599473953247
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,6144,4,1,128,1,fp8,fp8,0,0.1813199996948242
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,6144,4,2,128,1,float16,float16,0,0.17532639503479003
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,6144,4,2,128,1,float16,fp8,0,0.1809391975402832
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,6144,4,4,128,1,float16,float16,0,0.11870399713516236
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,6144,4,2,128,1,fp8,fp8,0,0.18166879415512086
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,8192,4,2,128,1,fp8,fp8,0,0.17469600439071656
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,6144,4,4,128,1,float16,fp8,0,0.11655999422073364
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,6144,4,4,128,1,fp8,fp8,0,0.11595679521560669
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,6144,4,1,128,1,float16,float16,0,0.11327840089797973
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,6144,4,1,128,1,float16,fp8,0,0.1169808030128479
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,6144,4,1,128,1,fp8,fp8,0,0.11578079462051391
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,6144,4,2,128,1,float16,float16,0,0.11358079910278321
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,6144,4,2,128,1,float16,fp8,0,0.1159775972366333
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,6144,4,2,128,1,fp8,fp8,0,0.11532479524612427
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,6144,4,4,128,1,float16,float16,0,0.08842880129814149
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,6144,4,4,128,1,float16,fp8,0,0.08604959845542907
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,6144,4,4,128,1,fp8,fp8,0,0.08524320125579835
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,6144,4,1,128,1,float16,float16,0,0.08635200262069702
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,6144,4,1,128,1,float16,fp8,0,0.08599839806556701
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,6144,4,1,128,1,fp8,fp8,0,0.08503360152244568
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,6144,4,1,128,1,fp8,fp8,0,0.570251178741455
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,6144,4,2,128,1,float16,fp8,0,0.08630080223083496
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,6144,4,2,128,1,fp8,fp8,0,0.08502399921417236
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,6144,4,2,128,1,float16,fp8,0,0.5711135864257812
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,6144,4,4,128,1,fp8,fp8,0,0.31245601177215576
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,4096,4,1,128,1,float16,float16,0,0.5152991771697998
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,4096,4,1,128,1,float16,fp8,0,0.5656239986419678
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,4096,4,2,128,1,float16,float16,0,0.514406394958496
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,4096,4,1,128,1,fp8,fp8,0,0.565230417251587
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,4096,4,4,128,1,float16,float16,0,0.2962064027786255
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,4096,4,4,128,1,float16,fp8,0,0.3064255952835083
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,4096,4,2,128,1,float16,fp8,0,0.5646240234375
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,4096,4,4,128,1,fp8,fp8,0,0.3042959928512573
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,4096,4,2,128,1,fp8,fp8,0,0.5687967777252197
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,4096,4,1,128,1,float16,float16,0,0.2731519937515259
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,4096,4,1,128,1,float16,fp8,0,0.30496160984039306
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,6144,4,1,128,1,float16,float16,0,0.17465440034866334
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,4096,4,2,128,1,float16,fp8,0,0.3042288064956665
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,4096,4,4,128,1,float16,float16,0,0.1694767951965332
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,4096,4,2,128,1,fp8,fp8,0,0.3042623996734619
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,4096,4,4,128,1,fp8,fp8,0,0.17328640222549438
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,4096,4,4,128,1,float16,fp8,0,0.17287039756774902
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,4096,4,1,128,1,float16,float16,0,0.15783040523529052
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,4096,4,1,128,1,float16,fp8,0,0.17140159606933594
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,4096,4,1,128,1,fp8,fp8,0,0.17067840099334716
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,4096,4,2,128,1,float16,float16,0,0.15949759483337403
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,4096,4,2,128,1,float16,fp8,0,0.17212799787521363
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,4096,4,4,128,1,float16,float16,0,0.10727200508117676
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,4096,4,4,128,1,float16,fp8,0,0.10779839754104614
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,4096,4,2,128,1,fp8,fp8,0,0.17146559953689575
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,4096,4,1,128,1,float16,float16,0,0.10178879499435425
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,4096,4,4,128,1,fp8,fp8,0,0.10693919658660889
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,4096,4,1,128,1,float16,fp8,0,0.10721440315246582
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,4096,4,1,128,1,fp8,fp8,0,0.10682719945907593
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,4096,4,2,128,1,float16,float16,0,0.1015023946762085
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,4096,4,2,128,1,fp8,fp8,0,0.10664960145950317
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,4096,4,4,128,1,float16,float16,0,0.0659712016582489
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,4096,4,4,128,1,float16,fp8,0,0.06606400012969971
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,4096,4,4,128,1,fp8,fp8,0,0.06575359702110291
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,4096,4,1,128,1,float16,float16,0,0.06395840048789977
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,4096,4,1,128,1,float16,fp8,0,0.06584799885749817
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,4096,4,1,128,1,fp8,fp8,0,0.06573119759559631
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,4096,4,2,128,1,float16,float16,0,0.06404160261154175
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,6144,4,2,128,1,float16,float16,0,0.0863919973373413
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,4096,4,2,128,1,float16,fp8,0,0.06594880223274231
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,4096,4,2,128,1,fp8,fp8,0,0.06593760251998901
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,4096,4,4,128,1,float16,float16,0,0.06156960129737854
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,4096,4,4,128,1,float16,fp8,0,0.05989120006561279
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,4096,4,4,128,1,fp8,fp8,0,0.060684800148010254
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,4096,4,1,128,1,float16,float16,0,0.05980160236358643
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,4096,4,1,128,1,float16,fp8,0,0.05974239706993103
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,4096,4,1,128,1,fp8,fp8,0,0.06012639999389648
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,4096,4,2,128,1,float16,float16,0,0.059910398721694944
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,4096,4,2,128,1,float16,fp8,0,0.059683197736740114
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,4096,4,2,128,1,fp8,fp8,0,0.0611519992351532
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,3072,4,1,128,1,float16,float16,0,0.31672799587249756
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,3072,4,1,128,1,float16,fp8,0,0.36674559116363525
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,3072,4,2,128,1,float16,float16,0,0.31992158889770506
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,3072,4,2,128,1,float16,fp8,0,0.36729280948638915
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,4096,4,2,128,1,float16,float16,0,0.2757215976715088
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,4096,4,1,128,1,fp8,fp8,0,0.3024319887161255
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,3072,4,2,128,1,fp8,fp8,0,0.36935200691223147
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,3072,4,4,128,1,float16,float16,0,0.1907312035560608
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,3072,4,4,128,1,float16,fp8,0,0.20009119510650636
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,3072,4,4,128,1,fp8,fp8,0,0.19947199821472167
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,3072,4,1,128,1,float16,float16,0,0.17427040338516236
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,3072,4,1,128,1,float16,fp8,0,0.2002351999282837
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,3072,4,1,128,1,fp8,fp8,0,0.198854398727417
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,3072,4,2,128,1,float16,fp8,0,0.20035998821258544
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,3072,4,4,128,1,float16,float16,0,0.11363680362701416
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,3072,4,2,128,1,fp8,fp8,0,0.1999328017234802
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,3072,4,4,128,1,float16,fp8,0,0.1165168046951294
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,3072,4,1,128,1,float16,float16,0,0.10414719581604004
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,3072,4,1,128,1,float16,fp8,0,0.11529920101165772
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,4096,4,2,128,1,float16,fp8,0,0.10693279504776002
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,3072,4,1,128,1,fp8,fp8,0,0.11570240259170532
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,3072,4,2,128,1,float16,float16,0,0.10568159818649292
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,3072,4,2,128,1,float16,fp8,0,0.11552159786224366
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,3072,4,2,128,1,fp8,fp8,0,0.11591199636459351
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,3072,4,4,128,1,float16,float16,0,0.0723360002040863
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,3072,4,4,128,1,float16,fp8,0,0.07376160025596619
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,3072,4,4,128,1,fp8,fp8,0,0.07379840016365051
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,3072,4,1,128,1,float16,float16,0,0.06864960193634033
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,3072,4,1,128,1,float16,fp8,0,0.07365279793739318
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,3072,4,1,128,1,fp8,fp8,0,0.07265759706497192
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,6144,4,2,128,1,float16,float16,0,0.5568607807159424
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,3072,4,2,128,1,float16,float16,0,0.06907520294189454
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,3072,4,2,128,1,float16,fp8,0,0.07358239889144898
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,3072,4,2,128,1,fp8,fp8,0,0.07301279902458191
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,3072,4,4,128,1,float16,float16,0,0.05160639882087707
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,3072,4,4,128,1,float16,fp8,0,0.05145599842071533
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,3072,4,4,128,1,fp8,fp8,0,0.05149440169334411
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,3072,4,1,128,1,float16,float16,0,0.04939840137958527
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,3072,4,1,128,1,float16,fp8,0,0.051446402072906496
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,3072,4,1,128,1,fp8,fp8,0,0.05141119956970215
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,3072,4,2,128,1,float16,float16,0,0.04992960095405578
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,3072,4,2,128,1,fp8,fp8,0,0.051534402370452884
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,3072,4,4,128,1,float16,float16,0,0.04936000108718872
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,3072,4,2,128,1,float16,fp8,0,0.05149279832839966
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,3072,4,4,128,1,float16,fp8,0,0.047516798973083495
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,3072,4,4,128,1,fp8,fp8,0,0.04733439981937408
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,3072,4,1,128,1,float16,float16,0,0.04826720058917999
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,3072,4,1,128,1,float16,fp8,0,0.04742720127105713
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,3072,4,1,128,1,fp8,fp8,0,0.04806079864501953
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,3072,4,2,128,1,float16,float16,0,0.047624000906944276
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,3072,4,2,128,1,float16,fp8,0,0.04729759991168976
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,3072,4,2,128,1,fp8,fp8,0,0.04732159972190857
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,3072,4,1,128,1,fp8,fp8,0,0.36865279674530027
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,2048,4,1,128,1,float16,float16,0,0.30960960388183595
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,2048,4,1,128,1,float16,fp8,0,0.3904671907424927
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,2048,4,1,128,1,fp8,fp8,0,0.3917680025100708
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,2048,4,2,128,1,float16,fp8,0,0.3913952112197876
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,2048,4,4,128,1,float16,float16,0,0.19128799438476562
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,2048,4,2,128,1,fp8,fp8,0,0.3919503927230835
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,2048,4,4,128,1,float16,fp8,0,0.20919039249420165
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,2048,4,1,128,1,float16,float16,0,0.16949599981307983
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,2048,4,4,128,1,fp8,fp8,0,0.2092128038406372
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,2048,4,1,128,1,float16,fp8,0,0.20880959033966065
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,2048,4,2,128,1,fp8,fp8,0,0.20855519771575928
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,2048,4,4,128,1,float16,float16,0,0.1101151943206787
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,3072,4,2,128,1,float16,float16,0,0.17580000162124634
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,2048,4,4,128,1,float16,fp8,0,0.1173856019973755
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,2048,4,4,128,1,fp8,fp8,0,0.11761120557785035
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,2048,4,1,128,1,float16,float16,0,0.09861599802970886
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,2048,4,1,128,1,float16,fp8,0,0.11549279689788819
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,2048,4,1,128,1,fp8,fp8,0,0.11536159515380859
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,2048,4,2,128,1,float16,float16,0,0.10009440183639526
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,2048,4,2,128,1,float16,fp8,0,0.11580640077590942
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,2048,4,2,128,1,fp8,fp8,0,0.11583679914474487
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,2048,4,4,128,1,float16,float16,0,0.06791840195655822
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,2048,4,4,128,1,float16,fp8,0,0.06980640292167664
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,2048,4,4,128,1,fp8,fp8,0,0.06983360052108764
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,2048,4,1,128,1,float16,float16,0,0.06180319786071777
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,2048,4,1,128,1,float16,fp8,0,0.06982240080833435
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,2048,4,1,128,1,fp8,fp8,0,0.06961280107498169
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,2048,4,2,128,1,float16,float16,0,0.0625328004360199
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,2048,4,2,128,1,float16,fp8,0,0.06986560225486756
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,2048,4,2,128,1,fp8,fp8,0,0.06977440118789673
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,2048,4,4,128,1,float16,float16,0,0.04118880033493042
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,2048,4,4,128,1,float16,fp8,0,0.04320479929447174
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,2048,4,4,128,1,fp8,fp8,0,0.04315840005874634
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,2048,4,1,128,1,float16,float16,0,0.03914560079574585
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,2048,4,1,128,1,float16,fp8,0,0.043219199776649474
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,2048,4,1,128,1,fp8,fp8,0,0.04311839938163757
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,2048,4,2,128,1,float16,float16,0,0.03914720118045807
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,2048,4,2,128,1,float16,fp8,0,0.04312160015106201
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,2048,4,2,128,1,fp8,fp8,0,0.04316479861736298
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,2048,4,4,128,1,float16,float16,0,0.037006399035453795
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,2048,4,4,128,1,float16,fp8,0,0.03713279962539673
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,2048,4,4,128,1,fp8,fp8,0,0.0370608001947403
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,2048,4,1,128,1,float16,float16,0,0.0352512001991272
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,2048,4,1,128,1,float16,fp8,0,0.037110400199890134
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,2048,4,2,128,1,float16,float16,0,0.03670240044593811
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,2048,4,2,128,1,float16,fp8,0,0.0371535986661911
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,2048,4,2,128,1,fp8,fp8,0,0.03712640106678009
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,2048,4,2,128,1,float16,float16,0,0.315065598487854
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,2048,4,4,128,1,float16,fp8,0,0.0350383996963501
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,2048,4,4,128,1,fp8,fp8,0,0.03514240086078644
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,2048,4,1,128,1,float16,float16,0,0.035006400942802426
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,2048,4,1,128,1,fp8,fp8,0,0.2088912010192871
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,2048,4,1,128,1,float16,fp8,0,0.035068801045417784
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,2048,4,1,128,1,fp8,fp8,0,0.035017600655555724
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,2048,4,2,128,1,float16,float16,0,0.03500800132751465
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,2048,4,2,128,1,float16,float16,0,0.17147680521011352
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,2048,4,2,128,1,float16,fp8,0,0.035011199116706845
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,2048,4,2,128,1,fp8,fp8,0,0.03503040075302124
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1536,4,1,128,1,float16,float16,0,0.2021343946456909
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1536,4,1,128,1,float16,fp8,0,0.2661504030227661
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,3072,4,4,128,1,fp8,fp8,0,0.1161471962928772
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1536,4,2,128,1,float16,float16,0,0.20515840053558348
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1536,4,4,128,1,float16,float16,0,0.13042080402374268
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1536,4,2,128,1,float16,fp8,0,0.2668623924255371
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1536,4,2,128,1,fp8,fp8,0,0.26415040493011477
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1536,4,4,128,1,float16,fp8,0,0.14495680332183838
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1536,4,4,128,1,fp8,fp8,0,0.1458191990852356
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1536,4,1,128,1,float16,float16,0,0.11373599767684936
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1536,4,1,128,1,float16,fp8,0,0.14292320013046264
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1536,4,1,128,1,fp8,fp8,0,0.14366559982299804
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1536,4,2,128,1,float16,float16,0,0.11439839601516724
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1536,4,2,128,1,float16,fp8,0,0.14376159906387329
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1536,4,2,128,1,fp8,fp8,0,0.14440640211105346
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1536,4,4,128,1,float16,fp8,0,0.08161600232124329
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1536,4,4,128,1,fp8,fp8,0,0.08185279965400696
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1536,4,1,128,1,float16,float16,0,0.06697760224342346
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1536,4,1,128,1,float16,fp8,0,0.08066719770431519
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1536,4,1,128,1,fp8,fp8,0,0.08089759945869446
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1536,4,2,128,1,float16,fp8,0,0.08071200251579284
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,2048,4,1,128,1,fp8,fp8,0,0.03700479865074158
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1536,4,2,128,1,fp8,fp8,0,0.08028799891471863
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1536,4,4,128,1,float16,float16,0,0.04728319942951202
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1536,4,4,128,1,float16,fp8,0,0.0495712012052536
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1536,4,4,128,1,fp8,fp8,0,0.04936319887638092
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1536,4,1,128,1,float16,float16,0,0.04380640089511871
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1536,4,1,128,1,float16,fp8,0,0.049665600061416626
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,2048,4,4,128,1,float16,float16,0,0.03508639931678772
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1536,4,1,128,1,fp8,fp8,0,0.04935680031776428
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1536,4,2,128,1,float16,float16,0,0.044336000084877016
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1536,4,4,128,1,float16,float16,0,0.03300319910049439
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1536,4,4,128,1,float16,fp8,0,0.03495039939880371
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1536,4,4,128,1,fp8,fp8,0,0.03454720079898834
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1536,4,1,128,1,float16,float16,0,0.030907198786735535
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1536,4,1,128,1,float16,fp8,0,0.0341264009475708
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1536,4,1,128,1,fp8,fp8,0,0.03443520069122315
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1536,4,2,128,1,float16,float16,0,0.030959999561309813
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,2048,4,2,128,1,float16,fp8,0,0.20942399501800538
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1536,4,2,128,1,float16,fp8,0,0.03409599959850311
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1536,4,2,128,1,fp8,fp8,0,0.035046398639678955
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1536,4,4,128,1,float16,float16,0,0.030921599268913268
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1536,4,4,128,1,float16,fp8,0,0.030943998694419862
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1536,4,4,128,1,fp8,fp8,0,0.030956798791885377
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1536,4,1,128,1,float16,float16,0,0.02903040051460266
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1536,4,1,128,1,fp8,fp8,0,0.030983999371528625
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1536,4,1,128,1,float16,fp8,0,0.03094879984855652
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1536,4,2,128,1,float16,float16,0,0.02895520031452179
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1536,4,2,128,1,float16,fp8,0,0.030899199843406677
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1536,4,1,128,1,fp8,fp8,0,0.2659296035766602
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1536,4,4,128,1,float16,float16,0,0.02889760136604309
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1536,4,2,128,1,fp8,fp8,0,0.03089280128479004
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1536,4,4,128,1,float16,fp8,0,0.028916800022125246
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1536,4,4,128,1,fp8,fp8,0,0.028911998867988585
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1536,4,1,128,1,float16,float16,0,0.02887679934501648
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1536,4,1,128,1,float16,fp8,0,0.02898080050945282
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1536,4,1,128,1,fp8,fp8,0,0.028883200883865357
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1536,4,2,128,1,float16,float16,0,0.02884480059146881
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1536,4,2,128,1,float16,fp8,0,0.028908801078796387
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1536,4,2,128,1,fp8,fp8,0,0.02892960011959076
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,1024,4,1,128,1,float16,float16,0,0.21276319026947021
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,1024,4,1,128,1,fp8,fp8,0,0.3020607948303223
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,1024,4,1,128,1,float16,fp8,0,0.30371999740600586
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,1024,4,2,128,1,float16,float16,0,0.21718719005584716
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1024,4,4,128,1,float16,float16,0,0.13827040195465087
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1024,4,4,128,1,float16,fp8,0,0.16068639755249023
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,1024,4,2,128,1,float16,fp8,0,0.3018671989440918
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1536,4,4,128,1,float16,float16,0,0.0762943983078003
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1024,4,1,128,1,float16,float16,0,0.11754239797592163
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1024,4,4,128,1,fp8,fp8,0,0.1607983946800232
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1024,4,1,128,1,float16,fp8,0,0.15801119804382324
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1024,4,1,128,1,fp8,fp8,0,0.15940639972686768
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1024,4,2,128,1,float16,float16,0,0.11831200122833252
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1024,4,2,128,1,float16,fp8,0,0.15831040143966674
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1536,4,2,128,1,float16,float16,0,0.0686464011669159
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1024,4,4,128,1,float16,float16,0,0.07974879741668701
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1024,4,2,128,1,fp8,fp8,0,0.15928959846496582
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1024,4,4,128,1,float16,fp8,0,0.0883072018623352
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1024,4,1,128,1,float16,float16,0,0.06822720170021057
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1024,4,4,128,1,fp8,fp8,0,0.088919997215271
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1024,4,1,128,1,float16,fp8,0,0.08674399852752686
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1024,4,1,128,1,fp8,fp8,0,0.08627039790153504
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1024,4,2,128,1,float16,float16,0,0.06917279958724976
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1024,4,2,128,1,float16,fp8,0,0.08714720010757446
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1024,4,2,128,1,fp8,fp8,0,0.08654239773750305
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1024,4,4,128,1,float16,float16,0,0.04824959933757782
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1024,4,4,128,1,float16,fp8,0,0.05173119902610779
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1024,4,4,128,1,fp8,fp8,0,0.05140479803085327
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1024,4,1,128,1,float16,fp8,0,0.05144960284233093
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1024,4,1,128,1,fp8,fp8,0,0.0514847993850708
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1024,4,2,128,1,float16,float16,0,0.04294080138206482
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1024,4,2,128,1,float16,fp8,0,0.051451200246810914
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1536,4,2,128,1,float16,fp8,0,0.049342399835586546
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1024,4,4,128,1,float16,float16,0,0.028907200694084166
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1024,4,2,128,1,fp8,fp8,0,0.05158079862594604
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1024,4,4,128,1,float16,fp8,0,0.03096800148487091
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1024,4,1,128,1,float16,float16,0,0.026862400770187377
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1024,4,1,128,1,float16,fp8,0,0.030928000807762146
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1024,4,2,128,1,float16,float16,0,0.0268528014421463
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1024,4,1,128,1,fp8,fp8,0,0.030932798981666565
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1024,4,2,128,1,float16,fp8,0,0.030983999371528625
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1024,4,2,128,1,fp8,fp8,0,0.030956798791885377
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1024,4,4,128,1,fp8,fp8,0,0.02613919973373413
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1024,4,1,128,1,float16,float16,0,0.02354719936847687
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1024,4,1,128,1,float16,fp8,0,0.024823999404907225
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1024,4,1,128,1,fp8,fp8,0,0.026331201195716858
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1024,4,2,128,1,float16,float16,0,0.024699200689792634
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1024,4,2,128,1,float16,fp8,0,0.026612800359725953
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1024,4,2,128,1,fp8,fp8,0,0.02682720124721527
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1024,4,4,128,1,float16,float16,0,0.022963200509548188
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1024,4,4,128,1,float16,fp8,0,0.023262399435043334
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1024,4,4,128,1,fp8,fp8,0,0.022840000689029694
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1024,4,1,128,1,float16,float16,0,0.022697600722312927
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1024,4,1,128,1,float16,fp8,0,0.02282560020685196
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1024,4,1,128,1,fp8,fp8,0,0.022750400006771088
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1024,4,2,128,1,float16,float16,0,0.022752000391483305
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,1024,4,2,128,1,fp8,fp8,0,0.3028464078903198
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1024,4,2,128,1,float16,fp8,0,0.022708800435066224
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1024,4,2,128,1,fp8,fp8,0,0.022976000607013703
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1024,4,4,128,1,float16,fp8,0,0.022734400629997254
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1024,4,4,128,1,fp8,fp8,0,0.022676800191402436
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1024,4,1,128,1,float16,float16,0,0.0226623997092247
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1024,4,1,128,1,float16,fp8,0,0.02269600033760071
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1024,4,1,128,1,fp8,fp8,0,0.02266079932451248
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1024,4,2,128,1,float16,float16,0,0.022710399329662324
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1024,4,2,128,1,float16,fp8,0,0.022737599909305573
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1024,4,2,128,1,fp8,fp8,0,0.022742399573326112
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1024,4,1,128,1,float16,float16,0,0.04215520024299622
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1536,4,2,128,1,fp8,fp8,0,0.04960640072822571
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,512,4,1,128,1,float16,float16,0,0.16473599672317504
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1024,4,4,128,1,fp8,fp8,0,0.03097440004348755
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,512,4,1,128,1,float16,fp8,0,0.2562448024749756
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1024,4,4,128,1,float16,fp8,0,0.026025599241256712
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,512,4,2,128,1,float16,float16,0,0.1684559941291809
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,512,4,1,128,1,fp8,fp8,0,0.2583215951919556
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,512,4,2,128,1,float16,fp8,0,0.2545023918151855
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,512,4,4,128,1,float16,float16,0,0.11183840036392212
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,512,4,4,128,1,float16,fp8,0,0.13606079816818237
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,512,4,1,128,1,float16,float16,0,0.09065120220184326
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,512,4,4,128,1,fp8,fp8,0,0.13711520433425903
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,512,4,1,128,1,float16,fp8,0,0.1357983946800232
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,512,4,1,128,1,fp8,fp8,0,0.13551679849624634
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,512,4,2,128,1,float16,float16,0,0.09145280122756957
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,512,4,2,128,1,float16,fp8,0,0.1359935998916626
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,512,4,4,128,1,float16,float16,0,0.06341919898986817
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,512,4,4,128,1,fp8,fp8,0,0.07384960055351257
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,512,4,2,128,1,fp8,fp8,0,0.13599679470062256
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,512,4,1,128,1,float16,float16,0,0.05214400291442871
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,512,4,1,128,1,float16,fp8,0,0.07195680141448975
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,512,4,1,128,1,fp8,fp8,0,0.07191359996795654
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,512,4,2,128,1,float16,fp8,0,0.07325119972229004
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,512,4,4,128,1,float16,float16,0,0.038406398892402646
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,512,4,2,128,1,fp8,fp8,0,0.07197759747505188
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,512,4,4,128,1,float16,fp8,0,0.0425247997045517
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,512,4,1,128,1,float16,float16,0,0.03294079899787903
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,512,4,4,128,1,fp8,fp8,0,0.041577601432800294
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,512,4,1,128,1,float16,fp8,0,0.0418368011713028
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,512,4,1,128,1,fp8,fp8,0,0.04199999868869782
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,512,4,2,128,1,float16,fp8,0,0.04210239946842194
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,512,4,2,128,1,fp8,fp8,0,0.04233759939670563
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,512,4,4,128,1,float16,float16,0,0.02237440049648285
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,512,4,4,128,1,float16,fp8,0,0.024857600033283234
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,512,4,4,128,1,fp8,fp8,0,0.02481119930744171
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,512,4,1,128,1,float16,float16,0,0.020393599569797517
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,512,4,1,128,1,float16,fp8,0,0.02483679950237274
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1024,4,4,128,1,float16,float16,0,0.022708800435066224
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,512,4,1,128,1,fp8,fp8,0,0.02476319968700409
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,512,4,2,128,1,float16,fp8,0,0.024792000651359558
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,512,4,2,128,1,fp8,fp8,0,0.024884800612926482
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,512,4,4,128,1,float16,float16,0,0.018668800592422485
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,512,4,4,128,1,float16,fp8,0,0.020691199600696562
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,512,4,4,128,1,fp8,fp8,0,0.02060000002384186
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,512,4,1,128,1,float16,float16,0,0.01663679927587509
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,512,4,1,128,1,float16,fp8,0,0.02051839977502823
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,512,4,1,128,1,fp8,fp8,0,0.0207056000828743
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,512,4,2,128,1,float16,float16,0,0.017027199268341064
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,512,4,2,128,1,float16,fp8,0,0.020587199926376344
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,512,4,2,128,1,fp8,fp8,0,0.020692799985408784
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,512,4,4,128,1,float16,float16,0,0.016616000235080718
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,512,4,4,128,1,float16,fp8,0,0.018646399676799773
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,512,4,4,128,1,fp8,fp8,0,0.01860959976911545
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,512,4,1,128,1,float16,float16,0,0.01655520051717758
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,512,4,2,128,1,fp8,fp8,0,0.2562927961349487
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,512,4,1,128,1,float16,fp8,0,0.018564799427986146
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,512,4,2,128,1,float16,float16,0,0.016571199893951415
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,512,4,2,128,1,float16,fp8,0,0.01863519996404648
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,512,4,2,128,1,fp8,fp8,0,0.018617600202560425
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,512,4,4,128,1,float16,float16,0,0.01658399999141693
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,512,4,4,128,1,float16,fp8,0,0.07403839826583862
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,512,4,4,128,1,float16,fp8,0,0.016646400094032288
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,512,4,4,128,1,fp8,fp8,0,0.016603200137615202
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,512,4,1,128,1,float16,float16,0,0.016571199893951415
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,512,4,2,128,1,float16,float16,0,0.05361279845237732
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,512,4,1,128,1,float16,fp8,0,0.016524800658226015
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,512,4,2,128,1,float16,float16,0,0.016603200137615202
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,512,4,2,128,1,float16,fp8,0,0.016624000668525696
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,512,4,2,128,1,fp8,fp8,0,0.016553600132465363
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,512,4,4,128,1,float16,float16,0,0.016499200463294984
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,512,4,4,128,1,fp8,fp8,0,0.016468800604343414
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,512,4,4,128,1,float16,fp8,0,0.016547200083732606
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,512,4,2,128,1,float16,float16,0,0.03227039873600006
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,512,4,1,128,1,float16,float16,0,0.016603200137615202
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,512,4,1,128,1,float16,fp8,0,0.01648160070180893
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,512,4,1,128,1,fp8,fp8,0,0.016524800658226015
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,512,4,2,128,1,float16,float16,0,0.016515199840068818
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,512,4,2,128,1,float16,fp8,0,0.016571199893951415
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,512,4,2,128,1,fp8,fp8,0,0.016515199840068818
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,256,4,1,128,1,float16,fp8,0,0.12089120149612427
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,512,4,2,128,1,float16,float16,0,0.020656000077724456
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,256,4,2,128,1,float16,float16,0,0.07695040106773376
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,256,4,1,128,1,fp8,fp8,0,0.11895359754562378
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,256,4,2,128,1,float16,fp8,0,0.12098560333251954
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,256,4,4,128,1,float16,float16,0,0.055576002597808837
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,256,4,4,128,1,float16,fp8,0,0.06665440201759339
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,256,4,4,128,1,fp8,fp8,0,0.06774880290031433
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,256,4,2,128,1,fp8,fp8,0,0.12102559804916382
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,256,4,1,128,1,float16,fp8,0,0.06516000032424926
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,256,4,2,128,1,float16,float16,0,0.046265599131584165
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,256,4,2,128,1,float16,fp8,0,0.06557599902153015
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,256,4,2,128,1,fp8,fp8,0,0.06577919721603394
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,256,4,4,128,1,float16,float16,0,0.03105440139770508
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,256,4,4,128,1,float16,fp8,0,0.037092798948287965
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,256,4,4,128,1,fp8,fp8,0,0.03659360110759735
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,256,4,1,128,1,float16,fp8,0,0.03648000061511993
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,256,4,1,128,1,fp8,fp8,0,0.03699359893798828
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,256,4,2,128,1,float16,float16,0,0.024984000623226164
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,256,4,2,128,1,float16,fp8,0,0.037003201246261594
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,256,4,2,128,1,fp8,fp8,0,0.03699840009212494
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1024,4,4,128,1,float16,float16,0,0.02481440007686615
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,256,4,4,128,1,float16,float16,0,0.018566399812698364
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,256,4,4,128,1,float16,fp8,0,0.022676800191402436
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,256,4,4,128,1,fp8,fp8,0,0.02263839989900589
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,256,4,1,128,1,float16,float16,0,0.016612799465656282
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,256,4,1,128,1,float16,fp8,0,0.022724799811840057
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,512,4,1,128,1,fp8,fp8,0,0.016739200055599212
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,256,4,1,128,1,fp8,fp8,0,0.022758400440216063
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,256,4,2,128,1,float16,float16,0,0.01659359931945801
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,256,4,2,128,1,float16,fp8,0,0.022700800001621245
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,256,4,2,128,1,fp8,fp8,0,0.022752000391483305
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,256,4,4,128,1,float16,fp8,0,0.017718400061130523
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,256,4,4,128,1,float16,float16,0,0.01634880006313324
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,256,4,4,128,1,fp8,fp8,0,0.016918399930000307
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,256,4,1,128,1,float16,fp8,0,0.0166143998503685
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,256,4,1,128,1,fp8,fp8,0,0.017044800519943237
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,256,4,2,128,1,float16,fp8,0,0.016710400581359863
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,512,4,1,128,1,fp8,fp8,0,0.016543999314308167
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,256,4,2,128,1,fp8,fp8,0,0.016601599752902985
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,256,4,4,128,1,float16,float16,0,0.01451520025730133
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,256,4,4,128,1,fp8,fp8,0,0.014519999921321868
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,256,4,1,128,1,float16,float16,0,0.012532800436019897
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,256,4,1,128,1,float16,fp8,0,0.014556799829006196
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,256,4,1,128,1,fp8,fp8,0,0.01451680064201355
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,256,4,2,128,1,float16,float16,0,0.01451839953660965
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,256,4,2,128,1,float16,fp8,0,0.014558400213718414
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,256,4,1,128,1,float16,float16,0,0.0756879985332489
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,256,4,2,128,1,fp8,fp8,0,0.014608000218868256
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,256,4,4,128,1,float16,float16,0,0.012918399274349212
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,256,4,4,128,1,float16,fp8,0,0.014523200690746307
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,256,4,4,128,1,fp8,fp8,0,0.014535999298095703
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,256,4,1,128,1,float16,float16,0,0.012593600153923034
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,256,4,1,128,1,float16,fp8,0,0.014455999433994293
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,256,4,1,128,1,fp8,fp8,0,0.013031999766826629
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,256,4,1,128,1,float16,float16,0,0.04423519968986511
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,256,4,2,128,1,float16,float16,0,0.012507200241088867
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,256,4,2,128,1,float16,fp8,0,0.013998399674892425
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,256,4,2,128,1,fp8,fp8,0,0.014044800400733947
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,256,4,1,128,1,fp8,fp8,0,0.0657423973083496
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,256,4,4,128,1,float16,float16,0,0.012494400143623352
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,256,4,4,128,1,float16,fp8,0,0.012939199805259705
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,256,4,4,128,1,fp8,fp8,0,0.012492799758911132
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,256,4,1,128,1,float16,float16,0,0.012902399897575379
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,256,4,1,128,1,float16,fp8,0,0.01249919980764389
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,256,4,2,128,1,float16,float16,0,0.012807999551296235
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,256,4,2,128,1,float16,fp8,0,0.012668800354003907
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,256,4,2,128,1,fp8,fp8,0,0.013249599933624267
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,256,4,1,128,1,float16,float16,0,0.025123199820518492
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,256,4,4,128,1,float16,fp8,0,0.012516799569129943
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,256,4,4,128,1,fp8,fp8,0,0.012558400630950928
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,256,4,1,128,1,float16,float16,0,0.012486399710178375
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,256,4,1,128,1,float16,fp8,0,0.012518399953842163
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,256,4,1,128,1,fp8,fp8,0,0.012555199861526489
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,256,4,2,128,1,float16,float16,0,0.01255040019750595
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,256,4,2,128,1,float16,fp8,0,0.012495999783277511
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,256,4,2,128,1,fp8,fp8,0,0.01249760016798973
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,128,4,1,128,1,float16,float16,0,0.047331199049949646
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,128,4,1,128,1,float16,fp8,0,0.06808159947395324
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,128,4,1,128,1,fp8,fp8,0,0.06783199906349183
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,128,4,2,128,1,float16,float16,0,0.048518401384353635
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,128,4,2,128,1,float16,fp8,0,0.06792479753494263
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,128,4,4,128,1,float16,float16,0,0.03481279909610748
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,128,4,2,128,1,fp8,fp8,0,0.06955360174179077
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,128,4,4,128,1,float16,fp8,0,0.039131200313568114
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,128,4,4,128,1,fp8,fp8,0,0.03909600079059601
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,128,4,1,128,1,float16,float16,0,0.02887359857559204
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,128,4,1,128,1,float16,fp8,0,0.039212799072265624
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,128,4,1,128,1,fp8,fp8,0,0.039139199256896975
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,128,4,2,128,1,float16,float16,0,0.028857600688934327
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,128,4,2,128,1,float16,fp8,0,0.03913280069828033
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,256,4,1,128,1,float16,float16,0,0.014587199687957764
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,128,4,2,128,1,fp8,fp8,0,0.039156800508499144
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,128,4,4,128,1,float16,float16,0,0.020606400072574617
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,256,4,2,128,1,float16,float16,0,0.014548799395561219
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,128,4,4,128,1,float16,fp8,0,0.022784000635147093
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,128,4,4,128,1,fp8,fp8,0,0.02274720072746277
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,128,4,1,128,1,float16,float16,0,0.018441599607467652
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,128,4,1,128,1,float16,fp8,0,0.022755199670791627
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,128,4,2,128,1,float16,float16,0,0.017697599530220032
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,128,4,1,128,1,fp8,fp8,0,0.022750400006771088
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,128,4,2,128,1,fp8,fp8,0,0.022767999768257143
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,256,4,4,128,1,float16,fp8,0,0.014643199741840363
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,128,4,4,128,1,float16,float16,0,0.012513600289821625
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,128,4,4,128,1,float16,fp8,0,0.014632000029087067
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,128,4,4,128,1,fp8,fp8,0,0.014548799395561219
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,128,4,1,128,1,float16,fp8,0,0.014603200554847717
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,128,4,1,128,1,fp8,fp8,0,0.014571200311183929
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,128,4,2,128,1,float16,fp8,0,0.014577600359916686
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,128,4,2,128,1,fp8,fp8,0,0.014524799585342408
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,128,4,4,128,1,float16,float16,0,0.012441600114107132
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,128,4,4,128,1,float16,fp8,0,0.012508800625801087
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,128,4,1,128,1,float16,float16,0,0.01072160005569458
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,128,4,1,128,1,float16,fp8,0,0.012484800070524216
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,128,4,1,128,1,fp8,fp8,0,0.012555199861526489
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,128,4,2,128,1,float16,float16,0,0.01085439994931221
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,128,4,2,128,1,float16,fp8,0,0.012520000338554382
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,128,4,2,128,1,fp8,fp8,0,0.012529599666595458
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,128,4,4,128,1,float16,float16,0,0.010463999956846238
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,128,4,4,128,1,float16,fp8,0,0.011153600364923476
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,256,4,1,128,1,fp8,fp8,0,0.01250240057706833
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,128,4,4,128,1,fp8,fp8,0,0.010444799810647965
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,128,4,1,128,1,float16,float16,0,0.010499200224876404
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,128,4,1,128,1,float16,fp8,0,0.010478399693965912
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,128,4,1,128,1,fp8,fp8,0,0.010923200100660325
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,128,4,2,128,1,float16,float16,0,0.010454399883747101
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,128,4,2,128,1,float16,fp8,0,0.010497599840164185
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,128,4,4,128,1,float16,float16,0,0.010475199669599533
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,128,4,2,128,1,fp8,fp8,0,0.010470400005578995
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,128,4,4,128,1,float16,fp8,0,0.01053439974784851
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,128,4,4,128,1,fp8,fp8,0,0.0104592002928257
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,128,4,1,128,1,float16,fp8,0,0.010539200156927109
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,128,4,1,128,1,float16,float16,0,0.010552000254392624
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,128,4,1,128,1,fp8,fp8,0,0.010500799864530563
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,128,4,2,128,1,float16,float16,0,0.010503999888896942
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,128,4,2,128,1,float16,fp8,0,0.010516799986362457
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,128,4,4,128,1,float16,float16,0,0.01045759990811348
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,128,4,4,128,1,float16,fp8,0,0.010475199669599533
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,128,4,4,128,1,fp8,fp8,0,0.010443200170993806
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,128,4,1,128,1,float16,float16,0,0.01045600026845932
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,128,4,1,128,1,float16,fp8,0,0.010468800365924836
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,128,4,1,128,1,fp8,fp8,0,0.010438399761915207
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,128,4,2,128,1,float16,float16,0,0.01045600026845932
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,128,4,2,128,1,float16,fp8,0,0.010478399693965912
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,128,4,2,128,1,fp8,fp8,0,0.010460799932479859
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,128,4,4,128,1,float16,float16,0,0.01037919968366623
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,128,4,4,128,1,float16,fp8,0,0.010470400005578995
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,128,4,4,128,1,fp8,fp8,0,0.0104032002389431
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,128,4,1,128,1,float16,float16,0,0.010539200156927109
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,128,4,1,128,1,float16,fp8,0,0.010451199859380722
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,128,4,2,128,1,float16,float16,0,0.010440000146627427
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,128,4,1,128,1,fp8,fp8,0,0.01051200032234192
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,128,4,2,128,1,float16,fp8,0,0.010425599664449692
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,128,4,4,128,1,fp8,fp8,0,0.012548799812793731
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,64,4,1,128,1,float16,float16,0,0.0373775988817215
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,64,4,1,128,1,float16,fp8,0,0.04731839895248413
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,64,4,1,128,1,fp8,fp8,0,0.04732640087604523
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,64,4,2,128,1,float16,float16,0,0.03741439878940582
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,64,4,2,128,1,float16,fp8,0,0.04732640087604523
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,64,4,2,128,1,fp8,fp8,0,0.04742560088634491
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,64,4,4,128,1,float16,float16,0,0.024868799746036528
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,64,4,4,128,1,float16,fp8,0,0.02885279953479767
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,64,4,4,128,1,fp8,fp8,0,0.02885119915008545
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,64,4,1,128,1,float16,float16,0,0.023319999873638152
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,64,4,1,128,1,float16,fp8,0,0.02889440059661865
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,64,4,1,128,1,fp8,fp8,0,0.0288783997297287
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,64,4,2,128,1,float16,float16,0,0.023951999843120575
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,64,4,2,128,1,float16,fp8,0,0.028859201073646545
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,64,4,2,128,1,fp8,fp8,0,0.02887679934501648
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,64,4,4,128,1,float16,float16,0,0.016596800088882445
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,64,4,4,128,1,float16,fp8,0,0.018628799915313722
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,128,4,2,128,1,fp8,fp8,0,0.010447999835014344
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,64,4,4,128,1,fp8,fp8,0,0.018638400733470915
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,64,4,1,128,1,float16,float16,0,0.014558400213718414
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,64,4,1,128,1,fp8,fp8,0,0.01841759979724884
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,64,4,1,128,1,float16,fp8,0,0.018596799671649934
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,64,4,2,128,1,float16,float16,0,0.014958399534225463
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,64,4,2,128,1,float16,fp8,0,0.018595199286937713
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,64,4,2,128,1,fp8,fp8,0,0.018566399812698364
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,64,4,4,128,1,float16,fp8,0,0.012486399710178375
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,64,4,4,128,1,fp8,fp8,0,0.012511999905109405
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,64,4,1,128,1,float16,float16,0,0.010462400317192078
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,64,4,1,128,1,float16,fp8,0,0.012508800625801087
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,128,4,2,128,1,float16,fp8,0,0.022779199481010436
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,64,4,1,128,1,fp8,fp8,0,0.01250080019235611
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,64,4,2,128,1,float16,float16,0,0.011720000207424164
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,64,4,2,128,1,float16,fp8,0,0.012507200241088867
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,128,4,1,128,1,float16,float16,0,0.012604799866676331
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,64,4,2,128,1,fp8,fp8,0,0.01249919980764389
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,64,4,4,128,1,float16,float16,0,0.010417599976062775
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,128,4,2,128,1,float16,float16,0,0.012406399846076966
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,64,4,4,128,1,float16,fp8,0,0.01053759977221489
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,64,4,1,128,1,float16,float16,0,0.010387200117111205
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,64,4,4,128,1,fp8,fp8,0,0.010460799932479859
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,64,4,1,128,1,float16,fp8,0,0.01048320010304451
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,64,4,2,128,1,float16,fp8,0,0.010412800312042236
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,64,4,2,128,1,fp8,fp8,0,0.0104592002928257
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,64,4,4,128,1,float16,fp8,0,0.010454399883747101
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,64,4,4,128,1,float16,float16,0,0.010406400263309478
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,64,4,4,128,1,fp8,fp8,0,0.010436800122261048
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,64,4,1,128,1,float16,float16,0,0.010463999956846238
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,64,4,1,128,1,float16,fp8,0,0.01040000021457672
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,64,4,1,128,1,fp8,fp8,0,0.010395199805498124
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,64,4,2,128,1,float16,float16,0,0.010385599732398988
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,64,4,2,128,1,float16,fp8,0,0.010398399829864503
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,64,4,2,128,1,fp8,fp8,0,0.010499200224876404
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,64,4,4,128,1,float16,float16,0,0.011396799981594086
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,64,4,4,128,1,float16,fp8,0,0.010401599854230881
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,64,4,1,128,1,float16,float16,0,0.010449600219726563
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,64,4,1,128,1,float16,fp8,0,0.010393600165843963
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,64,4,1,128,1,fp8,fp8,0,0.010451199859380722
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,64,4,2,128,1,float16,float16,0,0.010395199805498124
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,64,4,2,128,1,float16,fp8,0,0.010435199737548828
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,64,4,4,128,1,float16,float16,0,0.010473600029945374
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,64,4,2,128,1,fp8,fp8,0,0.010353600233793258
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,64,4,4,128,1,float16,fp8,0,0.010441599786281586
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,256,4,4,128,1,float16,float16,0,0.012479999661445617
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,64,4,1,128,1,float16,float16,0,0.010428799688816071
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,64,4,1,128,1,fp8,fp8,0,0.010452800244092942
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,64,4,1,128,1,float16,fp8,0,0.010391999781131745
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,64,4,2,128,1,float16,float16,0,0.009414400160312652
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,64,4,2,128,1,float16,fp8,0,0.010436800122261048
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,64,4,2,128,1,fp8,fp8,0,0.009444800019264222
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,64,4,4,128,1,float16,float16,0,0.010384000092744827
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,64,4,4,128,1,float16,fp8,0,0.009006399661302567
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,64,4,4,128,1,fp8,fp8,0,0.010454399883747101
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,64,4,1,128,1,float16,float16,0,0.008483199775218964
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,64,4,1,128,1,fp8,fp8,0,0.008448000252246856
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,64,4,2,128,1,float16,float16,0,0.009062399715185165
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,64,4,2,128,1,float16,fp8,0,0.008713600039482117
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,64,4,2,128,1,fp8,fp8,0,0.008923199772834779
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,32,4,1,128,1,float16,float16,0,0.0331167995929718
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,64,4,4,128,1,float16,float16,0,0.01249760016798973
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,32,4,1,128,1,float16,fp8,0,0.03711999952793121
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,32,4,1,128,1,fp8,fp8,0,0.03705280125141144
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,32,4,2,128,1,float16,float16,0,0.03330720067024231
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,32,4,2,128,1,float16,fp8,0,0.0370608001947403
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,32,4,2,128,1,fp8,fp8,0,0.037092798948287965
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,32,4,4,128,1,float16,float16,0,0.022703999280929567
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,32,4,4,128,1,float16,fp8,0,0.02285120040178299
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,32,4,4,128,1,fp8,fp8,0,0.02272160053253174
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,32,4,1,128,1,float16,float16,0,0.020844799280166627
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,32,4,1,128,1,float16,fp8,0,0.022699199616909027
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,32,4,1,128,1,fp8,fp8,0,0.022767999768257143
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,32,4,2,128,1,float16,float16,0,0.020695999264717102
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,32,4,2,128,1,float16,fp8,0,0.022815999388694764
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,32,4,2,128,1,fp8,fp8,0,0.02279680073261261
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,64,4,1,128,1,fp8,fp8,0,0.010489600151777268
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,32,4,4,128,1,float16,float16,0,0.014603200554847717
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,32,4,4,128,1,float16,fp8,0,0.014662399888038635
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,64,4,2,128,1,float16,float16,0,0.010372799634933472
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,32,4,1,128,1,float16,float16,0,0.01459999978542328
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,32,4,1,128,1,float16,fp8,0,0.014564800262451171
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,32,4,2,128,1,float16,float16,0,0.014561599493026734
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,32,4,1,128,1,fp8,fp8,0,0.014569599926471711
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,32,4,2,128,1,float16,fp8,0,0.014535999298095703
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,32,4,2,128,1,fp8,fp8,0,0.014628799259662628
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,32,4,4,128,1,float16,float16,0,0.010915199667215348
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,32,4,4,128,1,float16,fp8,0,0.010468800365924836
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,32,4,4,128,1,fp8,fp8,0,0.010660800337791442
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,32,4,1,128,1,float16,float16,0,0.010443200170993806
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,32,4,1,128,1,float16,fp8,0,0.011446399986743927
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,32,4,1,128,1,fp8,fp8,0,0.010763200372457505
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,32,4,2,128,1,float16,float16,0,0.010435199737548828
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,64,4,4,128,1,fp8,fp8,0,0.010384000092744827
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,32,4,2,128,1,fp8,fp8,0,0.010662399977445603
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,32,4,4,128,1,float16,float16,0,0.01048159971833229
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,32,4,2,128,1,float16,fp8,0,0.010751999914646149
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,32,4,4,128,1,fp8,fp8,0,0.010476800054311753
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,32,4,1,128,1,float16,float16,0,0.01048159971833229
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,32,4,1,128,1,float16,fp8,0,0.010440000146627427
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,32,4,1,128,1,fp8,fp8,0,0.01040000021457672
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,64,4,4,128,1,fp8,fp8,0,0.010360000282526016
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,32,4,2,128,1,float16,fp8,0,0.01043040007352829
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,32,4,2,128,1,fp8,fp8,0,0.010390400141477584
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,32,4,4,128,1,float16,float16,0,0.010417599976062775
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,32,4,4,128,1,float16,fp8,0,0.010375999659299851
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,32,4,4,128,1,fp8,fp8,0,0.01035040020942688
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,32,4,1,128,1,float16,float16,0,0.01037440001964569
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,32,4,1,128,1,float16,fp8,0,0.009319999814033508
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,32,4,1,128,1,fp8,fp8,0,0.010371199995279311
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,32,4,2,128,1,float16,float16,0,0.009387200325727462
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,32,4,2,128,1,float16,fp8,0,0.010414399951696397
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,32,4,2,128,1,fp8,fp8,0,0.009627199918031692
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,32,4,4,128,1,float16,float16,0,0.010438399761915207
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,32,4,4,128,1,float16,fp8,0,0.008646400272846222
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,32,4,4,128,1,fp8,fp8,0,0.009356799721717834
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,32,4,1,128,1,float16,float16,0,0.009319999814033508
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,32,4,1,128,1,float16,fp8,0,0.008524800091981888
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,32,4,1,128,1,fp8,fp8,0,0.008422400057315826
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,32,4,2,128,1,float16,float16,0,0.009408000111579894
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,32,4,2,128,1,float16,fp8,0,0.008603200316429138
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,32,4,2,128,1,fp8,fp8,0,0.009363199770450591
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,32,4,4,128,1,float16,float16,0,0.008580800145864487
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,32,4,4,128,1,float16,fp8,0,0.00894080027937889
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,32,4,4,128,1,fp8,fp8,0,0.009696000069379807
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,32,4,1,128,1,float16,float16,0,0.009332799911499023
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,32,4,1,128,1,float16,fp8,0,0.009070400148630142
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,32,4,1,128,1,fp8,fp8,0,0.00843520015478134
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,32,4,2,128,1,float16,fp8,0,0.008825600147247314
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,32,4,2,128,1,float16,float16,0,0.010355199873447418
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,32,4,4,128,1,float16,float16,0,0.008460800349712371
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,32,4,2,128,1,fp8,fp8,0,0.01035040020942688
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,32,4,4,128,1,float16,fp8,0,0.0083856001496315
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,32,4,4,128,1,fp8,fp8,0,0.010390400141477584
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,32,4,1,128,1,float16,float16,0,0.008401600271463394
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,32,4,1,128,1,float16,fp8,0,0.009985599666833878
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,32,4,1,128,1,fp8,fp8,0,0.008441600203514098
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,32,4,2,128,1,float16,float16,0,0.010363200306892395
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,32,4,2,128,1,float16,fp8,0,0.008432000130414962
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,32,4,2,128,1,fp8,fp8,0,0.010036800056695938
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,16,4,1,128,1,float16,float16,0,0.03258880078792572
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,16,4,1,128,1,float16,fp8,0,0.03268960118293762
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,16,4,1,128,1,fp8,fp8,0,0.03295519948005676
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,16,4,2,128,1,float16,float16,0,0.031857600808143614
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,16,4,2,128,1,float16,fp8,0,0.03296160101890564
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,16,4,2,128,1,fp8,fp8,0,0.0327919989824295
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,16,4,4,128,1,float16,float16,0,0.02078080028295517
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,16,4,4,128,1,float16,fp8,0,0.020707200467586517
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,16,4,4,128,1,fp8,fp8,0,0.0206496000289917
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,16,4,1,128,1,float16,float16,0,0.020664000511169435
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,16,4,1,128,1,float16,fp8,0,0.02070080041885376
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,16,4,1,128,1,fp8,fp8,0,0.020703999698162077
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,16,4,2,128,1,float16,float16,0,0.020659199357032774
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,16,4,2,128,1,float16,fp8,0,0.020609599351882935
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,16,4,2,128,1,fp8,fp8,0,0.02067359983921051
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,16,4,4,128,1,float16,float16,0,0.014550399780273438
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,16,4,4,128,1,float16,fp8,0,0.01451839953660965
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,16,4,4,128,1,fp8,fp8,0,0.014455999433994293
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,32,4,4,128,1,float16,fp8,0,0.010463999956846238
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,16,4,1,128,1,float16,fp8,0,0.01446239948272705
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,16,4,1,128,1,fp8,fp8,0,0.014454400539398194
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,16,4,1,128,1,float16,float16,0,0.01438560038805008
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,16,4,2,128,1,float16,fp8,0,0.014486399292945863
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,16,4,2,128,1,fp8,fp8,0,0.014524799585342408
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,32,4,2,128,1,float16,float16,0,0.01048479974269867
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,16,4,2,128,1,float16,float16,0,0.01446239948272705
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,16,4,4,128,1,float16,float16,0,0.010384000092744827
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,16,4,4,128,1,float16,fp8,0,0.010411199927330018
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,16,4,4,128,1,fp8,fp8,0,0.010521599650382995
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,16,4,1,128,1,float16,float16,0,0.010425599664449692
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,16,4,1,128,1,float16,fp8,0,0.010388799756765366
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,16,4,1,128,1,fp8,fp8,0,0.01037919968366623
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,16,4,2,128,1,float16,float16,0,0.010531199723482132
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,16,4,2,128,1,float16,fp8,0,0.010475199669599533
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,16,4,2,128,1,fp8,fp8,0,0.010393600165843963
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,16,4,4,128,1,float16,fp8,0,0.010411199927330018
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,16,4,4,128,1,fp8,fp8,0,0.010473600029945374
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,16,4,1,128,1,float16,float16,0,0.010443200170993806
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,16,4,1,128,1,float16,fp8,0,0.010388799756765366
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,16,4,2,128,1,float16,fp8,0,0.010375999659299851
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,64,4,1,128,1,float16,fp8,0,0.00859839990735054
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,16,4,2,128,1,fp8,fp8,0,0.010361599922180175
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,16,4,4,128,1,float16,float16,0,0.010185600072145463
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,16,4,4,128,1,float16,fp8,0,0.008937600255012512
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,16,4,4,128,1,fp8,fp8,0,0.009468799829483033
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,16,4,1,128,1,float16,fp8,0,0.008504000306129456
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,16,4,1,128,1,fp8,fp8,0,0.010367999970912933
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,16,4,2,128,1,float16,float16,0,0.00939520001411438
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,16,4,2,128,1,float16,fp8,0,0.010462400317192078
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,16,4,2,128,1,fp8,fp8,0,0.008481600135564805
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16,4,4,128,1,float16,float16,0,0.00891520008444786
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16,4,4,128,1,float16,fp8,0,0.008379200100898742
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16,4,4,128,1,fp8,fp8,0,0.00840959995985031
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16,4,1,128,1,float16,float16,0,0.010409600287675857
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16,4,1,128,1,float16,fp8,0,0.00841120034456253
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16,4,1,128,1,fp8,fp8,0,0.00904960036277771
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16,4,2,128,1,float16,float16,0,0.010443200170993806
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16,4,2,128,1,float16,fp8,0,0.009332799911499023
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16,4,2,128,1,fp8,fp8,0,0.009399999678134919
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16,4,4,128,1,float16,float16,0,0.010043200105428696
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16,4,4,128,1,float16,fp8,0,0.008587200194597244
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16,4,4,128,1,fp8,fp8,0,0.008444800227880477
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16,4,1,128,1,float16,float16,0,0.008436799794435502
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,128,4,2,128,1,fp8,fp8,0,0.01045600026845932
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16,4,1,128,1,float16,fp8,0,0.008392000198364257
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16,4,1,128,1,fp8,fp8,0,0.009353599697351455
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16,4,2,128,1,float16,fp8,0,0.009560000151395798
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16,4,2,128,1,fp8,fp8,0,0.009486400336027146
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16,4,4,128,1,float16,float16,0,0.008823999762535095
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16,4,4,128,1,float16,fp8,0,0.008392000198364257
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16,4,4,128,1,fp8,fp8,0,0.008500800281763077
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16,4,1,128,1,float16,float16,0,0.009046400338411332
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16,4,1,128,1,float16,fp8,0,0.008420799672603608
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16,4,1,128,1,fp8,fp8,0,0.008478400111198426
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16,4,2,128,1,float16,float16,0,0.008929599821567536
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16,4,2,128,1,float16,fp8,0,0.008433599770069123
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16,4,2,128,1,fp8,fp8,0,0.00950080007314682
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,16,4,4,128,1,float16,float16,0,0.010422399640083313
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,1,4,1,128,1,float16,float16,0,0.030963200330734252
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,1,4,1,128,1,float16,fp8,0,0.028862398862838746
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,1,4,1,128,1,fp8,fp8,0,0.028935998678207397
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,16,4,2,128,1,float16,float16,0,0.010494399815797806
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,1,4,2,128,1,float16,float16,0,0.030902400612831116
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,1,4,2,128,1,float16,fp8,0,0.028935998678207397
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,16,4,1,128,1,float16,float16,0,0.010278400033712387
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,1,4,4,128,1,float16,float16,0,0.020252799987792967
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,1,4,4,128,1,float16,fp8,0,0.01865759938955307
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,1,4,2,128,1,fp8,fp8,0,0.028910401463508605
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,1,4,4,128,1,fp8,fp8,0,0.018563200533390046
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,1,4,1,128,1,float16,float16,0,0.020342400670051573
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,1,4,1,128,1,float16,fp8,0,0.0186271995306015
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,1,4,1,128,1,fp8,fp8,0,0.018705600500106813
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,1,4,2,128,1,float16,float16,0,0.018878400325775146
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,1,4,2,128,1,float16,fp8,0,0.018673600256443025
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,1,4,2,128,1,fp8,fp8,0,0.018662400543689728
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1,4,4,128,1,float16,float16,0,0.014567999541759491
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1,4,4,128,1,float16,fp8,0,0.012600000202655792
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1,4,4,128,1,fp8,fp8,0,0.012932799756526947
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1,4,1,128,1,float16,float16,0,0.014528000354766845
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1,4,1,128,1,float16,fp8,0,0.012521600723266602
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1,4,1,128,1,fp8,fp8,0,0.012433599680662155
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1,4,2,128,1,float16,float16,0,0.01451680064201355
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1,4,2,128,1,float16,fp8,0,0.012729600071907043
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1,4,2,128,1,fp8,fp8,0,0.012511999905109405
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1,4,4,128,1,float16,float16,0,0.010440000146627427
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1,4,4,128,1,float16,fp8,0,0.010435199737548828
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1,4,4,128,1,fp8,fp8,0,0.010451199859380722
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1,4,1,128,1,float16,float16,0,0.010435199737548828
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1,4,1,128,1,float16,fp8,0,0.010416000336408614
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1,4,1,128,1,fp8,fp8,0,0.010435199737548828
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1,4,2,128,1,float16,float16,0,0.010489600151777268
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1,4,2,128,1,float16,fp8,0,0.010476800054311753
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1,4,2,128,1,fp8,fp8,0,0.010390400141477584
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1,4,4,128,1,float16,float16,0,0.010395199805498124
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1,4,4,128,1,float16,fp8,0,0.01045759990811348
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1,4,4,128,1,fp8,fp8,0,0.01043360009789467
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1,4,1,128,1,float16,float16,0,0.010416000336408614
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1,4,1,128,1,float16,fp8,0,0.009217599779367447
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1,4,1,128,1,fp8,fp8,0,0.010438399761915207
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1,4,2,128,1,float16,float16,0,0.01048479974269867
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1,4,2,128,1,float16,fp8,0,0.010361599922180175
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1,4,4,128,1,float16,float16,0,0.010451199859380722
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1,4,4,128,1,float16,fp8,0,0.009374400228261947
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1,4,1,128,1,float16,float16,0,0.010406400263309478
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1,4,2,128,1,fp8,fp8,0,0.009286399930715561
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1,4,1,128,1,float16,fp8,0,0.008614400029182434
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1,4,4,128,1,fp8,fp8,0,0.01040000021457672
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1,4,1,128,1,fp8,fp8,0,0.009726399928331375
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1,4,2,128,1,float16,float16,0,0.010385599732398988
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1,4,2,128,1,float16,fp8,0,0.008423999696969987
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1,4,2,128,1,fp8,fp8,0,0.008804800361394883
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1,4,4,128,1,float16,float16,0,0.008689600229263305
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1,4,4,128,1,float16,fp8,0,0.008551999926567078
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1,4,4,128,1,fp8,fp8,0,0.008396799862384795
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1,4,1,128,1,float16,float16,0,0.009009599685668945
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,32,4,4,128,1,fp8,fp8,0,0.014635199308395385
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1,4,1,128,1,fp8,fp8,0,0.008510400354862214
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1,4,2,128,1,float16,fp8,0,0.008419200032949447
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16,4,2,128,1,float16,float16,0,0.008401600271463394
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1,4,4,128,1,float16,float16,0,0.009228800237178803
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1,4,4,128,1,float16,fp8,0,0.008417599648237229
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1,4,1,128,1,float16,fp8,0,0.008481600135564805
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1,4,4,128,1,fp8,fp8,0,0.00846719965338707
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1,4,1,128,1,float16,fp8,0,0.008419200032949447
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1,4,1,128,1,fp8,fp8,0,0.008419200032949447
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1,4,2,128,1,float16,float16,0,0.008700799942016602
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1,4,2,128,1,float16,fp8,0,0.008404800295829773
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1,4,2,128,1,fp8,fp8,0,0.00945120006799698
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1,4,4,128,1,float16,float16,0,0.008975999802350998
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1,4,4,128,1,float16,fp8,0,0.008484800159931184
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1,4,4,128,1,fp8,fp8,0,0.008399999886751174
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1,4,1,128,1,float16,float16,0,0.010363200306892395
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1,4,1,128,1,float16,fp8,0,0.00843999981880188
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1,4,1,128,1,fp8,fp8,0,0.008417599648237229
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1,4,2,128,1,float16,float16,0,0.008814399689435959
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1,4,2,128,1,float16,fp8,0,0.008393599838018417
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,16,4,1,128,1,fp8,fp8,0,0.01040000021457672
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,16384,2,1,128,1,float16,float16,0,0.9289168357849121
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,16384,2,1,128,1,float16,fp8,0,0.9297311782836915
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16384,2,2,128,1,float16,float16,0,0.5228943824768066
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16384,2,2,128,1,float16,fp8,0,0.5281631946563721
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,16384,2,1,128,1,fp8,fp8,0,0.9278528213500976
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16384,2,2,128,1,fp8,fp8,0,0.5270927906036377
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16384,2,1,128,1,float16,float16,0,0.5209648132324218
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16384,2,1,128,1,float16,fp8,0,0.5261616230010986
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16384,2,2,128,1,float16,float16,0,0.3257008075714111
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16384,2,1,128,1,fp8,fp8,0,0.525708818435669
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1,4,2,128,1,float16,float16,0,0.010016000270843506
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1,4,2,128,1,fp8,fp8,0,0.008460800349712371
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16384,2,2,128,1,fp8,fp8,0,0.32659039497375486
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16384,2,1,128,1,float16,float16,0,0.3220511913299561
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16384,2,1,128,1,float16,fp8,0,0.3265520095825195
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16384,2,2,128,1,float16,float16,0,0.20879039764404297
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16384,2,2,128,1,float16,fp8,0,0.2098207950592041
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16384,2,2,128,1,fp8,fp8,0,0.20955839157104492
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16384,2,1,128,1,float16,float16,0,0.209169602394104
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16384,2,1,128,1,float16,fp8,0,0.20950241088867189
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16384,2,1,128,1,fp8,fp8,0,0.2094048023223877
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,12288,2,1,128,1,float16,float16,0,0.5605408191680908
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,12288,2,1,128,1,float16,fp8,0,0.5836800098419189
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,12288,2,1,128,1,fp8,fp8,0,0.583892822265625
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,12288,2,2,128,1,float16,float16,0,0.3260063886642456
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,12288,2,2,128,1,float16,fp8,0,0.3364495992660522
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,12288,2,2,128,1,fp8,fp8,0,0.33822240829467776
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,12288,2,1,128,1,float16,float16,0,0.3222320079803467
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,12288,2,1,128,1,float16,fp8,0,0.33586559295654295
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,12288,2,1,128,1,fp8,fp8,0,0.33735520839691163
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,12288,2,2,128,1,float16,float16,0,0.20895841121673583
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,12288,2,2,128,1,float16,fp8,0,0.21181600093841552
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,12288,2,2,128,1,fp8,fp8,0,0.21191840171813964
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,12288,2,1,128,1,float16,float16,0,0.20904159545898438
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,12288,2,1,128,1,float16,fp8,0,0.21282401084899902
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,12288,2,1,128,1,fp8,fp8,0,0.2128607988357544
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,12288,2,2,128,1,float16,float16,0,0.159824001789093
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,12288,2,2,128,1,float16,fp8,0,0.15919840335845947
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,12288,2,2,128,1,fp8,fp8,0,0.15979039669036865
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,12288,2,1,128,1,float16,float16,0,0.15936800241470336
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,12288,2,1,128,1,float16,fp8,0,0.15892959833145143
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,12288,2,1,128,1,fp8,fp8,0,0.159278404712677
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,10240,2,1,128,1,float16,float16,0,0.4102015972137451
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,10240,2,2,128,1,float16,float16,0,0.24980480670928956
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1,4,1,128,1,float16,float16,0,0.008740799874067307
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,10240,2,1,128,1,fp8,fp8,0,0.4379983901977539
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,10240,2,2,128,1,float16,fp8,0,0.2636096000671387
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16384,2,1,128,1,fp8,fp8,0,0.32499840259552004
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,10240,2,2,128,1,fp8,fp8,0,0.26360158920288085
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,10240,2,1,128,1,float16,fp8,0,0.2632175922393799
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,10240,2,1,128,1,float16,float16,0,0.24815359115600585
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,10240,2,2,128,1,float16,float16,0,0.15208319425582886
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,10240,2,1,128,1,fp8,fp8,0,0.2634592056274414
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,10240,2,2,128,1,float16,fp8,0,0.15866559743881226
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,10240,2,2,128,1,fp8,fp8,0,0.15836960077285767
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,10240,2,1,128,1,float16,float16,0,0.15300480127334595
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,10240,2,1,128,1,float16,fp8,0,0.15823520421981813
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,10240,2,1,128,1,fp8,fp8,0,0.15790719985961915
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,10240,2,2,128,1,float16,float16,0,0.1355023980140686
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,10240,2,2,128,1,float16,fp8,0,0.13356159925460814
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,10240,2,2,128,1,fp8,fp8,0,0.13356640338897705
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,10240,2,1,128,1,float16,float16,0,0.13369120359420777
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,10240,2,1,128,1,float16,fp8,0,0.13406399488449097
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,10240,2,1,128,1,fp8,fp8,0,0.13533920049667358
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,8192,2,2,128,1,float16,float16,0,0.28704640865325926
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,8192,2,1,128,1,float16,float16,0,0.5048992156982421
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,8192,2,2,128,1,float16,fp8,0,0.3166304111480713
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,8192,2,1,128,1,float16,fp8,0,0.5625152111053466
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,8192,2,1,128,1,fp8,fp8,0,0.5585536003112793
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,8192,2,2,128,1,fp8,fp8,0,0.3157327890396118
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,8192,2,2,128,1,float16,float16,0,0.17996480464935302
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,8192,2,1,128,1,float16,fp8,0,0.3148688077926636
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,8192,2,1,128,1,fp8,fp8,0,0.3148895978927612
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,8192,2,2,128,1,fp8,fp8,0,0.19232480525970458
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,8192,2,1,128,1,float16,float16,0,0.1795904040336609
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,8192,2,1,128,1,float16,fp8,0,0.19341599941253662
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,8192,2,2,128,1,float16,float16,0,0.11539360284805297
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,8192,2,1,128,1,fp8,fp8,0,0.1921183943748474
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,8192,2,2,128,1,fp8,fp8,0,0.11999360322952271
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,8192,2,1,128,1,float16,float16,0,0.11429920196533203
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,8192,2,1,128,1,float16,fp8,0,0.11934080123901367
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,8192,2,1,128,1,fp8,fp8,0,0.11940959692001343
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,8192,2,2,128,1,float16,float16,0,0.10964479446411132
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,8192,2,2,128,1,float16,fp8,0,0.10881279706954956
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,8192,2,1,128,1,float16,float16,0,0.10877120494842529
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16384,2,2,128,1,float16,fp8,0,0.3261615991592407
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,10240,2,1,128,1,float16,fp8,0,0.439134407043457
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,8192,2,1,128,1,float16,fp8,0,0.10884640216827393
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,8192,2,1,128,1,fp8,fp8,0,0.10907360315322875
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,6144,2,1,128,1,float16,float16,0,0.3107471942901611
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1,4,2,128,1,fp8,fp8,0,0.009438399970531464
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,6144,2,1,128,1,fp8,fp8,0,0.3632272005081177
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,6144,2,2,128,1,float16,float16,0,0.1845296025276184
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,6144,2,2,128,1,float16,fp8,0,0.20709600448608398
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,6144,2,2,128,1,fp8,fp8,0,0.20857601165771483
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,6144,2,1,128,1,float16,float16,0,0.18178559541702272
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,6144,2,1,128,1,float16,fp8,0,0.20722560882568358
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,6144,2,1,128,1,fp8,fp8,0,0.20733280181884767
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,6144,2,2,128,1,float16,float16,0,0.1194111943244934
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,6144,2,2,128,1,float16,fp8,0,0.129094398021698
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,6144,2,2,128,1,fp8,fp8,0,0.12961920499801635
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,6144,2,1,128,1,float16,float16,0,0.1178928017616272
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,6144,2,1,128,1,float16,fp8,0,0.1296239972114563
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,8192,2,1,128,1,float16,float16,0,0.2857232093811035
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,6144,2,1,128,1,fp8,fp8,0,0.12815840244293214
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,6144,2,2,128,1,float16,fp8,0,0.09229919910430909
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,6144,2,2,128,1,float16,float16,0,0.08882079720497131
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,8192,2,2,128,1,float16,fp8,0,0.19279839992523193
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,6144,2,1,128,1,float16,float16,0,0.08738240003585815
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,6144,2,1,128,1,float16,fp8,0,0.09241120219230652
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,6144,2,1,128,1,fp8,fp8,0,0.09244639873504638
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,8192,2,2,128,1,float16,fp8,0,0.11974400281906128
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,6144,2,2,128,1,fp8,fp8,0,0.08460000157356262
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,6144,2,1,128,1,float16,float16,0,0.08422240018844604
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,6144,2,1,128,1,fp8,fp8,0,0.08425440192222595
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,4096,2,1,128,1,float16,float16,0,0.2910032033920288
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,8192,2,2,128,1,fp8,fp8,0,0.10970879793167114
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,4096,2,1,128,1,float16,fp8,0,0.37528960704803466
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,4096,2,1,128,1,fp8,fp8,0,0.3733247995376587
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,6144,2,1,128,1,float16,fp8,0,0.362225604057312
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,4096,2,2,128,1,float16,float16,0,0.16917599439620973
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,4096,2,2,128,1,float16,fp8,0,0.20770881175994874
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,4096,2,2,128,1,fp8,fp8,0,0.20550880432128907
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,4096,2,1,128,1,float16,float16,0,0.16835999488830566
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,4096,2,1,128,1,float16,fp8,0,0.20609760284423828
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,4096,2,2,128,1,float16,float16,0,0.10768640041351318
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,4096,2,1,128,1,fp8,fp8,0,0.20477919578552245
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,4096,2,2,128,1,float16,fp8,0,0.12373119592666626
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,4096,2,2,128,1,fp8,fp8,0,0.12400319576263427
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,4096,2,1,128,1,float16,float16,0,0.10546720027923584
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,4096,2,2,128,1,float16,float16,0,0.065830397605896
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,4096,2,1,128,1,fp8,fp8,0,0.12338720560073853
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,4096,2,2,128,1,float16,fp8,0,0.07405279874801636
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,4096,2,1,128,1,float16,float16,0,0.0657696008682251
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,4096,2,1,128,1,fp8,fp8,0,0.0739791989326477
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,4096,2,2,128,1,float16,float16,0,0.061667197942733766
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,4096,2,2,128,1,float16,fp8,0,0.06426720023155212
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,4096,2,2,128,1,fp8,fp8,0,0.06450240015983581
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,4096,2,1,128,1,float16,float16,0,0.061875200271606444
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,6144,2,2,128,1,float16,float16,0,0.08418239951133728
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,4096,2,1,128,1,fp8,fp8,0,0.06371679902076721
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,4096,2,2,128,1,float16,float16,0,0.05964159965515137
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,4096,2,2,128,1,float16,fp8,0,0.05963039994239807
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,4096,2,2,128,1,fp8,fp8,0,0.05952159762382507
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,4096,2,1,128,1,float16,float16,0,0.059601598978042604
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,4096,2,1,128,1,float16,fp8,0,0.05958240032196045
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,4096,2,1,128,1,fp8,fp8,0,0.059627199172973634
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,3072,2,1,128,1,float16,float16,0,0.1886639952659607
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,3072,2,1,128,1,float16,fp8,0,0.2528736114501953
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,3072,2,1,128,1,fp8,fp8,0,0.2521568059921265
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,3072,2,2,128,1,float16,float16,0,0.11385279893875122
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,4096,2,1,128,1,float16,fp8,0,0.1233504056930542
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,3072,2,2,128,1,float16,fp8,0,0.14169280529022216
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,4096,2,2,128,1,fp8,fp8,0,0.07399359941482545
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,3072,2,2,128,1,fp8,fp8,0,0.142193603515625
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,3072,2,1,128,1,float16,float16,0,0.11181600093841552
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,3072,2,1,128,1,float16,fp8,0,0.1419103980064392
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,3072,2,2,128,1,float16,float16,0,0.07218719720840454
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,3072,2,1,128,1,fp8,fp8,0,0.14211039543151854
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,3072,2,2,128,1,float16,fp8,0,0.08622080087661743
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,3072,2,2,128,1,fp8,fp8,0,0.08600320219993592
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,3072,2,1,128,1,float16,float16,0,0.07291520237922669
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,3072,2,1,128,1,float16,fp8,0,0.08618720173835755
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,3072,2,1,128,1,fp8,fp8,0,0.0861519992351532
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,3072,2,2,128,1,float16,float16,0,0.052086400985717776
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,3072,2,2,128,1,float16,fp8,0,0.05780159831047058
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,6144,2,2,128,1,fp8,fp8,0,0.09122560024261475
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,3072,2,2,128,1,fp8,fp8,0,0.05756480097770691
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,3072,2,1,128,1,float16,float16,0,0.051497602462768556
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,3072,2,1,128,1,float16,fp8,0,0.057574397325515746
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,6144,2,2,128,1,float16,fp8,0,0.08418400287628174
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,3072,2,2,128,1,float16,float16,0,0.04928480088710785
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,3072,2,2,128,1,float16,fp8,0,0.05140479803085327
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,3072,2,2,128,1,fp8,fp8,0,0.0513759970664978
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,3072,2,1,128,1,float16,float16,0,0.04906879961490631
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,6144,2,1,128,1,float16,fp8,0,0.08496320247650146
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,3072,2,1,128,1,fp8,fp8,0,0.05139039754867554
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,3072,2,2,128,1,fp8,fp8,0,0.047312000393867494
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,3072,2,2,128,1,float16,fp8,0,0.047336000204086306
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,3072,2,1,128,1,float16,fp8,0,0.051344001293182374
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,3072,2,1,128,1,float16,float16,0,0.04737440049648285
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,3072,2,1,128,1,float16,fp8,0,0.047310400009155276
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,3072,2,1,128,1,fp8,fp8,0,0.047486400604248045
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,2048,2,1,128,1,float16,float16,0,0.18625760078430176
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,2048,2,2,128,1,float16,float16,0,0.1095695972442627
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,2048,2,1,128,1,float16,fp8,0,0.2786144018173218
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,2048,2,1,128,1,fp8,fp8,0,0.2777951955795288
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,2048,2,2,128,1,float16,fp8,0,0.15115200281143187
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,2048,2,2,128,1,fp8,fp8,0,0.15102720260620117
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,2048,2,1,128,1,float16,float16,0,0.10871360301971436
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,2048,2,1,128,1,float16,fp8,0,0.1502527952194214
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,2048,2,2,128,1,float16,float16,0,0.06807360053062439
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,2048,2,2,128,1,float16,fp8,0,0.08701760172843934
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,2048,2,2,128,1,fp8,fp8,0,0.08636479973793029
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,2048,2,1,128,1,float16,float16,0,0.06682720184326171
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,2048,2,1,128,1,float16,fp8,0,0.08633440136909484
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,2048,2,1,128,1,fp8,fp8,0,0.0867792010307312
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,2048,2,2,128,1,float16,float16,0,0.041203200817108154
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,2048,2,2,128,1,float16,fp8,0,0.0514847993850708
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,2048,2,2,128,1,fp8,fp8,0,0.05141440033912659
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,2048,2,1,128,1,float16,float16,0,0.041438400745391846
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,2048,2,1,128,1,float16,fp8,0,0.051419198513031006
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,2048,2,1,128,1,fp8,fp8,0,0.05141760110855102
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,2048,2,2,128,1,float16,float16,0,0.03722400069236755
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,2048,2,2,128,1,float16,fp8,0,0.0411871999502182
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,2048,2,2,128,1,fp8,fp8,0,0.041177600622177124
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,2048,2,1,128,1,float16,float16,0,0.03713920116424561
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,2048,2,1,128,1,float16,fp8,0,0.04118559956550598
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,2048,2,1,128,1,fp8,fp8,0,0.041222399473190306
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,4096,2,1,128,1,float16,fp8,0,0.07399680018424988
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,2048,2,2,128,1,float16,float16,0,0.0355536013841629
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,2048,2,2,128,1,float16,fp8,0,0.03707360029220581
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,2048,2,2,128,1,fp8,fp8,0,0.037088000774383546
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,2048,2,1,128,1,float16,float16,0,0.03545120060443878
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,2048,2,1,128,1,float16,fp8,0,0.037064000964164734
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,2048,2,1,128,1,fp8,fp8,0,0.037110400199890134
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,2048,2,2,128,1,float16,float16,0,0.03497759997844696
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,2048,2,2,128,1,float16,fp8,0,0.035016000270843506
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,2048,2,2,128,1,fp8,fp8,0,0.03507519960403442
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,2048,2,1,128,1,float16,float16,0,0.03504000008106232
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,2048,2,1,128,1,float16,fp8,0,0.035025599598884585
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,2048,2,1,128,1,fp8,fp8,0,0.03506560027599335
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1536,2,1,128,1,float16,float16,0,0.1272320032119751
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1536,2,1,128,1,float16,fp8,0,0.19511200189590455
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1536,2,2,128,1,float16,float16,0,0.07671999931335449
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,3072,2,1,128,1,fp8,fp8,0,0.05749760270118713
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1536,2,1,128,1,fp8,fp8,0,0.19285279512405396
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1536,2,2,128,1,float16,fp8,0,0.10724799633026123
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1536,2,2,128,1,fp8,fp8,0,0.10686719417572021
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1536,2,1,128,1,float16,float16,0,0.07468640208244323
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,3072,2,2,128,1,float16,float16,0,0.047305598855018616
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1536,2,1,128,1,float16,fp8,0,0.10506240129470826
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1536,2,1,128,1,fp8,fp8,0,0.10676480531692505
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1536,2,2,128,1,float16,fp8,0,0.06286240220069886
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1536,2,1,128,1,float16,float16,0,0.04730879962444305
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1536,2,2,128,1,fp8,fp8,0,0.0622048020362854
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1536,2,1,128,1,float16,fp8,0,0.06363360285758972
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1536,2,1,128,1,fp8,fp8,0,0.06228799819946289
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1536,2,2,128,1,float16,float16,0,0.03307200074195862
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1536,2,2,128,1,float16,fp8,0,0.041145598888397215
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1536,2,2,128,1,fp8,fp8,0,0.04115839898586273
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1536,2,1,128,1,float16,float16,0,0.03300319910049439
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1536,2,1,128,1,float16,fp8,0,0.04028320014476776
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1536,2,2,128,1,float16,float16,0,0.03094080090522766
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1536,2,2,128,1,float16,fp8,0,0.033236798644065854
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1536,2,2,128,1,fp8,fp8,0,0.033092799782752993
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1536,2,1,128,1,float16,float16,0,0.030931198596954347
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1536,2,1,128,1,float16,fp8,0,0.033055999875068666
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1536,2,1,128,1,fp8,fp8,0,0.03300319910049439
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1536,2,2,128,1,float16,float16,0,0.02895680069923401
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1536,2,2,128,1,float16,fp8,0,0.030875200033187868
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1536,2,2,128,1,fp8,fp8,0,0.030956798791885377
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1536,2,1,128,1,float16,float16,0,0.02885119915008545
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1536,2,1,128,1,float16,fp8,0,0.03078559935092926
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,2048,2,1,128,1,fp8,fp8,0,0.15002880096435547
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1536,2,1,128,1,fp8,fp8,0,0.03095200061798096
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1536,2,2,128,1,float16,float16,0,0.02885279953479767
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1536,2,2,128,1,fp8,fp8,0,0.02890079915523529
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1536,2,1,128,1,float16,float16,0,0.028907200694084166
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1536,2,1,128,1,fp8,fp8,0,0.028814399242401124
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1536,2,1,128,1,float16,fp8,0,0.028863999247550964
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,1024,2,1,128,1,float16,float16,0,0.13413280248641968
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1024,2,2,128,1,float16,float16,0,0.0782800018787384
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,1024,2,1,128,1,float16,fp8,0,0.22798399925231932
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,1024,2,1,128,1,fp8,fp8,0,0.2282304048538208
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1024,2,2,128,1,float16,fp8,0,0.12155519723892212
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1024,2,2,128,1,fp8,fp8,0,0.12260479927062988
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1024,2,1,128,1,float16,float16,0,0.07717120051383972
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1024,2,1,128,1,float16,fp8,0,0.12022240161895752
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1024,2,2,128,1,float16,fp8,0,0.06904000043869019
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1024,2,1,128,1,fp8,fp8,0,0.12072639465332032
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1024,2,1,128,1,float16,float16,0,0.046793600916862486
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1024,2,1,128,1,float16,fp8,0,0.06791520118713379
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1024,2,2,128,1,float16,float16,0,0.028915199637413024
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1024,2,1,128,1,fp8,fp8,0,0.06876479983329772
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1024,2,2,128,1,float16,fp8,0,0.039139199256896975
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1024,2,2,128,1,fp8,fp8,0,0.03914079964160919
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1024,2,1,128,1,float16,float16,0,0.028921601176261903
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1024,2,1,128,1,float16,fp8,0,0.03912639915943146
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1024,2,1,128,1,fp8,fp8,0,0.03915359973907471
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1024,2,2,128,1,float16,float16,0,0.024859200417995452
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1024,2,2,128,1,float16,fp8,0,0.03086400032043457
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1024,2,2,128,1,fp8,fp8,0,0.030742400884628297
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1024,2,1,128,1,float16,float16,0,0.02480800002813339
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1024,2,1,128,1,float16,fp8,0,0.030055999755859375
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1024,2,2,128,1,float16,float16,0,0.022771200537681578
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1024,2,1,128,1,fp8,fp8,0,0.030921599268913268
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,4096,2,1,128,1,float16,fp8,0,0.06433119773864746
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1024,2,2,128,1,fp8,fp8,0,0.025019198656082153
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1024,2,1,128,1,float16,float16,0,0.02305919975042343
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1024,2,1,128,1,fp8,fp8,0,0.024886399507522583
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1536,2,2,128,1,float16,float16,0,0.04737280011177063
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1024,2,2,128,1,float16,float16,0,0.022767999768257143
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1024,2,2,128,1,float16,fp8,0,0.02276480048894882
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1024,2,2,128,1,fp8,fp8,0,0.022776000201702118
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1536,2,1,128,1,fp8,fp8,0,0.04121119976043701
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1024,2,1,128,1,float16,float16,0,0.022726400196552275
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1024,2,1,128,1,float16,fp8,0,0.023132799565792082
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1024,2,1,128,1,fp8,fp8,0,0.022735999524593355
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1024,2,2,128,1,float16,float16,0,0.022735999524593355
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1024,2,2,128,1,float16,fp8,0,0.02273920029401779
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1024,2,2,128,1,fp8,fp8,0,0.022756800055503845
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1024,2,1,128,1,float16,float16,0,0.022745600342750548
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1024,2,1,128,1,float16,fp8,0,0.02276960015296936
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1536,2,2,128,1,float16,fp8,0,0.02877599895000458
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1024,2,1,128,1,fp8,fp8,0,0.022703999280929567
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,512,2,1,128,1,float16,float16,0,0.1092479944229126
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,512,2,1,128,1,float16,fp8,0,0.20310399532318116
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,512,2,2,128,1,float16,float16,0,0.0637279987335205
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,512,2,2,128,1,float16,fp8,0,0.10806560516357422
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,512,2,2,128,1,fp8,fp8,0,0.10826400518417359
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,512,2,1,128,1,float16,float16,0,0.061668801307678225
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1024,2,2,128,1,float16,float16,0,0.048281601071357726
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,512,2,1,128,1,float16,fp8,0,0.10671360492706299
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,512,2,1,128,1,fp8,fp8,0,0.10646719932556152
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,512,2,2,128,1,float16,fp8,0,0.05962399840354919
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,512,2,1,128,1,float16,float16,0,0.03634720146656036
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,512,2,1,128,1,float16,fp8,0,0.05963360071182251
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,512,2,1,128,1,fp8,fp8,0,0.059680002927780154
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,512,2,2,128,1,float16,float16,0,0.022500799596309663
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,512,2,2,128,1,float16,fp8,0,0.03299359977245331
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,512,2,2,128,1,fp8,fp8,0,0.03325439989566803
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,512,2,1,128,1,float16,float16,0,0.022648000717163087
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1024,2,2,128,1,float16,fp8,0,0.024876800179481507
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,512,2,1,128,1,float16,fp8,0,0.03316799998283386
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1024,2,1,128,1,float16,fp8,0,0.025204798579216002
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,512,2,2,128,1,float16,float16,0,0.018688000738620758
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,512,2,2,128,1,float16,fp8,0,0.024772800505161285
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,512,2,2,128,1,fp8,fp8,0,0.024864000082015992
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,512,2,1,128,1,float16,float16,0,0.01868959963321686
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,512,2,1,128,1,float16,fp8,0,0.024803200364112855
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,512,2,1,128,1,fp8,fp8,0,0.024684800207614897
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,512,2,2,128,1,float16,fp8,0,0.01940000057220459
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,512,2,2,128,1,float16,float16,0,0.016888000071048737
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,512,2,2,128,1,fp8,fp8,0,0.019871999323368073
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,512,2,1,128,1,float16,float16,0,0.016680000722408293
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,512,2,1,128,1,float16,fp8,0,0.019449600577354433
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,512,2,1,128,1,fp8,fp8,0,0.019708800315856933
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,512,2,2,128,1,float16,float16,0,0.016527999937534333
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,512,2,2,128,1,float16,fp8,0,0.01709599941968918
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,512,2,2,128,1,fp8,fp8,0,0.01725279986858368
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,512,2,1,128,1,float16,float16,0,0.016625599563121797
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,512,2,1,128,1,float16,fp8,0,0.018545599281787874
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,512,2,1,128,1,fp8,fp8,0,0.01681919991970062
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,512,2,2,128,1,float16,float16,0,0.016487999260425566
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,512,2,2,128,1,float16,fp8,0,0.01663520038127899
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,512,2,2,128,1,fp8,fp8,0,0.01658399999141693
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,512,2,1,128,1,float16,float16,0,0.016630400717258454
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,512,2,1,128,1,float16,fp8,0,0.016575999557971954
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,512,2,1,128,1,fp8,fp8,0,0.01664319932460785
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,512,2,2,128,1,float16,float16,0,0.01658080071210861
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,512,2,2,128,1,float16,fp8,0,0.016651199758052827
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,512,2,1,128,1,fp8,fp8,0,0.20162079334259034
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,512,2,1,128,1,float16,float16,0,0.016518400609493257
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,512,2,2,128,1,fp8,fp8,0,0.016579200327396394
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,512,2,1,128,1,float16,fp8,0,0.015688000619411467
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,512,2,1,128,1,fp8,fp8,0,0.01653279960155487
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,256,2,1,128,1,float16,float16,0,0.05348479747772217
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,256,2,1,128,1,float16,fp8,0,0.09859359860420228
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,256,2,2,128,1,float16,float16,0,0.03205440044403076
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,256,2,1,128,1,fp8,fp8,0,0.0997759997844696
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,256,2,2,128,1,float16,fp8,0,0.05345600247383118
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1024,2,2,128,1,fp8,fp8,0,0.06785119771957397
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,256,2,1,128,1,float16,fp8,0,0.053427201509475705
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,256,2,1,128,1,fp8,fp8,0,0.05347999930381775
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,512,2,2,128,1,fp8,fp8,0,0.05944640040397644
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,256,2,2,128,1,float16,float16,0,0.018702399730682374
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,256,2,2,128,1,fp8,fp8,0,0.030956798791885377
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,256,2,1,128,1,float16,fp8,0,0.030953601002693176
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,256,2,1,128,1,fp8,fp8,0,0.030928000807762146
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,256,2,2,128,1,float16,float16,0,0.016607999801635742
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,256,2,2,128,1,float16,fp8,0,0.020720000565052032
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,256,2,2,128,1,fp8,fp8,0,0.020793600380420683
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,256,2,1,128,1,float16,float16,0,0.014667199552059173
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,256,2,1,128,1,float16,fp8,0,0.021212799847126006
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,256,2,2,128,1,float16,float16,0,0.014529600739479065
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,256,2,1,128,1,fp8,fp8,0,0.02072319984436035
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,256,2,2,128,1,float16,fp8,0,0.016655999422073364
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,256,2,2,128,1,fp8,fp8,0,0.01658080071210861
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,256,2,1,128,1,float16,float16,0,0.014609600603580474
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,256,2,1,128,1,float16,fp8,0,0.016648000478744505
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,256,2,1,128,1,fp8,fp8,0,0.01661760061979294
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,512,2,1,128,1,fp8,fp8,0,0.033024001121521
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,256,2,2,128,1,float16,float16,0,0.013478399813175201
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,256,2,2,128,1,float16,fp8,0,0.014555199444293976
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,256,2,1,128,1,float16,float16,0,0.012782399356365205
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,256,2,1,128,1,float16,fp8,0,0.014588800072669984
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,256,2,1,128,1,fp8,fp8,0,0.014555199444293976
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,256,2,2,128,1,float16,float16,0,0.01250240057706833
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,256,2,2,128,1,float16,fp8,0,0.014579200744628906
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,256,2,2,128,1,fp8,fp8,0,0.014295999705791474
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,256,2,1,128,1,float16,float16,0,0.012484800070524216
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,256,2,1,128,1,float16,fp8,0,0.013473600149154663
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,256,2,1,128,1,fp8,fp8,0,0.013568000495433807
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,256,2,2,128,1,float16,float16,0,0.012492799758911132
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,256,2,2,128,1,float16,fp8,0,0.013067199289798737
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,256,2,2,128,1,fp8,fp8,0,0.01265919953584671
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,256,2,1,128,1,float16,float16,0,0.012534399330615998
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,256,2,1,128,1,float16,fp8,0,0.013198399543762207
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,256,2,1,128,1,fp8,fp8,0,0.012673600018024445
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,256,2,2,128,1,float16,float16,0,0.012532800436019897
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,256,2,2,128,1,float16,fp8,0,0.012534399330615998
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,256,2,2,128,1,fp8,fp8,0,0.012508800625801087
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,256,2,1,128,1,float16,float16,0,0.012476799637079239
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,256,2,1,128,1,float16,fp8,0,0.012593600153923034
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,256,2,1,128,1,fp8,fp8,0,0.012513600289821625
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,256,2,2,128,1,fp8,fp8,0,0.05347999930381775
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,256,2,1,128,1,float16,float16,0,0.02889760136604309
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,128,2,1,128,1,float16,fp8,0,0.055529600381851195
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,128,2,1,128,1,fp8,fp8,0,0.05554400086402893
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,128,2,2,128,1,float16,float16,0,0.018952000141143798
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,256,2,2,128,1,float16,fp8,0,0.030935999751091004
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,256,2,1,128,1,float16,float16,0,0.018568000197410582
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,128,2,2,128,1,float16,fp8,0,0.030939200520515443
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,128,2,2,128,1,fp8,fp8,0,0.030935999751091004
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,128,2,1,128,1,float16,float16,0,0.020294399559497835
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,128,2,1,128,1,float16,fp8,0,0.030969598889350893
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,128,2,1,128,1,fp8,fp8,0,0.030961599946022034
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,128,2,2,128,1,float16,float16,0,0.014443199336528777
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,128,2,2,128,1,float16,fp8,0,0.019281600415706635
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,128,2,2,128,1,fp8,fp8,0,0.01886879950761795
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,128,2,1,128,1,float16,float16,0,0.012643200159072877
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,128,2,1,128,1,float16,fp8,0,0.01863359957933426
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,128,2,1,128,1,fp8,fp8,0,0.019049599766731262
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,128,2,2,128,1,float16,float16,0,0.012539200484752655
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,128,2,2,128,1,float16,fp8,0,0.014604799449443817
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,128,2,2,128,1,fp8,fp8,0,0.014577600359916686
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,128,2,1,128,1,float16,float16,0,0.012459199875593185
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,128,2,1,128,1,float16,fp8,0,0.01459999978542328
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,128,2,1,128,1,fp8,fp8,0,0.014547200500965118
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,128,2,2,128,1,float16,fp8,0,0.012537600100040435
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,128,2,2,128,1,fp8,fp8,0,0.012432000041007996
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,128,2,1,128,1,float16,float16,0,0.010463999956846238
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,128,2,1,128,1,float16,fp8,0,0.012511999905109405
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,128,2,1,128,1,fp8,fp8,0,0.012428800016641617
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,128,2,2,128,1,float16,fp8,0,0.010864000022411346
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,128,2,2,128,1,fp8,fp8,0,0.010894399881362916
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,128,2,1,128,1,float16,float16,0,0.010462400317192078
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,128,2,1,128,1,float16,fp8,0,0.01082720011472702
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,128,2,1,128,1,fp8,fp8,0,0.010782399773597717
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,256,2,2,128,1,fp8,fp8,0,0.014521600306034088
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,128,2,2,128,1,float16,fp8,0,0.01048479974269867
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,128,2,1,128,1,float16,float16,0,0.010454399883747101
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,128,2,1,128,1,float16,fp8,0,0.0105103999376297
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,128,2,1,128,1,fp8,fp8,0,0.010463999956846238
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,128,2,2,128,1,float16,float16,0,0.01043199971318245
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,128,2,2,128,1,float16,fp8,0,0.010416000336408614
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,128,2,2,128,1,fp8,fp8,0,0.01043199971318245
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,128,2,1,128,1,float16,float16,0,0.010411199927330018
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,128,2,1,128,1,float16,fp8,0,0.01045759990811348
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,128,2,1,128,1,fp8,fp8,0,0.010375999659299851
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,128,2,2,128,1,float16,float16,0,0.010454399883747101
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,128,2,2,128,1,float16,fp8,0,0.010420800000429154
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,128,2,2,128,1,fp8,fp8,0,0.010454399883747101
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,128,2,1,128,1,float16,float16,0,0.010492800176143647
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,128,2,1,128,1,float16,fp8,0,0.010441599786281586
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,128,2,1,128,1,fp8,fp8,0,0.010390400141477584
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,64,2,1,128,1,float16,float16,0,0.02479359954595566
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,64,2,1,128,1,float16,fp8,0,0.03710240125656128
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,64,2,1,128,1,fp8,fp8,0,0.03711200058460236
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,64,2,2,128,1,float16,float16,0,0.016569599509239197
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,64,2,2,128,1,float16,fp8,0,0.022742399573326112
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,64,2,2,128,1,fp8,fp8,0,0.022676800191402436
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,64,2,1,128,1,float16,float16,0,0.016624000668525696
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,64,2,1,128,1,float16,fp8,0,0.02279040068387985
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,64,2,1,128,1,fp8,fp8,0,0.022720000147819518
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,128,2,1,128,1,float16,float16,0,0.031656000018119815
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,64,2,2,128,1,float16,float16,0,0.012478400021791458
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,512,2,2,128,1,float16,float16,0,0.038017600774765015
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,64,2,2,128,1,fp8,fp8,0,0.014585599303245544
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,64,2,1,128,1,float16,float16,0,0.012417600303888322
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,64,2,1,128,1,float16,fp8,0,0.014555199444293976
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,64,2,1,128,1,fp8,fp8,0,0.014575999975204468
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,64,2,2,128,1,float16,fp8,0,0.012508800625801087
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,64,2,2,128,1,fp8,fp8,0,0.012540799379348756
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,64,2,1,128,1,float16,float16,0,0.010471999645233154
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,64,2,1,128,1,float16,fp8,0,0.012494400143623352
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,64,2,1,128,1,fp8,fp8,0,0.012532800436019897
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,64,2,2,128,1,float16,float16,0,0.010447999835014344
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,64,2,2,128,1,float16,fp8,0,0.010444799810647965
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,64,2,2,128,1,fp8,fp8,0,0.01048479974269867
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,64,2,1,128,1,float16,float16,0,0.010468800365924836
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,64,2,1,128,1,float16,fp8,0,0.01045600026845932
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,128,2,2,128,1,float16,float16,0,0.010476800054311753
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,64,2,1,128,1,fp8,fp8,0,0.010463999956846238
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,64,2,2,128,1,float16,fp8,0,0.010384000092744827
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,64,2,2,128,1,fp8,fp8,0,0.010486400127410889
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,128,2,2,128,1,float16,float16,0,0.010465600341558457
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,64,2,1,128,1,float16,float16,0,0.010428799688816071
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,64,2,1,128,1,float16,fp8,0,0.01045600026845932
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,64,2,1,128,1,fp8,fp8,0,0.010463999956846238
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,64,2,2,128,1,fp8,fp8,0,0.010502400249242783
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,64,2,2,128,1,float16,fp8,0,0.010489600151777268
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,128,2,2,128,1,float16,float16,0,0.010409600287675857
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,64,2,1,128,1,float16,float16,0,0.010497599840164185
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,64,2,1,128,1,float16,fp8,0,0.010388799756765366
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,128,2,2,128,1,fp8,fp8,0,0.01040479987859726
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,64,2,1,128,1,fp8,fp8,0,0.01048320010304451
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,64,2,2,128,1,fp8,fp8,0,0.0104592002928257
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,64,2,2,128,1,float16,float16,0,0.010375999659299851
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,64,2,2,128,1,float16,fp8,0,0.010441599786281586
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,64,2,1,128,1,float16,float16,0,0.010507199913263321
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,64,2,1,128,1,float16,fp8,0,0.010360000282526016
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,64,2,1,128,1,fp8,fp8,0,0.010406400263309478
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,64,2,2,128,1,float16,float16,0,0.010462400317192078
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,64,2,2,128,1,float16,fp8,0,0.010414399951696397
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,64,2,2,128,1,fp8,fp8,0,0.010436800122261048
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,64,2,1,128,1,float16,float16,0,0.01043360009789467
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,64,2,1,128,1,float16,fp8,0,0.01037919968366623
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,64,2,1,128,1,fp8,fp8,0,0.010398399829864503
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,32,2,1,128,1,float16,float16,0,0.022771200537681578
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,32,2,1,128,1,float16,fp8,0,0.026836800575256347
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,32,2,1,128,1,fp8,fp8,0,0.027036800980567932
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,32,2,2,128,1,float16,float16,0,0.014606399834156037
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,32,2,2,128,1,float16,fp8,0,0.01680160015821457
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,32,2,2,128,1,fp8,fp8,0,0.016791999340057373
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,32,2,1,128,1,float16,float16,0,0.014552000164985656
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,32,2,1,128,1,float16,fp8,0,0.016571199893951415
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,32,2,1,128,1,fp8,fp8,0,0.016883200407028197
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,32,2,2,128,1,float16,float16,0,0.010451199859380722
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,32,2,2,128,1,float16,fp8,0,0.012403199821710587
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,32,2,2,128,1,fp8,fp8,0,0.01250240057706833
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,32,2,1,128,1,float16,float16,0,0.010545600205659866
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,32,2,1,128,1,float16,fp8,0,0.012449599802494049
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,32,2,1,128,1,fp8,fp8,0,0.012478400021791458
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,32,2,2,128,1,float16,float16,0,0.010396800190210342
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,32,2,2,128,1,float16,fp8,0,0.010470400005578995
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,32,2,2,128,1,fp8,fp8,0,0.010446400195360184
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,32,2,1,128,1,float16,float16,0,0.010542400181293488
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,32,2,1,128,1,float16,fp8,0,0.010451199859380722
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,32,2,1,128,1,fp8,fp8,0,0.010460799932479859
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,32,2,2,128,1,float16,float16,0,0.010409600287675857
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,32,2,2,128,1,float16,fp8,0,0.010375999659299851
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,32,2,2,128,1,fp8,fp8,0,0.01037440001964569
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,32,2,1,128,1,float16,float16,0,0.010463999956846238
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,32,2,1,128,1,float16,fp8,0,0.010382399708032609
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,32,2,1,128,1,fp8,fp8,0,0.010409600287675857
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,32,2,2,128,1,float16,fp8,0,0.009583999961614608
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,32,2,2,128,1,fp8,fp8,0,0.009427200257778167
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,32,2,1,128,1,float16,float16,0,0.01035040020942688
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,32,2,2,128,1,float16,float16,0,0.01051200032234192
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,32,2,1,128,1,float16,fp8,0,0.008700799942016602
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,32,2,1,128,1,fp8,fp8,0,0.010332799702882766
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,32,2,2,128,1,float16,float16,0,0.009223999828100205
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,32,2,2,128,1,float16,fp8,0,0.010337600111961364
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,64,2,2,128,1,float16,fp8,0,0.014537599682807923
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,32,2,1,128,1,float16,float16,0,0.009239999949932099
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,32,2,2,128,1,fp8,fp8,0,0.01040479987859726
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,32,2,2,128,1,float16,float16,0,0.009228800237178803
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,32,2,1,128,1,fp8,fp8,0,0.008782400190830231
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,64,2,2,128,1,float16,float16,0,0.010440000146627427
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,32,2,2,128,1,float16,fp8,0,0.010387200117111205
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,32,2,1,128,1,float16,float16,0,0.008433599770069123
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,32,2,2,128,1,fp8,fp8,0,0.00854559987783432
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,32,2,1,128,1,fp8,fp8,0,0.01035040020942688
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,32,2,1,128,1,float16,fp8,0,0.008441600203514098
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,32,2,2,128,1,float16,fp8,0,0.0104592002928257
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,32,2,2,128,1,fp8,fp8,0,0.010361599922180175
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,32,2,1,128,1,float16,float16,0,0.008579199761152267
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,32,2,2,128,1,float16,float16,0,0.008399999886751174
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,32,2,1,128,1,fp8,fp8,0,0.009759999811649323
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,16,2,1,128,1,float16,float16,0,0.020688000321388244
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,16,2,1,128,1,float16,fp8,0,0.022737599909305573
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,16,2,1,128,1,fp8,fp8,0,0.022758400440216063
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,16,2,2,128,1,float16,float16,0,0.014537599682807923
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,16,2,2,128,1,float16,fp8,0,0.01454080045223236
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,16,2,2,128,1,fp8,fp8,0,0.014569599926471711
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,16,2,1,128,1,float16,float16,0,0.014588800072669984
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,16,2,1,128,1,float16,fp8,0,0.014612799882888794
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,16,2,1,128,1,fp8,fp8,0,0.014542399346828461
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,16,2,2,128,1,float16,float16,0,0.010476800054311753
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,16,2,2,128,1,float16,fp8,0,0.010447999835014344
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,16,2,2,128,1,fp8,fp8,0,0.01048479974269867
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,16,2,1,128,1,float16,float16,0,0.010425599664449692
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,16,2,1,128,1,float16,fp8,0,0.010513599961996078
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,16,2,1,128,1,fp8,fp8,0,0.01043199971318245
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,16,2,2,128,1,float16,float16,0,0.010407999902963639
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,16,2,2,128,1,float16,fp8,0,0.010422399640083313
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,16,2,2,128,1,fp8,fp8,0,0.010355199873447418
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,16,2,1,128,1,float16,float16,0,0.010451199859380722
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,16,2,1,128,1,float16,fp8,0,0.010398399829864503
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,16,2,1,128,1,fp8,fp8,0,0.010367999970912933
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,16,2,2,128,1,float16,fp8,0,0.009071999788284301
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,16,2,2,128,1,fp8,fp8,0,0.010073599964380264
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,16,2,2,128,1,float16,float16,0,0.009513600170612336
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,16,2,1,128,1,float16,float16,0,0.009294400364160538
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,16,2,1,128,1,float16,fp8,0,0.00952640026807785
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,16,2,1,128,1,fp8,fp8,0,0.008934400230646133
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,16,2,2,128,1,float16,fp8,0,0.0103472001850605
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,16,2,2,128,1,fp8,fp8,0,0.008476799726486206
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,16,2,1,128,1,float16,float16,0,0.010363200306892395
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,16,2,1,128,1,float16,fp8,0,0.008963199704885483
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,16,2,1,128,1,fp8,fp8,0,0.009857600182294845
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16,2,2,128,1,float16,float16,0,0.010375999659299851
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16,2,2,128,1,float16,fp8,0,0.008363199979066848
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16,2,2,128,1,fp8,fp8,0,0.008382400125265121
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16,2,1,128,1,float16,float16,0,0.009131199866533279
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16,2,1,128,1,float16,fp8,0,0.00843520015478134
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16,2,1,128,1,fp8,fp8,0,0.0095551997423172
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16,2,2,128,1,float16,float16,0,0.010360000282526016
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16,2,2,128,1,float16,fp8,0,0.00952799990773201
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16,2,2,128,1,fp8,fp8,0,0.009651199728250504
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16,2,1,128,1,float16,float16,0,0.010390400141477584
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16,2,1,128,1,float16,fp8,0,0.00950080007314682
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16,2,1,128,1,fp8,fp8,0,0.009272000193595887
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16,2,2,128,1,float16,float16,0,0.010308799892663955
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16,2,2,128,1,float16,fp8,0,0.008475200086832047
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16,2,1,128,1,float16,float16,0,0.009040000289678574
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16,2,1,128,1,float16,fp8,0,0.008460800349712371
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16,2,1,128,1,fp8,fp8,0,0.008449599891901017
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,1,2,1,128,1,float16,float16,0,0.019721600413322448
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,32,2,1,128,1,float16,fp8,0,0.009731200337409974
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,32,2,1,128,1,float16,fp8,0,0.009675200283527374
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,1,2,2,128,1,float16,float16,0,0.014310400187969207
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,1,2,2,128,1,float16,fp8,0,0.012639999389648438
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,64,2,2,128,1,float16,float16,0,0.010476800054311753
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,1,2,2,128,1,fp8,fp8,0,0.01252480000257492
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,1,2,1,128,1,float16,float16,0,0.012727999687194824
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,1,2,1,128,1,float16,fp8,0,0.01255040019750595
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,1,2,1,128,1,fp8,fp8,0,0.012539200484752655
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1,2,2,128,1,float16,float16,0,0.010489600151777268
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,64,2,2,128,1,float16,float16,0,0.01035040020942688
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1,2,2,128,1,float16,fp8,0,0.010503999888896942
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1,2,2,128,1,fp8,fp8,0,0.010452800244092942
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1,2,1,128,1,float16,fp8,0,0.010467199981212616
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1,2,1,128,1,float16,float16,0,0.010446400195360184
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1,2,2,128,1,float16,float16,0,0.010518400371074677
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1,2,2,128,1,float16,fp8,0,0.010428799688816071
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1,2,2,128,1,fp8,fp8,0,0.010480000078678131
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1,2,1,128,1,float16,fp8,0,0.01048320010304451
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1,2,1,128,1,float16,float16,0,0.010440000146627427
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,16,2,2,128,1,float16,float16,0,0.010364799946546554
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1,2,2,128,1,float16,float16,0,0.010425599664449692
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1,2,2,128,1,float16,fp8,0,0.009700799733400345
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1,2,2,128,1,fp8,fp8,0,0.009087999910116195
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1,2,1,128,1,float16,float16,0,0.010361599922180175
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1,2,1,128,1,float16,fp8,0,0.010377600044012069
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1,2,1,128,1,fp8,fp8,0,0.009431999921798707
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1,2,2,128,1,float16,float16,0,0.010337600111961364
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1,2,2,128,1,float16,fp8,0,0.008486399799585343
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1,2,2,128,1,fp8,fp8,0,0.010284800082445145
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1,2,1,128,1,float16,float16,0,0.008472000062465668
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1,2,1,128,1,float16,fp8,0,0.009222400188446046
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1,2,2,128,1,float16,float16,0,0.010366400331258773
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1,2,1,128,1,fp8,fp8,0,0.008395200222730636
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1,2,2,128,1,float16,fp8,0,0.009454400092363358
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1,2,2,128,1,fp8,fp8,0,0.008425600081682205
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1,2,1,128,1,float16,float16,0,0.010391999781131745
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1,2,1,128,1,float16,fp8,0,0.008419200032949447
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1,2,1,128,1,fp8,fp8,0,0.009534399956464767
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1,2,2,128,1,float16,float16,0,0.008720000088214875
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1,2,2,128,1,float16,fp8,0,0.008417599648237229
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1,2,2,128,1,fp8,fp8,0,0.00841279998421669
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1,2,1,128,1,float16,float16,0,0.008894400298595428
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1,2,1,128,1,float16,fp8,0,0.00844319984316826
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1,2,1,128,1,fp8,fp8,0,0.008416000008583068
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1,2,2,128,1,float16,float16,0,0.008748800307512284
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1,2,2,128,1,float16,fp8,0,0.008777599781751633
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1,2,2,128,1,fp8,fp8,0,0.008417599648237229
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1,2,1,128,1,float16,float16,0,0.008416000008583068
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1,2,1,128,1,float16,fp8,0,0.008462399989366532
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1,2,1,128,1,fp8,fp8,0,0.008377599716186523
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16,2,2,128,1,fp8,fp8,0,0.009243199974298478
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16384,1,1,128,1,float16,float16,0,0.33520801067352296
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16384,1,1,128,1,float16,float16,0,0.2144927978515625
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16384,1,1,128,1,float16,fp8,0,0.36046719551086426
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16384,1,1,128,1,fp8,fp8,0,0.3592639923095703
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16384,1,1,128,1,float16,fp8,0,0.22549760341644287
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16384,1,1,128,1,fp8,fp8,0,0.22478721141815186
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,1,2,1,128,1,fp8,fp8,0,0.018639999628067016
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16384,1,1,128,1,float16,fp8,0,0.20563840866088867
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16384,1,1,128,1,fp8,fp8,0,0.20724480152130126
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,12288,1,1,128,1,float16,float16,0,0.21393918991088867
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,12288,1,1,128,1,float16,fp8,0,0.23903200626373292
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,12288,1,1,128,1,float16,float16,0,0.1632048010826111
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,12288,1,1,128,1,fp8,fp8,0,0.1727023959159851
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,12288,1,1,128,1,float16,float16,0,0.1582319974899292
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1,2,1,128,1,fp8,fp8,0,0.010480000078678131
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,12288,1,1,128,1,float16,fp8,0,0.15795999765396118
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,12288,1,1,128,1,fp8,fp8,0,0.15802719593048095
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,10240,1,1,128,1,float16,float16,0,0.15670399665832518
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,10240,1,1,128,1,float16,fp8,0,0.1804800033569336
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,10240,1,1,128,1,fp8,fp8,0,0.17894719839096068
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,10240,1,1,128,1,float16,float16,0,0.13749920129776
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1,2,1,128,1,fp8,fp8,0,0.010387200117111205
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,10240,1,1,128,1,float16,fp8,0,0.1456912040710449
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,10240,1,1,128,1,fp8,fp8,0,0.14585119485855103
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,10240,1,1,128,1,float16,fp8,0,0.13332159519195558
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,10240,1,1,128,1,float16,float16,0,0.13289120197296142
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,8192,1,1,128,1,float16,float16,0,0.1897439956665039
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,8192,1,1,128,1,float16,fp8,0,0.2257904052734375
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,8192,1,1,128,1,float16,float16,0,0.11963039636611938
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,8192,1,1,128,1,fp8,fp8,0,0.22678399085998535
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,8192,1,1,128,1,float16,fp8,0,0.1363600015640259
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,8192,1,1,128,1,fp8,fp8,0,0.13649760484695433
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,8192,1,1,128,1,float16,float16,0,0.11095839738845825
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,8192,1,1,128,1,float16,fp8,0,0.11731040477752686
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,8192,1,1,128,1,fp8,fp8,0,0.11699039936065674
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,8192,1,1,128,1,float16,float16,0,0.10865440368652343
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,8192,1,1,128,1,float16,fp8,0,0.10873440504074097
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,8192,1,1,128,1,fp8,fp8,0,0.10879679918289184
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,6144,1,1,128,1,float16,float16,0,0.12444000244140625
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,6144,1,1,128,1,float16,fp8,0,0.15425120592117308
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,6144,1,1,128,1,fp8,fp8,0,0.15363359451293945
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,6144,1,1,128,1,float16,float16,0,0.09123520255088806
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,256,1,2,1,128,1,float16,fp8,0,0.018636800348758698
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16384,1,1,128,1,float16,float16,0,0.2057120084762573
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,6144,1,1,128,1,fp8,fp8,0,0.10472160577774048
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,6144,1,1,128,1,float16,fp8,0,0.09156479835510253
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,6144,1,1,128,1,fp8,fp8,0,0.0903663992881775
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,6144,1,1,128,1,float16,float16,0,0.08428800106048584
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,12288,1,1,128,1,fp8,fp8,0,0.23733921051025392
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,6144,1,1,128,1,float16,fp8,0,0.0842032015323639
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,6144,1,1,128,1,fp8,fp8,0,0.08419039845466614
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,4096,1,1,128,1,float16,float16,0,0.1150704026222229
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,12288,1,1,128,1,float16,fp8,0,0.171014404296875
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,4096,1,1,128,1,float16,fp8,0,0.15714559555053711
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,4096,1,1,128,1,float16,float16,0,0.07029280066490173
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,4096,1,1,128,1,float16,fp8,0,0.09035840034484863
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,4096,1,1,128,1,fp8,fp8,0,0.09071199893951416
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,4096,1,1,128,1,float16,fp8,0,0.0718608021736145
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,4096,1,1,128,1,fp8,fp8,0,0.0720255970954895
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,4096,1,1,128,1,float16,float16,0,0.05963839888572693
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,4096,1,1,128,1,float16,fp8,0,0.0637440025806427
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,4096,1,1,128,1,fp8,fp8,0,0.06361759901046753
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,4096,1,1,128,1,float16,float16,0,0.05955680012702942
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,4096,1,1,128,1,float16,fp8,0,0.059494400024414064
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,3072,1,1,128,1,float16,float16,0,0.07775679826736451
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,10240,1,1,128,1,fp8,fp8,0,0.13371520042419432
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,3072,1,1,128,1,float16,fp8,0,0.11006720066070556
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,3072,1,1,128,1,fp8,fp8,0,0.11156640052795411
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,3072,1,1,128,1,float16,float16,0,0.054838401079177854
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,3072,1,1,128,1,float16,fp8,0,0.07087839841842651
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,3072,1,1,128,1,fp8,fp8,0,0.0706928014755249
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,3072,1,1,128,1,float16,float16,0,0.04984000027179718
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,3072,1,1,128,1,float16,fp8,0,0.05750560164451599
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,3072,1,1,128,1,float16,float16,0,0.04773440062999725
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,3072,1,1,128,1,fp8,fp8,0,0.05756800174713135
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,3072,1,1,128,1,float16,fp8,0,0.05140960216522217
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,3072,1,1,128,1,fp8,fp8,0,0.051265597343444824
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,3072,1,1,128,1,float16,float16,0,0.04723840057849884
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,3072,1,1,128,1,float16,fp8,0,0.047363200783729555
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,3072,1,1,128,1,fp8,fp8,0,0.04734559953212738
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,2048,1,1,128,1,float16,float16,0,0.07646080255508422
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,2048,1,1,128,1,float16,fp8,0,0.12088960409164429
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,2048,1,1,128,1,fp8,fp8,0,0.1190176010131836
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,2048,1,1,128,1,float16,float16,0,0.045244801044464114
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,2048,1,1,128,1,float16,fp8,0,0.06816319823265075
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,2048,1,1,128,1,fp8,fp8,0,0.06732640266418458
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,2048,1,1,128,1,float16,fp8,0,0.0494159996509552
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,2048,1,1,128,1,fp8,fp8,0,0.04937280118465424
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,2048,1,1,128,1,float16,float16,0,0.037067198753356935
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,2048,1,1,128,1,float16,fp8,0,0.04115839898586273
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,6144,1,1,128,1,float16,fp8,0,0.10468000173568726
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,2048,1,1,128,1,fp8,fp8,0,0.041198399662971494
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,6144,1,1,128,1,float16,float16,0,0.08577119708061218
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,2048,1,1,128,1,fp8,fp8,0,0.037027201056480406
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,2048,1,1,128,1,float16,float16,0,0.03501439988613129
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,2048,1,1,128,1,float16,fp8,0,0.03502239882946014
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,2048,1,1,128,1,fp8,fp8,0,0.035017600655555724
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1536,1,1,128,1,float16,float16,0,0.05345600247383118
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1536,1,1,128,1,float16,fp8,0,0.08829759955406188
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1536,1,1,128,1,fp8,fp8,0,0.08824319839477539
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1536,1,1,128,1,float16,float16,0,0.03671840131282807
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,4096,1,1,128,1,fp8,fp8,0,0.15713920593261718
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1536,1,1,128,1,float16,fp8,0,0.05346239805221557
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1536,1,1,128,1,fp8,fp8,0,0.053452801704406736
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1536,1,1,128,1,float16,float16,0,0.031350401043891904
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1536,1,1,128,1,float16,fp8,0,0.03984639942646027
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,4096,1,1,128,1,float16,float16,0,0.06372479796409607
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1536,1,1,128,1,fp8,fp8,0,0.039263999462127684
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1536,1,1,128,1,float16,float16,0,0.02911199927330017
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1536,1,1,128,1,float16,fp8,0,0.03308959901332855
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1536,1,1,128,1,fp8,fp8,0,0.03299199938774109
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1536,1,1,128,1,float16,float16,0,0.02894560098648071
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1536,1,1,128,1,float16,fp8,0,0.02972320020198822
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1536,1,1,128,1,fp8,fp8,0,0.030939200520515443
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1536,1,1,128,1,float16,float16,0,0.028790399432182312
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1536,1,1,128,1,float16,fp8,0,0.02892799973487854
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1024,1,1,128,1,float16,float16,0,0.05739359855651856
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1024,1,1,128,1,float16,float16,0,0.032979199290275575
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1024,1,1,128,1,float16,fp8,0,0.10235199928283692
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1024,1,1,128,1,fp8,fp8,0,0.1026304006576538
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1024,1,1,128,1,float16,fp8,0,0.05733280181884766
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1024,1,1,128,1,float16,float16,0,0.02682720124721527
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,4096,1,1,128,1,fp8,fp8,0,0.059620797634124756
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1024,1,1,128,1,float16,fp8,0,0.039103999733924866
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1024,1,1,128,1,float16,float16,0,0.024859200417995452
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1024,1,1,128,1,float16,fp8,0,0.02959200143814087
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1024,1,1,128,1,fp8,fp8,0,0.029569599032402038
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1024,1,1,128,1,float16,float16,0,0.022767999768257143
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1024,1,1,128,1,float16,fp8,0,0.024857600033283234
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1024,1,1,128,1,fp8,fp8,0,0.0247871994972229
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1024,1,1,128,1,float16,float16,0,0.02266719937324524
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1024,1,1,128,1,float16,fp8,0,0.022729599475860597
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1024,1,1,128,1,fp8,fp8,0,0.02271520048379898
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1024,1,1,128,1,float16,float16,0,0.02274080067873001
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1024,1,1,128,1,float16,fp8,0,0.02269120067358017
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,2048,1,1,128,1,float16,float16,0,0.03925600051879883
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1024,1,1,128,1,fp8,fp8,0,0.022735999524593355
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,512,1,1,128,1,float16,float16,0,0.0463456004858017
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,512,1,1,128,1,float16,fp8,0,0.09247360229492188
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,512,1,1,128,1,fp8,fp8,0,0.09249280095100403
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,512,1,1,128,1,float16,fp8,0,0.05138400197029114
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,2048,1,1,128,1,float16,float16,0,0.035011199116706845
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,2048,1,1,128,1,float16,fp8,0,0.03707039952278137
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,512,1,1,128,1,fp8,fp8,0,0.05139679908752441
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,512,1,1,128,1,float16,float16,0,0.020632000267505647
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,512,1,1,128,1,float16,fp8,0,0.033011201024055484
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,512,1,1,128,1,float16,float16,0,0.018568000197410582
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,512,1,1,128,1,float16,fp8,0,0.022784000635147093
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,512,1,1,128,1,fp8,fp8,0,0.024689599871635437
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,512,1,1,128,1,float16,float16,0,0.016606399416923524
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,512,1,1,128,1,float16,fp8,0,0.018723200261592864
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,512,1,1,128,1,fp8,fp8,0,0.0196383997797966
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,512,1,1,128,1,float16,float16,0,0.01648640036582947
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,512,1,1,128,1,float16,fp8,0,0.01687840074300766
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,512,1,1,128,1,fp8,fp8,0,0.016646400094032288
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,512,1,1,128,1,float16,fp8,0,0.016497600078582763
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,512,1,1,128,1,float16,float16,0,0.016604800522327424
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,512,1,1,128,1,fp8,fp8,0,0.016568000614643096
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,512,1,1,128,1,float16,float16,0,0.01664319932460785
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,512,1,1,128,1,float16,fp8,0,0.016491200029850005
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,512,1,1,128,1,fp8,fp8,0,0.016631999611854555
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,256,1,1,128,1,float16,float16,0,0.022728000581264497
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,256,1,1,128,1,float16,fp8,0,0.047383999824523924
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,256,1,1,128,1,fp8,fp8,0,0.04732159972190857
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,256,1,1,128,1,float16,float16,0,0.018668800592422485
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,256,1,1,128,1,float16,fp8,0,0.028887999057769776
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1536,1,1,128,1,fp8,fp8,0,0.028806400299072266
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,256,1,1,128,1,fp8,fp8,0,0.028987199068069458
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,256,1,1,128,1,float16,float16,0,0.014564800262451171
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,256,1,1,128,1,float16,fp8,0,0.020715199410915375
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,256,1,1,128,1,fp8,fp8,0,0.020744000375270844
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,256,1,1,128,1,float16,float16,0,0.014448000490665436
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1024,1,1,128,1,fp8,fp8,0,0.05552800297737122
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,256,1,1,128,1,float16,fp8,0,0.01663520038127899
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,256,1,1,128,1,fp8,fp8,0,0.016612799465656282
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1024,1,1,128,1,fp8,fp8,0,0.03914240002632141
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,256,1,1,128,1,float16,fp8,0,0.014508800208568573
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,256,1,1,128,1,fp8,fp8,0,0.014584000408649444
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,256,1,1,128,1,float16,float16,0,0.012532800436019897
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,256,1,1,128,1,float16,float16,0,0.013950400054454803
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,256,1,1,128,1,float16,fp8,0,0.014547200500965118
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,256,1,1,128,1,float16,float16,0,0.012545600533485413
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,256,1,1,128,1,float16,fp8,0,0.012491200119256973
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,256,1,1,128,1,fp8,fp8,0,0.012956799566745758
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,256,1,1,128,1,float16,fp8,0,0.012539200484752655
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,128,1,1,128,1,float16,float16,0,0.016297599673271178
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,256,1,1,128,1,float16,float16,0,0.012638400495052337
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,128,1,1,128,1,float16,fp8,0,0.02730399966239929
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,128,1,1,128,1,fp8,fp8,0,0.02683199942111969
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,128,1,1,128,1,float16,float16,0,0.012564800679683685
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,128,1,1,128,1,float16,fp8,0,0.018598400056362152
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,128,1,1,128,1,fp8,fp8,0,0.018587200343608855
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,128,1,1,128,1,float16,float16,0,0.012111999839544297
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,128,1,1,128,1,float16,fp8,0,0.014593599736690522
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,128,1,1,128,1,fp8,fp8,0,0.014524799585342408
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,128,1,1,128,1,float16,float16,0,0.010518400371074677
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,128,1,1,128,1,float16,fp8,0,0.012488000094890594
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,128,1,1,128,1,fp8,fp8,0,0.012427199631929398
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,128,1,1,128,1,float16,float16,0,0.010441599786281586
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,128,1,1,128,1,float16,fp8,0,0.010440000146627427
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,128,1,1,128,1,fp8,fp8,0,0.010489600151777268
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,128,1,1,128,1,float16,float16,0,0.010513599961996078
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,128,1,1,128,1,float16,fp8,0,0.010496000200510025
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,128,1,1,128,1,fp8,fp8,0,0.010470400005578995
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,128,1,1,128,1,float16,float16,0,0.010356800258159637
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,128,1,1,128,1,float16,fp8,0,0.01045600026845932
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,128,1,1,128,1,float16,float16,0,0.010499200224876404
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,128,1,1,128,1,float16,fp8,0,0.010420800000429154
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,128,1,1,128,1,fp8,fp8,0,0.01048159971833229
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,64,1,1,128,1,float16,float16,0,0.012703999876976013
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,64,1,1,128,1,float16,fp8,0,0.018566399812698364
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,64,1,1,128,1,float16,float16,0,0.010500799864530563
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,64,1,1,128,1,float16,fp8,0,0.014502400159835815
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,512,1,1,128,1,float16,float16,0,0.026819199323654175
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,64,1,1,128,1,fp8,fp8,0,0.014563199877738953
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,64,1,1,128,1,float16,fp8,0,0.011724799871444702
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,64,1,1,128,1,float16,float16,0,0.010449600219726563
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,64,1,1,128,1,fp8,fp8,0,0.012567999958992004
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,64,1,1,128,1,float16,fp8,0,0.010427200049161912
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,64,1,1,128,1,fp8,fp8,0,0.010467199981212616
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,512,1,1,128,1,fp8,fp8,0,0.032948800921440126
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,64,1,1,128,1,float16,float16,0,0.010425599664449692
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,64,1,1,128,1,float16,fp8,0,0.010454399883747101
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,64,1,1,128,1,float16,float16,0,0.009787199646234512
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,64,1,1,128,1,float16,fp8,0,0.010454399883747101
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,64,1,1,128,1,fp8,fp8,0,0.010443200170993806
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,64,1,1,128,1,fp8,fp8,0,0.009430400282144546
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,64,1,1,128,1,float16,float16,0,0.01043199971318245
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,64,1,1,128,1,float16,fp8,0,0.010451199859380722
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,64,1,1,128,1,fp8,fp8,0,0.010409600287675857
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,64,1,1,128,1,float16,float16,0,0.00995360016822815
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,64,1,1,128,1,fp8,fp8,0,0.009055999666452407
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,64,1,1,128,1,float16,fp8,0,0.010471999645233154
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,32,1,1,128,1,float16,float16,0,0.011990399658679962
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,32,1,1,128,1,float16,fp8,0,0.01454080045223236
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,32,1,1,128,1,fp8,fp8,0,0.014473600685596466
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,32,1,1,128,1,float16,float16,0,0.010440000146627427
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,32,1,1,128,1,float16,fp8,0,0.01056319996714592
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,32,1,1,128,1,fp8,fp8,0,0.012464000284671784
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,32,1,1,128,1,float16,float16,0,0.010391999781131745
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,32,1,1,128,1,float16,fp8,0,0.010451199859380722
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,32,1,1,128,1,fp8,fp8,0,0.010489600151777268
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,32,1,1,128,1,float16,float16,0,0.01043040007352829
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,32,1,1,128,1,float16,fp8,0,0.01043360009789467
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,32,1,1,128,1,fp8,fp8,0,0.010449600219726563
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,32,1,1,128,1,float16,float16,0,0.01034879982471466
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,32,1,1,128,1,float16,fp8,0,0.008803199976682663
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,32,1,1,128,1,fp8,fp8,0,0.00841279998421669
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,32,1,1,128,1,float16,float16,0,0.008446399867534638
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,32,1,1,128,1,float16,fp8,0,0.008398400247097015
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,32,1,1,128,1,fp8,fp8,0,0.008392000198364257
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,32,1,1,128,1,float16,fp8,0,0.008452799916267396
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,32,1,1,128,1,float16,float16,0,0.008414400368928909
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,32,1,1,128,1,fp8,fp8,0,0.008353599905967712
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,256,1,1,128,1,fp8,fp8,0,0.014472000300884247
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,32,1,1,128,1,float16,float16,0,0.00843520015478134
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,32,1,1,128,1,float16,fp8,0,0.008398400247097015
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,32,1,1,128,1,fp8,fp8,0,0.010360000282526016
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,16,1,1,128,1,float16,float16,0,0.010478399693965912
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,16,1,1,128,1,float16,fp8,0,0.012494400143623352
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,16,1,1,128,1,fp8,fp8,0,0.012531200051307678
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,16,1,1,128,1,float16,float16,0,0.010388799756765366
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,16,1,1,128,1,float16,fp8,0,0.01043360009789467
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,16,1,1,128,1,fp8,fp8,0,0.010409600287675857
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,16,1,1,128,1,float16,float16,0,0.00942080020904541
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,16,1,1,128,1,float16,fp8,0,0.010454399883747101
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,16,1,1,128,1,fp8,fp8,0,0.010351999849081039
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,16,1,1,128,1,fp8,fp8,0,0.010395199805498124
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,16,1,1,128,1,float16,fp8,0,0.010249599814414978
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,16,1,1,128,1,float16,fp8,0,0.009438399970531464
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,16,1,1,128,1,float16,float16,0,0.010422399640083313
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,16,1,1,128,1,fp8,fp8,0,0.009427200257778167
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16,1,1,128,1,float16,float16,0,0.010391999781131745
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16,1,1,128,1,float16,fp8,0,0.008369600027799606
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,128,1,1,128,1,fp8,fp8,0,0.01048479974269867
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16,1,1,128,1,float16,float16,0,0.008420799672603608
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16,1,1,128,1,float16,fp8,0,0.008422400057315826
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,16,1,1,128,1,fp8,fp8,0,0.008481600135564805
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16,1,1,128,1,float16,float16,0,0.008633600175380706
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,64,1,1,128,1,fp8,fp8,0,0.018676799535751343
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16,1,1,128,1,float16,fp8,0,0.008376000076532364
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,16,1,1,128,1,fp8,fp8,0,0.008532799780368805
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,64,1,1,128,1,float16,float16,0,0.010380800068378448
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,1,1,1,128,1,float16,float16,0,0.010444799810647965
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,1,1,1,128,1,float16,fp8,0,0.010411199927330018
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,128,1,1,1,128,1,fp8,fp8,0,0.010407999902963639
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1,1,1,128,1,float16,fp8,0,0.010487999767065048
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1,1,1,128,1,fp8,fp8,0,0.010443200170993806
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1,1,1,128,1,float16,float16,0,0.009656000137329101
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1,1,1,128,1,float16,fp8,0,0.008617600053548813
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1,1,1,128,1,float16,float16,0,0.009028799831867218
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1,1,1,128,1,float16,fp8,0,0.008463999629020691
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,1,1,1,128,1,fp8,fp8,0,0.009452799707651139
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1,1,1,128,1,float16,float16,0,0.008833599835634231
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1,1,1,128,1,float16,fp8,0,0.009332799911499023
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1,1,1,128,1,float16,float16,0,0.010427200049161912
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1,1,1,128,1,float16,fp8,0,0.009412799775600434
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,1,1,1,128,1,fp8,fp8,0,0.009062399715185165
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1,1,1,128,1,float16,float16,0,0.010367999970912933
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1,1,1,128,1,fp8,fp8,0,0.008524800091981888
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1,1,1,128,1,float16,float16,0,0.010380800068378448
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1,1,1,128,1,float16,fp8,0,0.008479999750852585
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,1,1,1,128,1,fp8,fp8,0,0.008796799927949905
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,1,256,1,1,128,1,fp8,fp8,0,0.012521600723266602
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,16,16,1,1,128,1,float16,float16,0,0.008585599809885025
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,4,16,1,1,128,1,fp8,fp8,0,0.009398400038480758
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,64,1,1,1,128,1,float16,float16,0,0.010361599922180175
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,32,1,1,1,128,1,fp8,fp8,0,0.00915839970111847
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,8,1,1,1,128,1,fp8,fp8,0,0.008504000306129456
SGLang,0.5.9,NVIDIA B200,context_attention,trtllm_mha,2,1,1,1,128,1,float16,fp8,0,0.008939199894666672
