framework,version,device,op_name,kernel_source,batch_size,isl,num_heads,num_key_value_heads,head_dim,beam_width,attn_dtype,kv_cache_dtype,step,latency
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,16384,96,1,128,1,float16,fp8,0,34.75067138671875
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,16384,96,1,128,1,fp8,fp8,0,34.54625244140625
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,16384,96,4,128,1,float16,fp8,0,34.72791748046875
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,16384,96,2,128,1,fp8,fp8,0,34.789755249023436
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,16384,96,2,128,1,float16,fp8,0,34.924948120117186
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,16384,96,4,128,1,fp8,fp8,0,34.930938720703125
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,16384,96,8,128,1,float16,fp8,0,34.85268859863281
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,16384,96,8,128,1,fp8,fp8,0,34.93690795898438
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16384,96,96,128,1,float16,fp8,0,18.156820678710936
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16384,96,96,128,1,fp8,fp8,0,18.35047149658203
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16384,96,1,128,1,float16,fp8,0,17.478634643554688
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16384,96,1,128,1,fp8,fp8,0,17.427378845214843
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16384,96,2,128,1,float16,fp8,0,17.42821502685547
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16384,96,1,128,1,float16,float16,0,21.977925109863282
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16384,96,2,128,1,fp8,fp8,0,17.463626098632812
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,16384,96,1,128,1,float16,float16,0,44.373565673828125
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16384,96,4,128,1,float16,fp8,0,17.494281005859374
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16384,96,2,128,1,float16,float16,0,21.991352844238282
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16384,96,4,128,1,float16,float16,0,22.14542236328125
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16384,96,4,128,1,fp8,fp8,0,17.222868347167967
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16384,96,8,128,1,float16,fp8,0,17.360955810546876
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,16384,96,2,128,1,float16,float16,0,44.086422729492185
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16384,96,8,128,1,fp8,fp8,0,17.305570983886717
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,16384,96,4,128,1,float16,float16,0,44.54709167480469
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16384,96,96,128,1,float16,fp8,0,9.16683349609375
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16384,96,1,128,1,float16,float16,0,11.228753662109375
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16384,96,1,128,1,fp8,fp8,0,8.700249481201173
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16384,96,96,128,1,fp8,fp8,0,8.86162109375
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16384,96,1,128,1,float16,fp8,0,8.602799987792968
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16384,96,2,128,1,float16,float16,0,11.124689483642578
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16384,96,2,128,1,float16,fp8,0,8.697929382324219
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16384,96,8,128,1,float16,float16,0,22.1164306640625
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16384,96,2,128,1,fp8,fp8,0,8.731903839111329
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16384,96,4,128,1,float16,fp8,0,8.67184829711914
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16384,96,4,128,1,fp8,fp8,0,8.843862152099609
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16384,96,4,128,1,float16,float16,0,11.279071807861328
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16384,96,8,128,1,float16,fp8,0,8.707892608642577
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16384,96,8,128,1,float16,float16,0,11.032628631591797
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16384,96,96,128,1,float16,fp8,0,4.607113647460937
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16384,96,96,128,1,fp8,fp8,0,4.54705924987793
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16384,96,8,128,1,fp8,fp8,0,8.702983856201172
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16384,96,1,128,1,float16,fp8,0,4.27391357421875
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16384,96,1,128,1,float16,float16,0,5.454825592041016
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16384,96,1,128,1,fp8,fp8,0,4.286711883544922
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,16384,96,8,128,1,float16,float16,0,44.108438110351564
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16384,96,2,128,1,float16,float16,0,5.6348625183105465
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16384,96,2,128,1,float16,fp8,0,4.296607971191406
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16384,96,2,128,1,fp8,fp8,0,4.311119842529297
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16384,96,4,128,1,float16,fp8,0,4.313211059570312
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16384,96,4,128,1,float16,float16,0,5.475609588623047
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16384,96,4,128,1,fp8,fp8,0,4.514396667480469
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16384,96,8,128,1,float16,fp8,0,4.252281570434571
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16384,96,8,128,1,float16,float16,0,5.6689918518066404
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16384,96,8,128,1,fp8,fp8,0,4.30005111694336
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16384,96,96,128,1,float16,float16,0,5.670991897583008
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16384,96,96,128,1,float16,float16,0,11.501676940917969
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,12288,96,1,128,1,float16,fp8,0,20.203614807128908
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16384,96,96,128,1,float16,float16,0,23.357662963867188
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,12288,96,1,128,1,fp8,fp8,0,20.196189880371094
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,12288,96,2,128,1,float16,fp8,0,20.085125732421876
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,12288,96,1,128,1,float16,float16,0,25.417320251464844
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,12288,96,2,128,1,fp8,fp8,0,20.021942138671875
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,12288,96,2,128,1,float16,float16,0,25.627081298828124
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,12288,96,4,128,1,float16,float16,0,25.490567016601563
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,12288,96,4,128,1,float16,fp8,0,20.132887268066405
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,12288,96,96,128,1,float16,float16,0,13.726052856445312
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,12288,96,96,128,1,float16,fp8,0,10.76338882446289
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,12288,96,4,128,1,fp8,fp8,0,20.183268737792968
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,12288,96,8,128,1,float16,fp8,0,20.195208740234374
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,12288,96,8,128,1,fp8,fp8,0,20.175636291503906
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,12288,96,8,128,1,float16,float16,0,25.685812377929686
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,12288,96,96,128,1,fp8,fp8,0,10.561777496337891
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,12288,96,1,128,1,fp8,fp8,0,10.205284881591798
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,12288,96,1,128,1,float16,fp8,0,9.895572662353516
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,12288,96,1,128,1,float16,float16,0,12.789348602294922
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,12288,96,2,128,1,float16,fp8,0,10.018310546875
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,12288,96,2,128,1,float16,float16,0,12.820210266113282
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,12288,96,2,128,1,fp8,fp8,0,10.064284515380859
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,12288,96,4,128,1,float16,fp8,0,10.12033920288086
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,12288,96,4,128,1,fp8,fp8,0,10.217391967773438
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,12288,96,96,128,1,float16,fp8,0,5.227195358276367
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,12288,96,4,128,1,float16,float16,0,12.830221557617188
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,12288,96,96,128,1,float16,float16,0,6.776273345947265
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,12288,96,8,128,1,float16,fp8,0,9.995313262939453
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,12288,96,8,128,1,float16,float16,0,12.95922393798828
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,12288,96,8,128,1,fp8,fp8,0,10.023841857910156
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,12288,96,96,128,1,fp8,fp8,0,5.274427032470703
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,12288,96,1,128,1,float16,fp8,0,5.003718566894531
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,12288,96,1,128,1,fp8,fp8,0,5.1301521301269535
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,12288,96,1,128,1,float16,float16,0,6.471947479248047
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,12288,96,2,128,1,float16,fp8,0,4.929105758666992
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,12288,96,2,128,1,float16,float16,0,6.39805908203125
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,12288,96,2,128,1,fp8,fp8,0,5.010311889648437
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,12288,96,4,128,1,float16,fp8,0,4.926545715332031
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,12288,96,4,128,1,float16,float16,0,6.318798446655274
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,12288,96,4,128,1,fp8,fp8,0,4.890817642211914
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,12288,96,96,128,1,float16,fp8,0,2.6029935836791993
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,12288,96,8,128,1,float16,fp8,0,5.112144088745117
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,12288,96,96,128,1,float16,float16,0,3.4108047485351562
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,12288,96,8,128,1,float16,float16,0,6.405419158935547
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,12288,96,8,128,1,fp8,fp8,0,5.012268829345703
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,12288,96,96,128,1,fp8,fp8,0,2.618396759033203
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,12288,96,1,128,1,float16,fp8,0,2.474969673156738
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,12288,96,1,128,1,float16,float16,0,2.997591972351074
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,12288,96,1,128,1,fp8,fp8,0,2.4605295181274416
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,12288,96,2,128,1,float16,fp8,0,2.9908384323120116
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,12288,96,2,128,1,float16,float16,0,3.0119504928588867
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,12288,96,2,128,1,fp8,fp8,0,2.498561668395996
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,12288,96,4,128,1,float16,float16,0,2.977387237548828
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,12288,96,4,128,1,float16,fp8,0,2.443316841125488
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,12288,96,4,128,1,fp8,fp8,0,2.6566415786743165
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,12288,96,8,128,1,float16,fp8,0,2.524742317199707
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,12288,96,8,128,1,float16,float16,0,2.9132015228271486
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,12288,96,8,128,1,fp8,fp8,0,2.6554384231567383
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,10240,96,1,128,1,fp8,fp8,0,14.194999694824219
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,10240,96,1,128,1,float16,fp8,0,14.22704620361328
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,10240,96,2,128,1,float16,fp8,0,14.29908447265625
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,10240,96,2,128,1,fp8,fp8,0,14.225762939453125
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,10240,96,1,128,1,float16,float16,0,17.966510009765624
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,10240,96,4,128,1,float16,fp8,0,13.911537170410156
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,10240,96,2,128,1,float16,float16,0,17.868739318847656
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,10240,96,4,128,1,float16,float16,0,18.284381103515624
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,10240,96,96,128,1,float16,fp8,0,7.758811187744141
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,10240,96,96,128,1,fp8,fp8,0,7.650046539306641
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,10240,96,96,128,1,float16,float16,0,9.662124633789062
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,10240,96,1,128,1,float16,float16,0,9.018841552734376
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,10240,96,4,128,1,fp8,fp8,0,14.324993896484376
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,10240,96,8,128,1,float16,fp8,0,14.537260437011719
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,10240,96,8,128,1,fp8,fp8,0,14.364616394042969
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,10240,96,1,128,1,float16,fp8,0,7.107683563232422
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,10240,96,8,128,1,float16,float16,0,18.144441223144533
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,10240,96,1,128,1,fp8,fp8,0,7.052391815185547
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,10240,96,2,128,1,float16,fp8,0,7.203311920166016
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,10240,96,2,128,1,fp8,fp8,0,7.112461090087891
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,10240,96,2,128,1,float16,float16,0,8.967308807373048
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,10240,96,4,128,1,float16,fp8,0,7.110002899169922
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,10240,96,4,128,1,float16,float16,0,9.163489532470702
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,10240,96,4,128,1,fp8,fp8,0,7.134390258789063
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,10240,96,96,128,1,float16,fp8,0,3.8821887969970703
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,10240,96,96,128,1,float16,float16,0,4.796966552734375
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,10240,96,96,128,1,fp8,fp8,0,3.819987106323242
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,10240,96,8,128,1,fp8,fp8,0,7.183102416992187
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,10240,96,1,128,1,float16,fp8,0,3.544694519042969
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,10240,96,1,128,1,float16,float16,0,4.404121780395508
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,10240,96,1,128,1,fp8,fp8,0,3.5303791046142576
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,10240,96,8,128,1,float16,fp8,0,7.223278045654297
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,10240,96,2,128,1,float16,fp8,0,3.543289566040039
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,10240,96,2,128,1,float16,float16,0,4.557108688354492
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,10240,96,2,128,1,fp8,fp8,0,3.6418304443359375
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,10240,96,4,128,1,float16,float16,0,4.52917594909668
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,10240,96,4,128,1,fp8,fp8,0,3.5447761535644533
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,10240,96,4,128,1,float16,fp8,0,3.5860065460205077
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,10240,96,96,128,1,float16,float16,0,2.279083251953125
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,10240,96,8,128,1,float16,float16,0,4.229727935791016
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,10240,96,8,128,1,float16,fp8,0,3.55
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,10240,96,8,128,1,float16,float16,0,9.091635131835938
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,10240,96,8,128,1,fp8,fp8,0,3.694200134277344
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,10240,96,96,128,1,fp8,fp8,0,1.9202751159667968
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,10240,96,96,128,1,float16,fp8,0,2.318684768676758
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,10240,96,1,128,1,float16,float16,0,2.0711952209472657
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,10240,96,1,128,1,fp8,fp8,0,2.100062370300293
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,10240,96,2,128,1,float16,float16,0,2.105681610107422
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,10240,96,2,128,1,float16,fp8,0,1.73797607421875
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,10240,96,2,128,1,fp8,fp8,0,1.8428815841674804
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,10240,96,1,128,1,float16,fp8,0,1.9950336456298827
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,10240,96,4,128,1,float16,fp8,0,1.772260856628418
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,10240,96,4,128,1,float16,float16,0,2.1510719299316405
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,10240,96,8,128,1,fp8,fp8,0,1.7411439895629883
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,10240,96,4,128,1,fp8,fp8,0,2.022105598449707
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,10240,96,8,128,1,float16,float16,0,2.1666128158569338
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,10240,96,8,128,1,float16,fp8,0,2.0839344024658204
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,8192,96,1,128,1,float16,fp8,0,18.88487548828125
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,8192,96,1,128,1,fp8,fp8,0,18.749571228027342
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,8192,96,2,128,1,float16,fp8,0,18.620664978027342
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,8192,96,2,128,1,fp8,fp8,0,18.983114624023436
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,8192,96,1,128,1,float16,float16,0,23.597161865234376
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,8192,96,4,128,1,float16,fp8,0,18.59571228027344
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,8192,96,2,128,1,float16,float16,0,23.595376586914064
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,8192,96,4,128,1,float16,float16,0,23.686204528808595
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,8192,96,96,128,1,float16,fp8,0,10.319654083251953
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,8192,96,96,128,1,fp8,fp8,0,10.15759048461914
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,8192,96,96,128,1,float16,float16,0,12.855183410644532
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,8192,96,1,128,1,float16,float16,0,11.916265869140625
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,8192,96,4,128,1,fp8,fp8,0,18.97684326171875
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,8192,96,8,128,1,float16,fp8,0,18.847679138183594
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,8192,96,8,128,1,fp8,fp8,0,19.151948547363283
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,8192,96,8,128,1,float16,float16,0,23.927397155761717
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,8192,96,1,128,1,float16,fp8,0,9.405036926269531
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,8192,96,1,128,1,fp8,fp8,0,9.32350845336914
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,8192,96,2,128,1,float16,fp8,0,9.350990295410156
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,8192,96,2,128,1,float16,float16,0,11.800379180908203
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,8192,96,2,128,1,fp8,fp8,0,9.5212158203125
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,8192,96,4,128,1,float16,fp8,0,9.482708740234376
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,8192,96,4,128,1,float16,float16,0,11.916825866699218
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,8192,96,4,128,1,fp8,fp8,0,9.32461929321289
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,8192,96,96,128,1,float16,fp8,0,5.103766250610351
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,8192,96,8,128,1,float16,fp8,0,9.446153259277343
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,8192,96,96,128,1,float16,float16,0,6.449017333984375
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,8192,96,96,128,1,fp8,fp8,0,5.164833450317383
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,8192,96,8,128,1,float16,float16,0,12.09722900390625
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,8192,96,1,128,1,float16,float16,0,5.919267272949218
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,8192,96,8,128,1,fp8,fp8,0,9.37457275390625
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,8192,96,1,128,1,float16,fp8,0,4.661729431152343
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,8192,96,1,128,1,fp8,fp8,0,4.671134567260742
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,8192,96,2,128,1,float16,float16,0,5.958824157714844
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,8192,96,2,128,1,fp8,fp8,0,4.774723052978516
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,8192,96,2,128,1,float16,fp8,0,4.622953414916992
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,8192,96,4,128,1,float16,fp8,0,4.695665740966797
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,8192,96,4,128,1,fp8,fp8,0,4.724631881713867
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,8192,96,4,128,1,float16,float16,0,5.866955184936524
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,8192,96,96,128,1,float16,float16,0,3.062451171875
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,8192,96,96,128,1,fp8,fp8,0,2.592032051086426
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,8192,96,8,128,1,float16,fp8,0,4.856560134887696
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,8192,96,8,128,1,fp8,fp8,0,4.759983825683594
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,8192,96,8,128,1,float16,float16,0,5.928950500488281
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,8192,96,1,128,1,float16,fp8,0,2.2511552810668944
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,8192,96,1,128,1,float16,float16,0,2.955326461791992
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,8192,96,1,128,1,fp8,fp8,0,2.478014373779297
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,8192,96,2,128,1,float16,fp8,0,2.352697563171387
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,8192,96,2,128,1,fp8,fp8,0,2.428812789916992
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,8192,96,2,128,1,float16,float16,0,2.849336051940918
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,8192,96,4,128,1,float16,fp8,0,2.273567962646484
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,8192,96,4,128,1,fp8,fp8,0,2.5289024353027343
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,8192,96,4,128,1,float16,float16,0,3.0160207748413086
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,8192,96,96,128,1,float16,fp8,0,2.5444671630859377
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,8192,96,96,128,1,float16,fp8,0,1.302891159057617
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,8192,96,8,128,1,float16,fp8,0,2.318281555175781
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,8192,96,96,128,1,fp8,fp8,0,1.3000927925109864
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,8192,96,1,128,1,float16,fp8,0,1.1597184181213378
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,8192,96,8,128,1,float16,float16,0,2.7576015472412108
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,8192,96,1,128,1,float16,float16,0,1.5248512268066405
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,8192,96,1,128,1,fp8,fp8,0,1.168166446685791
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,8192,96,2,128,1,float16,float16,0,1.3326751708984375
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,8192,96,2,128,1,fp8,fp8,0,1.1594223976135254
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,8192,96,2,128,1,float16,fp8,0,1.1702143669128418
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,8192,96,4,128,1,float16,float16,0,1.507972812652588
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,8192,96,4,128,1,fp8,fp8,0,1.1632847785949707
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,8192,96,4,128,1,float16,fp8,0,1.3479968070983888
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,8192,96,8,128,1,float16,fp8,0,1.1617072105407715
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,8192,96,8,128,1,float16,float16,0,1.3833439826965332
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,8192,96,8,128,1,fp8,fp8,0,1.161460781097412
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,8192,96,96,128,1,float16,float16,0,1.701228713989258
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,8192,96,8,128,1,fp8,fp8,0,2.3910560607910156
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,6144,96,1,128,1,float16,fp8,0,10.884353637695312
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,6144,96,1,128,1,fp8,fp8,0,10.975778961181641
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,6144,96,2,128,1,float16,fp8,0,11.002798461914063
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,6144,96,4,128,1,float16,fp8,0,10.961686706542968
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,6144,96,2,128,1,fp8,fp8,0,10.777641296386719
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,6144,96,1,128,1,float16,float16,0,13.715890502929687
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,6144,96,2,128,1,float16,float16,0,13.652606201171874
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,6144,96,4,128,1,float16,float16,0,13.6420654296875
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,6144,96,96,128,1,float16,fp8,0,6.108323287963867
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,6144,96,96,128,1,fp8,fp8,0,6.1146385192871096
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,6144,96,96,128,1,float16,float16,0,7.807514953613281
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,6144,96,1,128,1,float16,float16,0,7.0077667236328125
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,6144,96,4,128,1,fp8,fp8,0,11.092921447753906
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,6144,96,8,128,1,float16,fp8,0,11.07629623413086
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,6144,96,8,128,1,fp8,fp8,0,11.283979034423828
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,6144,96,8,128,1,float16,float16,0,14.209947204589843
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,6144,96,1,128,1,float16,fp8,0,5.429747009277344
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,6144,96,1,128,1,fp8,fp8,0,5.399515151977539
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,6144,96,2,128,1,fp8,fp8,0,5.480105590820313
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,6144,96,2,128,1,float16,fp8,0,5.488750457763672
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,6144,96,2,128,1,float16,float16,0,6.730214691162109
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,6144,96,4,128,1,float16,fp8,0,5.551545715332031
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,6144,96,4,128,1,float16,float16,0,6.923518371582031
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,6144,96,4,128,1,fp8,fp8,0,5.528121566772461
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,6144,96,96,128,1,float16,float16,0,3.6972270965576173
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,6144,96,8,128,1,float16,fp8,0,5.494823837280274
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,6144,96,96,128,1,float16,fp8,0,3.0970943450927733
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,6144,96,8,128,1,float16,float16,0,7.070001220703125
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,6144,96,96,128,1,fp8,fp8,0,2.9987407684326173
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,6144,96,8,128,1,fp8,fp8,0,5.44713134765625
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,6144,96,1,128,1,float16,float16,0,3.3974815368652345
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,6144,96,1,128,1,float16,fp8,0,2.705292892456055
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,6144,96,1,128,1,fp8,fp8,0,2.9194032669067385
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,6144,96,2,128,1,float16,float16,0,3.3044639587402345
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,6144,96,2,128,1,float16,fp8,0,2.6525152206420897
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,6144,96,2,128,1,fp8,fp8,0,3.0054128646850584
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,6144,96,4,128,1,float16,fp8,0,2.767323112487793
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,6144,96,4,128,1,fp8,fp8,0,2.9215200424194334
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,6144,96,4,128,1,float16,float16,0,3.3353614807128906
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,6144,96,96,128,1,float16,float16,0,1.736814308166504
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,6144,96,8,128,1,float16,float16,0,3.418560028076172
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,6144,96,96,128,1,float16,fp8,0,1.5599136352539062
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,6144,96,8,128,1,float16,fp8,0,2.6855823516845705
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,6144,96,96,128,1,fp8,fp8,0,1.5388959884643554
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,6144,96,1,128,1,float16,float16,0,1.5761471748352052
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,6144,96,8,128,1,fp8,fp8,0,3.3309696197509764
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,6144,96,2,128,1,float16,fp8,0,1.3630672454833985
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,6144,96,1,128,1,float16,fp8,0,1.626103973388672
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,6144,96,1,128,1,fp8,fp8,0,1.5961119651794433
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,6144,96,2,128,1,float16,float16,0,1.816979217529297
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,6144,96,2,128,1,fp8,fp8,0,1.412492847442627
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,6144,96,4,128,1,float16,float16,0,1.5517328262329102
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,6144,96,4,128,1,float16,fp8,0,1.5471023559570312
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,6144,96,4,128,1,fp8,fp8,0,1.3932239532470703
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,6144,96,96,128,1,float16,fp8,0,0.7877103805541992
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,6144,96,96,128,1,float16,float16,0,1.0057344436645508
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,6144,96,8,128,1,float16,fp8,0,1.4100607872009276
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,6144,96,8,128,1,float16,float16,0,1.5863696098327638
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,6144,96,96,128,1,fp8,fp8,0,0.9492400169372559
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,6144,96,1,128,1,float16,float16,0,0.7922671794891357
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,6144,96,2,128,1,float16,float16,0,0.8834959983825683
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,6144,96,1,128,1,fp8,fp8,0,0.6993872165679932
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,6144,96,2,128,1,float16,fp8,0,0.9071727752685547
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,6144,96,2,128,1,fp8,fp8,0,0.8018879890441895
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,6144,96,4,128,1,float16,float16,0,0.7940336227416992
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,6144,96,4,128,1,float16,fp8,0,0.8018655776977539
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,6144,96,8,128,1,float16,float16,0,0.8667759895324707
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,6144,96,8,128,1,float16,fp8,0,0.7241456031799316
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,6144,96,4,128,1,fp8,fp8,0,0.6995552062988282
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,6144,96,8,128,1,fp8,fp8,0,0.7528463840484619
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,6144,96,8,128,1,fp8,fp8,0,1.3972432136535644
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,6144,96,1,128,1,float16,fp8,0,0.7086143970489502
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,4096,96,1,128,1,fp8,fp8,0,10.385475158691406
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,4096,96,1,128,1,float16,fp8,0,10.425643157958984
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,4096,96,2,128,1,float16,fp8,0,10.335702514648437
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,4096,96,2,128,1,fp8,fp8,0,10.218247985839843
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,4096,96,1,128,1,float16,float16,0,13.128176879882812
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,4096,96,2,128,1,float16,float16,0,12.958883666992188
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,4096,96,4,128,1,float16,float16,0,12.8622314453125
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,4096,96,4,128,1,float16,fp8,0,10.344891357421876
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,4096,96,96,128,1,float16,fp8,0,6.0799713134765625
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,4096,96,96,128,1,float16,float16,0,7.635374450683594
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,4096,96,4,128,1,fp8,fp8,0,10.590988922119141
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,4096,96,96,128,1,fp8,fp8,0,6.0670734405517575
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,4096,96,8,128,1,float16,fp8,0,10.581244659423827
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,4096,96,8,128,1,fp8,fp8,0,10.663100433349609
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,4096,96,8,128,1,float16,float16,0,13.370747375488282
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,4096,96,1,128,1,float16,fp8,0,5.1917167663574215
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,4096,96,1,128,1,float16,float16,0,6.469465637207032
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,4096,96,1,128,1,fp8,fp8,0,5.331403350830078
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,4096,96,2,128,1,float16,fp8,0,5.1389423370361325
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,4096,96,2,128,1,float16,float16,0,6.604576110839844
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,4096,96,2,128,1,fp8,fp8,0,5.189807891845703
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,4096,96,4,128,1,float16,float16,0,6.61438217163086
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,4096,96,4,128,1,float16,fp8,0,5.252241516113282
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,4096,96,4,128,1,fp8,fp8,0,5.218560028076172
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,4096,96,96,128,1,float16,fp8,0,3.0386751174926756
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,4096,96,8,128,1,float16,fp8,0,5.245068740844727
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,4096,96,96,128,1,float16,float16,0,3.7717967987060548
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,4096,96,8,128,1,float16,float16,0,6.7031005859375
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,4096,96,96,128,1,fp8,fp8,0,3.116828727722168
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,4096,96,1,128,1,float16,fp8,0,2.6351152420043946
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,4096,96,1,128,1,float16,float16,0,3.0360271453857424
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,4096,96,8,128,1,fp8,fp8,0,5.2545726776123045
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,4096,96,1,128,1,fp8,fp8,0,2.5905935287475588
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,4096,96,2,128,1,float16,fp8,0,2.888579177856445
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,4096,96,2,128,1,float16,float16,0,3.091116714477539
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,4096,96,2,128,1,fp8,fp8,0,2.863315200805664
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,4096,96,4,128,1,float16,fp8,0,2.6029312133789064
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,4096,96,4,128,1,float16,float16,0,3.1101200103759767
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,4096,96,4,128,1,fp8,fp8,0,2.6679248809814453
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,4096,96,8,128,1,float16,fp8,0,2.594001579284668
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,4096,96,8,128,1,float16,float16,0,3.068332862854004
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,4096,96,96,128,1,float16,fp8,0,1.506447982788086
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,4096,96,96,128,1,fp8,fp8,0,1.5102879524230957
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,4096,96,96,128,1,float16,float16,0,1.844428825378418
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,4096,96,1,128,1,float16,float16,0,1.7243791580200196
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,4096,96,1,128,1,float16,fp8,0,1.3026399612426758
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,4096,96,1,128,1,fp8,fp8,0,1.3205167770385742
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,4096,96,2,128,1,float16,fp8,0,1.3001791954040527
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,4096,96,2,128,1,fp8,fp8,0,1.2916000366210938
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,4096,96,4,128,1,float16,float16,0,1.6380847930908202
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,4096,96,4,128,1,fp8,fp8,0,1.2967264175415039
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,4096,96,8,128,1,float16,float16,0,1.4621664047241212
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,4096,96,8,128,1,float16,fp8,0,1.312998390197754
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,4096,96,8,128,1,fp8,fp8,0,1.2896448135375977
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,4096,96,96,128,1,float16,fp8,0,0.7643008232116699
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,4096,96,96,128,1,float16,float16,0,0.9769871711730957
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,4096,96,96,128,1,fp8,fp8,0,0.9142080307006836
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,4096,96,1,128,1,float16,float16,0,0.7423903942108154
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,4096,96,1,128,1,float16,fp8,0,0.6707327842712403
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,4096,96,1,128,1,fp8,fp8,0,0.6619328022003174
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,4096,96,8,128,1,fp8,fp8,0,2.57543830871582
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,4096,96,2,128,1,float16,float16,0,0.8357359886169433
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,4096,96,2,128,1,float16,fp8,0,0.759876823425293
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,4096,96,2,128,1,fp8,fp8,0,0.6661231994628907
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,4096,96,4,128,1,float16,fp8,0,0.6622352123260498
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,4096,96,4,128,1,float16,float16,0,0.7433680057525635
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,4096,96,4,128,1,fp8,fp8,0,0.6630864143371582
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,4096,96,8,128,1,float16,fp8,0,0.6832640171051025
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,4096,96,8,128,1,float16,float16,0,0.8466832160949707
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,4096,96,96,128,1,float16,float16,0,0.44815359115600584
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,4096,96,96,128,1,float16,fp8,0,0.4051216125488281
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,4096,96,2,128,1,float16,float16,0,1.6787824630737305
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,4096,96,8,128,1,fp8,fp8,0,0.7601391792297363
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,4096,96,96,128,1,fp8,fp8,0,0.40253920555114747
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,4096,96,1,128,1,float16,float16,0,0.3798687934875488
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,4096,96,1,128,1,float16,fp8,0,0.40735039710998533
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,4096,96,2,128,1,float16,float16,0,0.3795552015304565
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,4096,96,2,128,1,float16,fp8,0,0.35335679054260255
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,4096,96,2,128,1,fp8,fp8,0,0.4185935974121094
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,4096,96,4,128,1,float16,float16,0,0.3819216012954712
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,4096,96,4,128,1,float16,fp8,0,0.35727519989013673
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,4096,96,4,128,1,fp8,fp8,0,0.406009578704834
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,4096,96,8,128,1,float16,fp8,0,0.3530463933944702
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,4096,96,8,128,1,fp8,fp8,0,0.3695519924163818
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,4096,96,8,128,1,float16,float16,0,0.4047344207763672
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,4096,96,1,128,1,fp8,fp8,0,0.35537760257720946
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,4096,96,4,128,1,float16,fp8,0,1.479640007019043
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,3072,96,1,128,1,float16,fp8,0,6.187625503540039
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,3072,96,1,128,1,fp8,fp8,0,6.3107646942138675
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,3072,96,2,128,1,float16,fp8,0,6.26988639831543
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,3072,96,1,128,1,float16,float16,0,7.593708801269531
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,3072,96,2,128,1,float16,float16,0,7.551907348632812
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,3072,96,2,128,1,fp8,fp8,0,6.1262367248535154
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,3072,96,4,128,1,float16,float16,0,7.621995544433593
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,3072,96,96,128,1,float16,float16,0,4.556353759765625
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,3072,96,4,128,1,float16,fp8,0,6.2794654846191404
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,3072,96,4,128,1,fp8,fp8,0,6.396894454956055
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,3072,96,96,128,1,float16,fp8,0,3.6645774841308594
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,3072,96,8,128,1,float16,fp8,0,6.213275146484375
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,3072,96,8,128,1,fp8,fp8,0,6.260942459106445
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,3072,96,8,128,1,float16,float16,0,8.056339263916016
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,3072,96,96,128,1,fp8,fp8,0,3.7460704803466798
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,3072,96,1,128,1,float16,fp8,0,3.1275711059570312
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,3072,96,1,128,1,fp8,fp8,0,3.1646175384521484
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,3072,96,1,128,1,float16,float16,0,3.86242561340332
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,3072,96,2,128,1,float16,fp8,0,3.068390464782715
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,3072,96,2,128,1,float16,float16,0,3.719539260864258
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,3072,96,2,128,1,fp8,fp8,0,3.1516159057617186
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,3072,96,4,128,1,float16,fp8,0,3.085316848754883
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,3072,96,4,128,1,float16,float16,0,3.9489025115966796
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,3072,96,96,128,1,float16,fp8,0,1.8645151138305665
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,3072,96,8,128,1,fp8,fp8,0,3.081558418273926
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,3072,96,8,128,1,float16,float16,0,3.9718799591064453
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,3072,96,8,128,1,float16,fp8,0,3.25714225769043
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,3072,96,1,128,1,float16,float16,0,1.9642704010009766
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,3072,96,96,128,1,fp8,fp8,0,1.846931266784668
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,3072,96,1,128,1,float16,fp8,0,1.5543567657470703
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,3072,96,1,128,1,fp8,fp8,0,1.5686703681945802
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,3072,96,2,128,1,float16,fp8,0,1.5716943740844727
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,3072,96,2,128,1,float16,float16,0,2.012491226196289
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,3072,96,2,128,1,fp8,fp8,0,1.734115219116211
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,3072,96,4,128,1,float16,fp8,0,1.5467984199523925
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,3072,96,4,128,1,float16,float16,0,1.7764127731323243
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,3072,96,4,128,1,fp8,fp8,0,1.5517487525939941
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,3072,96,4,128,1,fp8,fp8,0,3.092795181274414
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,3072,96,8,128,1,float16,float16,0,1.8173696517944335
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,3072,96,96,128,1,float16,float16,0,1.0725808143615723
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,3072,96,8,128,1,float16,fp8,0,1.7963407516479493
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,3072,96,96,128,1,fp8,fp8,0,0.9717151641845703
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,3072,96,8,128,1,fp8,fp8,0,1.588691234588623
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,3072,96,1,128,1,float16,float16,0,0.8653391838073731
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,3072,96,96,128,1,float16,fp8,0,1.0611167907714845
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,3072,96,1,128,1,float16,fp8,0,0.9481391906738281
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,3072,96,1,128,1,fp8,fp8,0,0.8042719841003418
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,3072,96,2,128,1,float16,float16,0,0.8726192474365234
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,3072,96,2,128,1,float16,fp8,0,0.7961152076721192
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,3072,96,2,128,1,fp8,fp8,0,0.808460807800293
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,3072,96,4,128,1,float16,float16,0,0.871884822845459
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,3072,96,4,128,1,float16,fp8,0,0.8244879722595215
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,3072,96,4,128,1,fp8,fp8,0,0.8806207656860352
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,3072,96,8,128,1,float16,float16,0,0.8839296340942383
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,3072,96,96,128,1,float16,float16,0,0.5474592208862304
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,3072,96,8,128,1,float16,fp8,0,0.8185487747192383
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,3072,96,8,128,1,fp8,fp8,0,0.7910128116607666
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,3072,96,96,128,1,float16,fp8,0,0.4911680221557617
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,3072,96,96,128,1,fp8,fp8,0,0.49415202140808107
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,3072,96,1,128,1,float16,float16,0,0.4427824020385742
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,3072,96,1,128,1,float16,fp8,0,0.42787680625915525
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,3072,96,1,128,1,fp8,fp8,0,0.435975980758667
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,3072,96,2,128,1,float16,float16,0,0.47549757957458494
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,3072,96,2,128,1,fp8,fp8,0,0.43974881172180175
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,3072,96,4,128,1,float16,float16,0,0.4432032108306885
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,3072,96,4,128,1,float16,fp8,0,0.4237311840057373
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,3072,96,96,128,1,float16,float16,0,2.309646415710449
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,3072,96,4,128,1,fp8,fp8,0,0.4274623870849609
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,3072,96,8,128,1,float16,float16,0,0.45937438011169435
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,3072,96,96,128,1,float16,float16,0,0.2872015953063965
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,3072,96,96,128,1,float16,fp8,0,0.2678512096405029
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,3072,96,1,128,1,float16,float16,0,0.27100799083709715
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,3072,96,96,128,1,fp8,fp8,0,0.2600383996963501
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,3072,96,1,128,1,float16,fp8,0,0.23814239501953124
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,3072,96,1,128,1,fp8,fp8,0,0.23324639797210694
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,3072,96,8,128,1,float16,fp8,0,0.45316319465637206
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,3072,96,2,128,1,float16,fp8,0,0.23440160751342773
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,3072,96,4,128,1,float16,fp8,0,0.22812960147857667
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,3072,96,4,128,1,fp8,fp8,0,0.24498240947723388
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,3072,96,8,128,1,float16,float16,0,0.2433568000793457
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,3072,96,8,128,1,float16,fp8,0,0.22821919918060302
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,3072,96,8,128,1,fp8,fp8,0,0.2317568063735962
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,3072,96,2,128,1,float16,fp8,0,0.411575984954834
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,3072,96,8,128,1,fp8,fp8,0,0.4287087917327881
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,3072,96,2,128,1,float16,float16,0,0.2378432035446167
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,3072,96,2,128,1,fp8,fp8,0,0.22274720668792725
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,3072,96,4,128,1,float16,float16,0,0.24738879203796388
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,2048,96,1,128,1,float16,fp8,0,6.165039825439453
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,2048,96,1,128,1,fp8,fp8,0,6.239462280273438
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,2048,96,1,128,1,float16,float16,0,7.795916748046875
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,2048,96,2,128,1,float16,fp8,0,6.18218879699707
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,2048,96,2,128,1,float16,float16,0,7.502881622314453
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,2048,96,2,128,1,fp8,fp8,0,6.177876663208008
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,2048,96,4,128,1,float16,fp8,0,6.2209632873535154
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,2048,96,4,128,1,float16,float16,0,7.493595123291016
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,2048,96,4,128,1,fp8,fp8,0,6.243102264404297
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,2048,96,96,128,1,float16,fp8,0,3.8674816131591796
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,2048,96,8,128,1,float16,float16,0,7.930532836914063
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,2048,96,8,128,1,float16,fp8,0,6.340355300903321
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,2048,96,96,128,1,fp8,fp8,0,3.891864013671875
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,2048,96,1,128,1,float16,float16,0,3.6247791290283202
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,2048,96,96,128,1,float16,float16,0,4.796672058105469
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,2048,96,8,128,1,fp8,fp8,0,6.204252624511719
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,2048,96,1,128,1,float16,fp8,0,3.099083137512207
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,2048,96,1,128,1,fp8,fp8,0,3.094206428527832
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,2048,96,2,128,1,float16,fp8,0,3.1112592697143553
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,2048,96,2,128,1,float16,float16,0,3.6529727935791017
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,2048,96,2,128,1,fp8,fp8,0,3.2018016815185546
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,2048,96,4,128,1,float16,float16,0,3.7858303070068358
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,2048,96,4,128,1,float16,fp8,0,3.137356758117676
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,2048,96,4,128,1,fp8,fp8,0,3.1419551849365233
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,2048,96,96,128,1,float16,float16,0,2.316417694091797
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,2048,96,96,128,1,float16,fp8,0,2.17535514831543
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,2048,96,8,128,1,float16,fp8,0,3.182676887512207
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,2048,96,1,128,1,float16,float16,0,1.6716495513916017
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,2048,96,8,128,1,float16,float16,0,3.816489410400391
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,2048,96,8,128,1,fp8,fp8,0,3.1756752014160154
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,2048,96,1,128,1,float16,fp8,0,1.7857936859130858
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,2048,96,1,128,1,fp8,fp8,0,1.6204959869384765
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,2048,96,2,128,1,float16,float16,0,1.7006975173950196
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,2048,96,2,128,1,float16,fp8,0,1.810968017578125
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,2048,96,2,128,1,fp8,fp8,0,1.8289199829101563
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,2048,96,4,128,1,float16,float16,0,1.7068912506103515
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,2048,96,4,128,1,float16,fp8,0,1.609391975402832
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,2048,96,96,128,1,fp8,fp8,0,1.9881423950195312
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,2048,96,8,128,1,float16,fp8,0,1.603068733215332
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,2048,96,8,128,1,fp8,fp8,0,1.588771152496338
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,2048,96,96,128,1,float16,fp8,0,1.1445743560791015
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,2048,96,96,128,1,fp8,fp8,0,0.9988816261291504
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,2048,96,1,128,1,float16,float16,0,0.8486495971679687
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,2048,96,1,128,1,float16,fp8,0,0.7956736087799072
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,2048,96,1,128,1,fp8,fp8,0,0.8033503532409668
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,2048,96,2,128,1,float16,float16,0,0.8579615592956543
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,2048,96,2,128,1,float16,fp8,0,0.7911231994628907
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,2048,96,2,128,1,fp8,fp8,0,0.8087615966796875
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,2048,96,4,128,1,float16,fp8,0,0.8060928344726562
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,2048,96,4,128,1,float16,float16,0,0.8501168251037597
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,2048,96,4,128,1,fp8,fp8,0,0.8433152198791504
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,2048,96,8,128,1,float16,float16,0,0.8661760330200196
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,2048,96,8,128,1,float16,fp8,0,0.8075311660766602
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,2048,96,8,128,1,fp8,fp8,0,0.7924032211303711
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,2048,96,96,128,1,float16,float16,0,0.567903995513916
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,2048,96,96,128,1,float16,fp8,0,0.5230912208557129
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,2048,96,1,128,1,float16,float16,0,0.4311503887176514
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,2048,96,96,128,1,fp8,fp8,0,0.5067872047424317
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,2048,96,4,128,1,fp8,fp8,0,1.5603952407836914
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,2048,96,1,128,1,float16,fp8,0,0.409603214263916
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,2048,96,1,128,1,fp8,fp8,0,0.4172719955444336
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,2048,96,2,128,1,float16,float16,0,0.4341616153717041
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,2048,96,96,128,1,float16,float16,0,1.119983959197998
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,2048,96,2,128,1,fp8,fp8,0,0.42947840690612793
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,2048,96,4,128,1,float16,float16,0,0.4351840019226074
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,2048,96,4,128,1,float16,fp8,0,0.40998239517211915
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,2048,96,4,128,1,fp8,fp8,0,0.4169616222381592
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,2048,96,8,128,1,float16,float16,0,0.44159998893737795
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,2048,96,96,128,1,float16,float16,0,0.29409921169281006
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,2048,96,96,128,1,float16,fp8,0,0.2678544044494629
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,2048,96,8,128,1,float16,fp8,0,0.41021437644958497
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,2048,96,8,128,1,fp8,fp8,0,0.418398380279541
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,2048,96,96,128,1,fp8,fp8,0,0.2716768026351929
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,2048,96,1,128,1,float16,float16,0,0.22695999145507811
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,2048,96,1,128,1,fp8,fp8,0,0.21819519996643066
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,2048,96,2,128,1,float16,float16,0,0.22616639137268066
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,2048,96,1,128,1,float16,fp8,0,0.21870241165161133
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,2048,96,2,128,1,fp8,fp8,0,0.22064800262451173
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,2048,96,4,128,1,float16,float16,0,0.22877120971679688
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,2048,96,4,128,1,float16,fp8,0,0.21852478981018067
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,2048,96,4,128,1,fp8,fp8,0,0.21878399848937988
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,2048,96,8,128,1,float16,fp8,0,0.22089920043945313
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,2048,96,8,128,1,fp8,fp8,0,0.22124319076538085
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,2048,96,96,128,1,float16,float16,0,0.1608896017074585
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,2048,96,96,128,1,float16,fp8,0,0.14816319942474365
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,2048,96,96,128,1,fp8,fp8,0,0.14825119972229003
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,2048,96,1,128,1,float16,float16,0,0.12761280536651612
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,2048,96,1,128,1,float16,fp8,0,0.12172000408172608
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,2048,96,1,128,1,fp8,fp8,0,0.1219599962234497
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,2048,96,2,128,1,float16,float16,0,0.12637280225753783
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,2048,96,2,128,1,float16,fp8,0,0.12304160594940186
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,2048,96,2,128,1,fp8,fp8,0,0.12173279523849487
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,2048,96,4,128,1,float16,fp8,0,0.12214080095291138
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,2048,96,4,128,1,fp8,fp8,0,0.12245440483093262
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,2048,96,8,128,1,float16,float16,0,0.12973599433898925
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,2048,96,8,128,1,float16,fp8,0,0.12214399576187134
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,2048,96,8,128,1,fp8,fp8,0,0.12226239442825318
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,2048,96,4,128,1,float16,float16,0,0.12985119819641114
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,2048,96,2,128,1,float16,fp8,0,0.40962882041931153
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,2048,96,2,128,1,float16,fp8,0,0.22375359535217285
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,2048,96,8,128,1,float16,float16,0,0.22965760231018068
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1536,96,1,128,1,float16,fp8,0,3.9054096221923826
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1536,96,1,128,1,fp8,fp8,0,3.906139373779297
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1536,96,1,128,1,float16,float16,0,4.565094375610352
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1536,96,2,128,1,float16,float16,0,4.512176132202148
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,2048,96,8,128,1,float16,float16,0,2.0024656295776366
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1536,96,2,128,1,float16,fp8,0,3.903416061401367
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1536,96,2,128,1,fp8,fp8,0,3.898057556152344
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1536,96,4,128,1,float16,float16,0,4.593664169311523
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1536,96,4,128,1,float16,fp8,0,3.9020622253417967
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1536,96,4,128,1,fp8,fp8,0,3.903326416015625
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1536,96,96,128,1,float16,float16,0,2.9948720932006836
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1536,96,8,128,1,float16,fp8,0,3.9878158569335938
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1536,96,8,128,1,fp8,fp8,0,3.912907028198242
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1536,96,8,128,1,float16,float16,0,4.761483383178711
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1536,96,1,128,1,float16,float16,0,2.085875129699707
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1536,96,96,128,1,fp8,fp8,0,2.6716480255126953
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1536,96,1,128,1,fp8,fp8,0,1.9628704071044922
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1536,96,1,128,1,float16,fp8,0,2.066414451599121
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1536,96,2,128,1,float16,float16,0,2.1831615447998045
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1536,96,2,128,1,float16,fp8,0,2.129654312133789
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1536,96,4,128,1,float16,float16,0,2.1837215423583984
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1536,96,2,128,1,fp8,fp8,0,2.175948715209961
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1536,96,96,128,1,float16,fp8,0,2.539601516723633
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1536,96,4,128,1,float16,fp8,0,1.9595327377319336
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1536,96,4,128,1,fp8,fp8,0,1.9665727615356445
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1536,96,8,128,1,float16,fp8,0,1.9902175903320312
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1536,96,8,128,1,float16,float16,0,2.3128448486328126
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1536,96,96,128,1,float16,float16,0,1.4472127914428712
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1536,96,1,128,1,float16,float16,0,1.0429519653320312
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1536,96,96,128,1,fp8,fp8,0,1.287839984893799
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1536,96,1,128,1,float16,fp8,0,0.9926400184631348
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1536,96,8,128,1,fp8,fp8,0,2.1437583923339845
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1536,96,1,128,1,fp8,fp8,0,1.0335311889648438
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1536,96,2,128,1,float16,float16,0,1.2001312255859375
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1536,96,2,128,1,fp8,fp8,0,0.9927231788635253
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1536,96,4,128,1,float16,float16,0,1.070857620239258
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1536,96,4,128,1,float16,fp8,0,0.9922847747802734
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1536,96,2,128,1,float16,fp8,0,1.0572192192077636
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1536,96,4,128,1,fp8,fp8,0,1.0100367546081543
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1536,96,8,128,1,float16,float16,0,1.0730655670166016
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1536,96,96,128,1,float16,fp8,0,0.6526095867156982
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1536,96,96,128,1,float16,float16,0,0.7630064010620117
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1536,96,8,128,1,float16,fp8,0,1.169153594970703
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1536,96,1,128,1,float16,float16,0,0.527239990234375
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1536,96,1,128,1,float16,fp8,0,0.5831920146942139
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1536,96,1,128,1,fp8,fp8,0,0.5073279857635498
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1536,96,2,128,1,float16,fp8,0,0.5291791915893554
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1536,96,2,128,1,fp8,fp8,0,0.5076111793518067
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1536,96,2,128,1,float16,float16,0,0.6086207866668701
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1536,96,4,128,1,float16,float16,0,0.5564720153808593
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1536,96,4,128,1,float16,fp8,0,0.5069392204284668
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1536,96,4,128,1,fp8,fp8,0,0.5261375904083252
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1536,96,8,128,1,float16,float16,0,0.5383935928344726
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1536,96,96,128,1,float16,fp8,0,1.2810000419616698
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1536,96,8,128,1,float16,fp8,0,0.5997039794921875
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1536,96,8,128,1,fp8,fp8,0,0.5514895915985107
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1536,96,96,128,1,float16,float16,0,0.37283198833465575
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1536,96,1,128,1,float16,float16,0,0.2709791898727417
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1536,96,1,128,1,float16,fp8,0,0.29314560890197755
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1536,96,1,128,1,fp8,fp8,0,0.2767247915267944
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1536,96,2,128,1,float16,float16,0,0.2717152118682861
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1536,96,2,128,1,float16,fp8,0,0.26575360298156736
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1536,96,96,128,1,fp8,fp8,0,0.3390944004058838
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1536,96,2,128,1,fp8,fp8,0,0.28275198936462403
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1536,96,4,128,1,float16,float16,0,0.28156960010528564
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1536,96,4,128,1,float16,fp8,0,0.26597440242767334
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1536,96,8,128,1,float16,float16,0,0.277347207069397
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1536,96,8,128,1,float16,fp8,0,0.27968320846557615
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1536,96,96,128,1,float16,float16,0,0.196014404296875
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1536,96,8,128,1,fp8,fp8,0,0.27535200119018555
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1536,96,96,128,1,float16,fp8,0,0.18263360261917114
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1536,96,4,128,1,fp8,fp8,0,0.26473278999328614
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1536,96,96,128,1,fp8,fp8,0,0.18715039491653443
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1536,96,1,128,1,float16,float16,0,0.14875999689102173
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1536,96,1,128,1,float16,fp8,0,0.1480288028717041
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1536,96,1,128,1,fp8,fp8,0,0.1449504017829895
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1536,96,2,128,1,float16,float16,0,0.148854398727417
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1536,96,2,128,1,fp8,fp8,0,0.1451248049736023
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1536,96,4,128,1,float16,float16,0,0.15194560289382936
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1536,96,4,128,1,float16,fp8,0,0.14508639574050902
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1536,96,4,128,1,fp8,fp8,0,0.14517600536346437
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1536,96,8,128,1,float16,float16,0,0.15133440494537354
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1536,96,8,128,1,float16,fp8,0,0.14684159755706788
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1536,96,8,128,1,fp8,fp8,0,0.14514559507369995
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1536,96,96,128,1,float16,float16,0,0.1090127944946289
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1536,96,96,128,1,float16,fp8,0,0.10195200443267823
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1536,96,96,128,1,fp8,fp8,0,0.10367039442062378
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1536,96,96,128,1,fp8,fp8,0,0.662604808807373
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1536,96,1,128,1,float16,fp8,0,0.08287839889526367
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1536,96,1,128,1,fp8,fp8,0,0.08255040049552917
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1536,96,2,128,1,float16,float16,0,0.0866096019744873
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1536,96,2,128,1,fp8,fp8,0,0.0829360008239746
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1536,96,4,128,1,float16,fp8,0,0.08365439772605895
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1536,96,4,128,1,fp8,fp8,0,0.0831824004650116
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1536,96,8,128,1,float16,float16,0,0.08770719766616822
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1536,96,8,128,1,float16,fp8,0,0.08328639864921569
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1536,96,8,128,1,fp8,fp8,0,0.0835103988647461
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1536,96,96,128,1,float16,fp8,0,0.3435456037521362
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1536,96,2,128,1,float16,fp8,0,0.14657440185546874
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1536,96,8,128,1,fp8,fp8,0,0.9931551933288574
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1536,96,1,128,1,float16,float16,0,0.08602079749107361
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1536,96,2,128,1,float16,fp8,0,0.08250719904899598
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1536,96,4,128,1,float16,float16,0,0.08593279719352723
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,1024,96,1,128,1,float16,fp8,0,4.234726333618164
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,1024,96,1,128,1,float16,float16,0,4.6918785095214846
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,1024,96,1,128,1,fp8,fp8,0,4.233091354370117
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,1024,96,2,128,1,float16,float16,0,4.717536163330078
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,1024,96,2,128,1,float16,fp8,0,4.233340835571289
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,1024,96,2,128,1,fp8,fp8,0,4.236225509643555
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,1024,96,4,128,1,float16,float16,0,4.805416107177734
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,1024,96,4,128,1,float16,fp8,0,4.232251358032227
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,1024,96,4,128,1,fp8,fp8,0,4.231729507446289
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,1024,96,8,128,1,float16,float16,0,4.9099681854248045
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,1024,96,8,128,1,float16,fp8,0,4.227352142333984
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1024,96,96,128,1,float16,fp8,0,2.9010576248168944
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1024,96,96,128,1,float16,float16,0,3.3636993408203124
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1024,96,96,128,1,fp8,fp8,0,2.8935184478759766
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1024,96,1,128,1,float16,float16,0,2.273983955383301
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,1024,96,8,128,1,fp8,fp8,0,4.296120071411133
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1024,96,1,128,1,float16,fp8,0,2.2436704635620117
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1024,96,1,128,1,fp8,fp8,0,2.125916862487793
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1024,96,2,128,1,float16,float16,0,2.212539291381836
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1024,96,2,128,1,float16,fp8,0,2.28023681640625
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1024,96,2,128,1,fp8,fp8,0,2.1249343872070314
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1024,96,4,128,1,float16,float16,0,2.429104042053223
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1024,96,4,128,1,float16,fp8,0,2.122764778137207
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1024,96,4,128,1,fp8,fp8,0,2.127342414855957
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1024,96,8,128,1,float16,float16,0,2.309259223937988
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1024,96,8,128,1,float16,fp8,0,2.2498992919921874
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1024,96,96,128,1,float16,float16,0,1.6390367507934571
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1024,96,1,128,1,float16,float16,0,1.077943992614746
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1024,96,96,128,1,float16,fp8,0,1.610696029663086
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1024,96,8,128,1,fp8,fp8,0,2.130641555786133
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1024,96,1,128,1,float16,fp8,0,1.0725728034973145
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1024,96,1,128,1,fp8,fp8,0,1.292313575744629
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1024,96,2,128,1,float16,float16,0,1.0865632057189942
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1024,96,2,128,1,float16,fp8,0,1.1442655563354491
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1024,96,2,128,1,fp8,fp8,0,1.1015263557434083
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1024,96,4,128,1,float16,float16,0,1.103865623474121
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1024,96,4,128,1,fp8,fp8,0,1.0726927757263183
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1024,96,8,128,1,float16,float16,0,1.140056037902832
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1024,96,8,128,1,float16,fp8,0,1.1034000396728516
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1024,96,96,128,1,float16,float16,0,0.8233280181884766
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1024,96,8,128,1,fp8,fp8,0,1.0947263717651368
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1024,96,96,128,1,float16,fp8,0,0.7936031818389893
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1024,96,96,128,1,fp8,fp8,0,0.7379903793334961
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1024,96,1,128,1,float16,float16,0,0.6101103782653808
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1024,96,1,128,1,float16,fp8,0,0.5554495811462402
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1024,96,1,128,1,fp8,fp8,0,0.557254409790039
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1024,96,2,128,1,float16,float16,0,0.5519055843353271
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1024,96,2,128,1,float16,fp8,0,0.5707647800445557
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1024,96,2,128,1,fp8,fp8,0,0.5455008029937745
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1024,96,4,128,1,float16,float16,0,0.6114496231079102
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1024,96,4,128,1,float16,fp8,0,0.5514639854431153
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1024,96,8,128,1,float16,float16,0,0.5648064136505127
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1024,96,8,128,1,float16,fp8,0,0.5523712158203125
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1024,96,96,128,1,float16,float16,0,0.44155359268188477
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1024,96,96,128,1,fp8,fp8,0,1.4547632217407227
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1024,96,4,128,1,fp8,fp8,0,0.5442287921905518
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1024,96,96,128,1,float16,fp8,0,0.3807231903076172
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1024,96,1,128,1,float16,float16,0,0.285862398147583
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1024,96,1,128,1,float16,fp8,0,0.31414079666137695
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1024,96,1,128,1,fp8,fp8,0,0.2824320077896118
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1024,96,2,128,1,float16,float16,0,0.2826512098312378
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1024,96,2,128,1,fp8,fp8,0,0.2858367919921875
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1024,96,4,128,1,float16,float16,0,0.30634241104125975
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1024,96,4,128,1,float16,fp8,0,0.2824336051940918
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1024,96,2,128,1,float16,fp8,0,0.2828320026397705
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1024,96,4,128,1,fp8,fp8,0,0.282475209236145
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1024,96,8,128,1,float16,float16,0,0.293393611907959
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1024,96,4,128,1,float16,fp8,0,1.0719264030456543
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1024,96,96,128,1,float16,float16,0,0.21759519577026368
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1024,96,8,128,1,fp8,fp8,0,0.2830879926681519
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1024,96,96,128,1,float16,fp8,0,0.19976799488067626
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1024,96,1,128,1,float16,float16,0,0.15343519449234008
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1024,96,96,128,1,fp8,fp8,0,0.20008320808410646
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1024,96,1,128,1,float16,fp8,0,0.15202560424804687
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1024,96,2,128,1,float16,float16,0,0.15334880352020264
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1024,96,1,128,1,fp8,fp8,0,0.15163040161132812
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1024,96,2,128,1,float16,fp8,0,0.15186879634857178
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1024,96,2,128,1,fp8,fp8,0,0.15142079591751098
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1024,96,4,128,1,float16,float16,0,0.15403679609298707
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1024,96,4,128,1,fp8,fp8,0,0.1517135977745056
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1024,96,8,128,1,float16,float16,0,0.15668799877166747
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1024,96,8,128,1,fp8,fp8,0,0.15254080295562744
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1024,96,96,128,1,float16,float16,0,0.11987680196762085
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1024,96,96,128,1,fp8,fp8,0,0.1108672022819519
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1024,96,1,128,1,float16,float16,0,0.08705760240554809
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1024,96,1,128,1,float16,fp8,0,0.08485440015792847
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1024,96,1,128,1,fp8,fp8,0,0.08428320288658142
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1024,96,2,128,1,float16,float16,0,0.0866927981376648
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1024,96,2,128,1,float16,fp8,0,0.08471199870109558
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1024,96,2,128,1,fp8,fp8,0,0.08461120128631591
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1024,96,4,128,1,float16,float16,0,0.08735520243644715
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1024,96,4,128,1,float16,fp8,0,0.08444960117340088
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1024,96,4,128,1,fp8,fp8,0,0.08466399908065796
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1024,96,8,128,1,float16,float16,0,0.08881440162658691
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1024,96,8,128,1,float16,fp8,0,0.08472480177879334
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1024,96,8,128,1,fp8,fp8,0,0.08449599742889405
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1024,96,8,128,1,fp8,fp8,0,0.5459136009216309
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1024,96,96,128,1,float16,float16,0,0.06988000273704528
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1024,96,96,128,1,float16,fp8,0,0.06387360095977783
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1024,96,1,128,1,float16,float16,0,0.052534401416778564
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1024,96,96,128,1,fp8,fp8,0,0.06275039911270142
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1024,96,1,128,1,float16,fp8,0,0.05142880082130432
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1024,96,1,128,1,fp8,fp8,0,0.05100640058517456
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1024,96,2,128,1,float16,float16,0,0.0523967981338501
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1024,96,2,128,1,float16,fp8,0,0.05140799880027771
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1024,96,96,128,1,fp8,fp8,0,0.3788784027099609
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1024,96,4,128,1,float16,float16,0,0.05308960080146789
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1024,96,4,128,1,float16,fp8,0,0.05111200213432312
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1024,96,4,128,1,fp8,fp8,0,0.05132799744606018
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1024,96,8,128,1,float16,fp8,0,0.050886398553848265
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1024,96,8,128,1,fp8,fp8,0,0.051123201847076416
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1024,96,8,128,1,float16,float16,0,0.0531328022480011
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1024,96,8,128,1,float16,fp8,0,0.29260959625244143
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1024,96,4,128,1,float16,fp8,0,0.15093280076980592
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1024,96,8,128,1,float16,fp8,0,0.15174560546875
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1024,96,96,128,1,float16,fp8,0,0.11047680377960205
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1024,96,2,128,1,fp8,fp8,0,0.05104960203170776
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,512,96,1,128,1,float16,float16,0,3.284401702880859
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,512,96,1,128,1,float16,fp8,0,3.269417572021484
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,512,96,1,128,1,fp8,fp8,0,3.265891265869141
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,512,96,2,128,1,float16,float16,0,3.3686737060546874
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,512,96,2,128,1,float16,fp8,0,3.2608478546142576
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,512,96,2,128,1,fp8,fp8,0,3.2583694458007812
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,512,96,4,128,1,float16,float16,0,3.3067134857177733
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,512,96,4,128,1,float16,fp8,0,3.2538719177246094
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,512,96,4,128,1,fp8,fp8,0,3.3016929626464844
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,512,96,8,128,1,float16,float16,0,3.3683696746826173
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,512,96,8,128,1,float16,fp8,0,3.2520286560058596
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,512,96,96,128,1,float16,float16,0,2.6711231231689454
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,512,96,96,128,1,float16,fp8,0,2.3867088317871095
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,512,96,1,128,1,float16,float16,0,1.5913472175598145
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,512,96,8,128,1,fp8,fp8,0,3.3238784790039064
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,512,96,1,128,1,float16,fp8,0,1.6411792755126953
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,512,96,1,128,1,fp8,fp8,0,1.7123567581176757
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,512,96,2,128,1,float16,float16,0,1.5985024452209473
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,512,96,96,128,1,fp8,fp8,0,2.4320304870605467
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,512,96,2,128,1,fp8,fp8,0,1.6365392684936524
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,512,96,2,128,1,float16,fp8,0,1.7029680252075194
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,512,96,4,128,1,float16,float16,0,1.6703567504882812
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,512,96,4,128,1,fp8,fp8,0,1.6366432189941407
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,512,96,8,128,1,float16,float16,0,1.715153694152832
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,512,96,8,128,1,float16,fp8,0,1.6738576889038086
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,512,96,8,128,1,fp8,fp8,0,1.634872055053711
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,512,96,96,128,1,float16,float16,0,1.32774076461792
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,512,96,96,128,1,float16,fp8,0,1.2130127906799317
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,512,96,1,128,1,float16,float16,0,0.804958438873291
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,512,96,96,128,1,fp8,fp8,0,1.2954383850097657
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,512,96,1,128,1,float16,fp8,0,0.8840208053588867
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,512,96,1,128,1,fp8,fp8,0,0.8307536125183106
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,512,96,2,128,1,float16,float16,0,0.805577564239502
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,512,96,2,128,1,fp8,fp8,0,0.8270432472229003
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,512,96,2,128,1,float16,fp8,0,0.8489616394042969
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,512,96,4,128,1,float16,float16,0,0.8318575859069824
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,512,96,4,128,1,float16,fp8,0,0.8453616142272949
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,512,96,4,128,1,fp8,fp8,0,0.8521871566772461
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,512,96,8,128,1,float16,float16,0,0.8365792274475098
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,512,96,96,128,1,float16,float16,0,0.6761023998260498
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,512,96,8,128,1,fp8,fp8,0,0.8258399963378906
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,512,96,96,128,1,float16,fp8,0,0.6104127883911132
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,512,96,1,128,1,float16,float16,0,0.42334880828857424
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,512,96,96,128,1,fp8,fp8,0,0.6218480110168457
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,512,96,1,128,1,float16,fp8,0,0.44355998039245603
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,512,96,2,128,1,float16,float16,0,0.41202878952026367
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,512,96,1,128,1,fp8,fp8,0,0.42557439804077146
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,512,96,2,128,1,float16,fp8,0,0.42026400566101074
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,512,96,4,128,1,float16,fp8,0,1.634993553161621
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,512,96,2,128,1,fp8,fp8,0,0.42647361755371094
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,512,96,4,128,1,float16,float16,0,0.41566081047058107
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,512,96,4,128,1,float16,fp8,0,0.43897600173950196
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,512,96,4,128,1,fp8,fp8,0,0.42114720344543455
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,512,96,8,128,1,float16,float16,0,0.42572479248046874
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,512,96,8,128,1,float16,fp8,0,0.42025117874145507
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,512,96,8,128,1,fp8,fp8,0,0.4198256015777588
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,512,96,96,128,1,float16,float16,0,0.342142391204834
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,512,96,96,128,1,float16,fp8,0,0.3119472026824951
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,512,96,1,128,1,float16,float16,0,0.21540160179138185
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,512,96,1,128,1,float16,fp8,0,0.218940806388855
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,512,96,1,128,1,fp8,fp8,0,0.21827681064605714
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,512,96,96,128,1,fp8,fp8,0,0.31863839626312257
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,512,96,2,128,1,float16,float16,0,0.21515359878540039
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,512,96,2,128,1,float16,fp8,0,0.21944479942321776
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,512,96,2,128,1,fp8,fp8,0,0.21793439388275146
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,512,96,4,128,1,float16,float16,0,0.21775200366973876
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,512,96,4,128,1,float16,fp8,0,0.21816480159759521
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,512,96,4,128,1,fp8,fp8,0,0.21798241138458252
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,512,96,8,128,1,float16,float16,0,0.2243583917617798
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,512,96,8,128,1,float16,fp8,0,0.2189568042755127
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,512,96,8,128,1,fp8,fp8,0,0.21992480754852295
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,512,96,96,128,1,float16,float16,0,0.17971839904785156
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,512,96,96,128,1,float16,fp8,0,0.16525280475616455
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,512,96,96,128,1,fp8,fp8,0,0.16481599807739258
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,512,96,1,128,1,float16,fp8,0,0.11929119825363159
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,512,96,2,128,1,float16,float16,0,0.11793760061264039
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,512,96,2,128,1,float16,fp8,0,0.11749759912490845
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,512,96,2,128,1,fp8,fp8,0,0.11757279634475708
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,512,96,4,128,1,float16,float16,0,0.11856000423431397
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,512,96,4,128,1,float16,fp8,0,0.11846879720687867
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,512,96,4,128,1,fp8,fp8,0,0.11785600185394288
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,512,96,8,128,1,float16,float16,0,0.1205888032913208
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,512,96,8,128,1,fp8,fp8,0,0.11756319999694824
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,512,96,1,128,1,fp8,fp8,0,0.11718239784240722
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,512,96,96,128,1,float16,float16,0,0.1006816029548645
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,512,96,96,128,1,float16,fp8,0,0.09198240041732789
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,512,96,96,128,1,fp8,fp8,0,0.090939199924469
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,512,96,1,128,1,float16,float16,0,0.06637920141220092
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,512,96,2,128,1,float16,float16,0,0.06686879992485047
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,512,96,2,128,1,float16,fp8,0,0.06580479741096497
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,512,96,2,128,1,fp8,fp8,0,0.0659712016582489
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,512,96,4,128,1,float16,fp8,0,0.0660431981086731
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,512,96,1,128,1,float16,fp8,0,0.06606559753417969
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,512,96,4,128,1,fp8,fp8,0,0.06595839858055115
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,512,96,1,128,1,fp8,fp8,0,0.06597599983215333
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,512,96,4,128,1,float16,float16,0,0.06820480227470398
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,512,96,8,128,1,float16,fp8,0,0.0662671983242035
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,512,96,8,128,1,fp8,fp8,0,0.06592320203781128
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,512,96,96,128,1,float16,float16,0,0.056918400526046756
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,512,96,96,128,1,float16,fp8,0,0.05132319927215576
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,512,96,96,128,1,fp8,fp8,0,0.051393598318099976
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,512,96,1,128,1,float16,float16,0,0.03940800130367279
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,512,96,1,128,1,float16,fp8,0,0.039139199256896975
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,512,96,8,128,1,float16,float16,0,0.06935359835624695
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,512,96,2,128,1,float16,float16,0,0.039444801211357114
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,512,96,2,128,1,float16,fp8,0,0.03915359973907471
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,512,96,4,128,1,float16,float16,0,0.040372800827026364
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,512,96,4,128,1,float16,fp8,0,0.03914079964160919
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,512,96,4,128,1,fp8,fp8,0,0.039110401272773744
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,512,96,8,128,1,float16,float16,0,0.04094560146331787
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,512,96,8,128,1,float16,fp8,0,0.03914240002632141
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,512,96,8,128,1,fp8,fp8,0,0.03912639915943146
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,512,96,2,128,1,fp8,fp8,0,0.03912639915943146
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,512,96,96,128,1,float16,float16,0,0.033046400547027587
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,512,96,96,128,1,float16,fp8,0,0.03302719891071319
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,512,96,96,128,1,fp8,fp8,0,0.03294720053672791
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,512,96,1,128,1,float16,fp8,0,0.026907199621200563
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,512,96,1,128,1,fp8,fp8,0,0.026843199133872987
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,512,96,2,128,1,float16,float16,0,0.027793601155281067
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,512,96,2,128,1,float16,fp8,0,0.026844799518585205
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,512,96,2,128,1,fp8,fp8,0,0.026870399713516235
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,512,96,4,128,1,float16,float16,0,0.027739199995994567
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,512,96,4,128,1,float16,fp8,0,0.026876801252365114
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,512,96,1,128,1,float16,float16,0,0.027219200134277345
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,512,96,4,128,1,fp8,fp8,0,0.026851201057434083
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,512,96,8,128,1,float16,float16,0,0.02869119942188263
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,512,96,8,128,1,float16,fp8,0,0.026844799518585205
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,512,96,8,128,1,fp8,fp8,0,0.026940798759460448
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,512,96,1,128,1,float16,float16,0,0.11687519550323486
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,512,96,8,128,1,float16,fp8,0,0.11792639493942261
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,256,96,1,128,1,float16,float16,0,1.3113087654113769
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,256,96,1,128,1,fp8,fp8,0,1.3775535583496095
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,256,96,2,128,1,float16,float16,0,1.3136912345886231
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,512,96,1,128,1,fp8,fp8,0,0.03912799954414368
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,256,96,2,128,1,float16,fp8,0,1.3743231773376465
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,256,96,2,128,1,fp8,fp8,0,1.3890671730041504
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,256,96,4,128,1,float16,float16,0,1.3330752372741699
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,256,96,4,128,1,float16,fp8,0,1.3726223945617675
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,256,96,4,128,1,fp8,fp8,0,1.3712047576904296
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,256,96,8,128,1,float16,float16,0,1.3724752426147462
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,256,96,8,128,1,float16,fp8,0,1.3795663833618164
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,256,96,8,128,1,fp8,fp8,0,1.4143967628479004
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,256,96,1,128,1,float16,fp8,0,1.375928020477295
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,256,96,96,128,1,float16,float16,0,1.1593104362487794
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,256,96,96,128,1,float16,fp8,0,1.0632512092590332
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,256,96,1,128,1,float16,float16,0,0.6627295970916748
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,256,96,1,128,1,float16,fp8,0,0.6982128143310546
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,256,96,96,128,1,fp8,fp8,0,1.1107935905456543
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,512,96,8,128,1,float16,fp8,0,0.8255743980407715
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,256,96,1,128,1,fp8,fp8,0,0.6954847812652588
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,256,96,2,128,1,float16,float16,0,0.6746399879455567
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,256,96,2,128,1,float16,fp8,0,0.6938159942626954
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,256,96,2,128,1,fp8,fp8,0,0.6942527770996094
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,256,96,4,128,1,float16,float16,0,0.6741280078887939
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,256,96,4,128,1,float16,fp8,0,0.6991983890533447
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,256,96,4,128,1,fp8,fp8,0,0.6945824146270752
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,256,96,8,128,1,float16,float16,0,0.6937215805053711
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,256,96,8,128,1,float16,fp8,0,0.7002511978149414
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,256,96,96,128,1,float16,float16,0,0.5863872051239014
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,256,96,1,128,1,float16,float16,0,0.3393359899520874
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,256,96,1,128,1,float16,fp8,0,0.3533888101577759
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,256,96,1,128,1,fp8,fp8,0,0.35687520503997805
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,256,96,96,128,1,fp8,fp8,0,0.5411856174468994
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,256,96,2,128,1,float16,float16,0,0.33925440311431887
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,256,96,2,128,1,float16,fp8,0,0.35314719676971434
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,256,96,2,128,1,fp8,fp8,0,0.3525680065155029
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,256,96,4,128,1,float16,float16,0,0.3441888093948364
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,256,96,4,128,1,float16,fp8,0,0.35338559150695803
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,256,96,4,128,1,fp8,fp8,0,0.3528719902038574
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,256,96,8,128,1,float16,float16,0,0.35359840393066405
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,256,96,8,128,1,float16,fp8,0,0.3530400037765503
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,256,96,96,128,1,float16,float16,0,0.2997136116027832
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,256,96,1,128,1,float16,float16,0,0.17643519639968872
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,256,96,96,128,1,float16,fp8,0,0.27525599002838136
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,256,96,8,128,1,fp8,fp8,0,0.3529887914657593
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,256,96,96,128,1,fp8,fp8,0,0.2753439903259277
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,256,96,1,128,1,float16,fp8,0,0.1819584012031555
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,256,96,2,128,1,float16,float16,0,0.17651840448379516
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,256,96,2,128,1,float16,fp8,0,0.18120800256729125
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,256,96,2,128,1,fp8,fp8,0,0.1817631959915161
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,256,96,4,128,1,float16,float16,0,0.1787184000015259
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,256,96,4,128,1,float16,fp8,0,0.18186240196228026
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,256,96,4,128,1,fp8,fp8,0,0.18140000104904175
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,256,96,8,128,1,float16,float16,0,0.18303680419921875
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,256,96,8,128,1,float16,fp8,0,0.18186719417572023
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,256,96,8,128,1,fp8,fp8,0,0.18218719959259033
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,256,96,96,128,1,float16,float16,0,0.1596495985984802
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,256,96,1,128,1,float16,float16,0,0.09741439819335937
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,256,96,96,128,1,fp8,fp8,0,0.14574559926986694
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,256,96,1,128,1,fp8,fp8,0,0.09927359819412232
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,256,96,2,128,1,float16,float16,0,0.09755679965019226
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,256,96,2,128,1,float16,fp8,0,0.09874879717826843
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,256,96,2,128,1,fp8,fp8,0,0.09911999702453614
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,256,96,4,128,1,float16,fp8,0,0.09925600290298461
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,256,96,4,128,1,fp8,fp8,0,0.0991599977016449
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,256,96,8,128,1,float16,float16,0,0.10125279426574707
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,256,96,96,128,1,float16,fp8,0,0.5412591934204102
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,256,96,8,128,1,fp8,fp8,0,0.6930287837982178
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,256,96,8,128,1,float16,fp8,0,0.09958879947662354
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,256,96,8,128,1,fp8,fp8,0,0.09954400062561035
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,256,96,96,128,1,float16,float16,0,0.09009919762611389
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,256,96,96,128,1,float16,fp8,0,0.08218240141868591
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,256,96,96,128,1,fp8,fp8,0,0.08214560151100159
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,256,96,1,128,1,float16,float16,0,0.056428802013397214
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,256,96,1,128,1,float16,fp8,0,0.057036799192428586
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,256,96,1,128,1,fp8,fp8,0,0.0565392017364502
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,256,96,2,128,1,float16,float16,0,0.05699679851531982
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,256,96,2,128,1,float16,fp8,0,0.0560479998588562
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,256,96,2,128,1,fp8,fp8,0,0.056720000505447385
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,256,96,4,128,1,float16,fp8,0,0.0560479998588562
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,256,96,4,128,1,fp8,fp8,0,0.057601600885391235
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,256,96,8,128,1,float16,float16,0,0.05927519798278809
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,256,96,8,128,1,float16,fp8,0,0.05760319828987122
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,256,96,8,128,1,fp8,fp8,0,0.0572704017162323
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,256,96,96,128,1,float16,float16,0,0.049369600415229795
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,256,96,96,128,1,float16,fp8,0,0.04335359930992126
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,256,96,96,128,1,fp8,fp8,0,0.04525279998779297
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,256,96,1,128,1,float16,float16,0,0.03253119885921478
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,256,96,1,128,1,float16,fp8,0,0.03299039900302887
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,256,96,1,128,1,fp8,fp8,0,0.03184320032596588
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,256,96,2,128,1,float16,fp8,0,0.032390400767326355
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,256,96,1,128,1,fp8,fp8,0,0.18155360221862793
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,256,96,2,128,1,fp8,fp8,0,0.03299039900302887
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,256,96,4,128,1,float16,float16,0,0.032995200157165526
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,256,96,2,128,1,float16,float16,0,0.03300800025463104
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,256,96,4,128,1,float16,fp8,0,0.03298879861831665
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,256,96,4,128,1,fp8,fp8,0,0.032528001070022586
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,256,96,8,128,1,float16,fp8,0,0.033011201024055484
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,256,96,8,128,1,fp8,fp8,0,0.03298400044441223
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,256,96,96,128,1,float16,fp8,0,0.14601119756698608
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,256,96,1,128,1,float16,fp8,0,0.09939200282096863
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,256,96,96,128,1,float16,fp8,0,0.028990399837493897
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,256,96,96,128,1,fp8,fp8,0,0.02889760136604309
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,256,96,1,128,1,float16,float16,0,0.022995199263095855
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,256,96,1,128,1,float16,fp8,0,0.022785599529743194
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,256,96,1,128,1,fp8,fp8,0,0.02277279943227768
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,256,96,8,128,1,float16,float16,0,0.03297599852085113
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,256,96,2,128,1,float16,fp8,0,0.023214399814605713
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,256,96,2,128,1,fp8,fp8,0,0.022782400250434875
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,256,96,4,128,1,float16,float16,0,0.09896640181541443
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,256,96,4,128,1,float16,float16,0,0.024600000679492952
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,256,96,4,128,1,float16,fp8,0,0.023257599771022798
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,256,96,4,128,1,fp8,fp8,0,0.022767999768257143
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,256,96,8,128,1,float16,float16,0,0.02475520074367523
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,256,96,8,128,1,float16,fp8,0,0.02340800017118454
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,256,96,8,128,1,fp8,fp8,0,0.02399519979953766
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,256,96,96,128,1,float16,float16,0,0.01866399943828583
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,256,96,96,128,1,fp8,fp8,0,0.018620799481868743
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,256,96,1,128,1,float16,fp8,0,0.016539199650287627
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,256,96,1,128,1,fp8,fp8,0,0.016415999829769136
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,256,96,2,128,1,float16,float16,0,0.01656319946050644
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,256,96,2,128,1,float16,fp8,0,0.016161599755287172
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,256,96,2,128,1,fp8,fp8,0,0.016527999937534333
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,256,96,4,128,1,float16,float16,0,0.016575999557971954
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,256,96,96,128,1,float16,fp8,0,0.018681600689888
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,256,96,4,128,1,fp8,fp8,0,0.016545599699020384
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,256,96,4,128,1,float16,fp8,0,0.01655679941177368
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,256,96,8,128,1,float16,float16,0,0.01653600037097931
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,256,96,8,128,1,fp8,fp8,0,0.016616000235080718
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,256,96,8,128,1,float16,fp8,0,0.016569599509239197
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,128,96,1,128,1,float16,float16,0,0.8048463821411133
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,128,96,1,128,1,float16,fp8,0,0.835632038116455
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,128,96,1,128,1,fp8,fp8,0,0.8342656135559082
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,256,96,4,128,1,float16,float16,0,0.057734400033950806
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,128,96,2,128,1,float16,float16,0,0.8054623603820801
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,128,96,2,128,1,fp8,fp8,0,0.8347423553466797
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,128,96,4,128,1,float16,float16,0,0.8144720077514649
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,128,96,4,128,1,float16,fp8,0,0.8319328308105469
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,256,96,96,128,1,float16,float16,0,0.028887999057769776
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,128,96,4,128,1,fp8,fp8,0,0.8321184158325196
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,256,96,2,128,1,float16,float16,0,0.024414399266242982
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,128,96,8,128,1,float16,float16,0,0.834870433807373
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,128,96,8,128,1,float16,fp8,0,0.8313967704772949
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,128,96,8,128,1,fp8,fp8,0,0.8314687728881835
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,256,96,1,128,1,float16,float16,0,0.016652800142765045
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,128,96,96,128,1,float16,float16,0,0.6503759860992432
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,128,96,1,128,1,float16,float16,0,0.41027679443359377
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,128,96,96,128,1,float16,fp8,0,0.6063168048858643
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,128,96,1,128,1,float16,fp8,0,0.42284159660339354
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,128,96,96,128,1,fp8,fp8,0,0.6071280002593994
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,128,96,1,128,1,fp8,fp8,0,0.4225008010864258
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,128,96,2,128,1,float16,float16,0,0.40804638862609866
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,128,96,2,128,1,float16,fp8,0,0.42263040542602537
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,128,96,2,128,1,fp8,fp8,0,0.42407522201538084
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,128,96,4,128,1,float16,float16,0,0.4139088153839111
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,128,96,4,128,1,float16,fp8,0,0.4221807956695557
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,128,96,4,128,1,fp8,fp8,0,0.42287678718566896
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,128,96,8,128,1,float16,fp8,0,0.42191362380981445
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,128,96,96,128,1,float16,float16,0,0.3330543994903564
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,128,96,96,128,1,float16,fp8,0,0.3095792055130005
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,128,96,1,128,1,float16,float16,0,0.2118544101715088
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,128,96,96,128,1,fp8,fp8,0,0.3095871925354004
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,128,96,1,128,1,float16,fp8,0,0.21712958812713623
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,128,96,2,128,1,float16,float16,0,0.21191680431365967
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,128,96,2,128,1,float16,fp8,0,0.21609599590301515
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,128,96,2,128,1,fp8,fp8,0,0.21638720035552977
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,128,96,4,128,1,float16,float16,0,0.21425600051879884
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,128,96,4,128,1,float16,fp8,0,0.21612000465393066
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,128,96,4,128,1,fp8,fp8,0,0.21632320880889894
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,128,96,8,128,1,float16,float16,0,0.2187743902206421
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,128,96,8,128,1,float16,fp8,0,0.21648800373077393
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,128,96,8,128,1,fp8,fp8,0,0.21621921062469482
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,128,96,96,128,1,float16,float16,0,0.17109279632568358
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,128,96,96,128,1,float16,fp8,0,0.1599679946899414
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,128,96,1,128,1,float16,float16,0,0.11223839521408081
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,128,96,1,128,1,fp8,fp8,0,0.11338399648666382
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,128,96,2,128,1,float16,float16,0,0.11223360300064086
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,128,96,1,128,1,float16,fp8,0,0.11303999423980712
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,128,96,2,128,1,float16,fp8,0,0.11348160505294799
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,128,96,2,128,1,fp8,fp8,0,0.11311520338058471
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,128,96,4,128,1,float16,float16,0,0.11332639455795288
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,128,96,4,128,1,float16,fp8,0,0.11313920021057129
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,128,96,4,128,1,fp8,fp8,0,0.11335519552230836
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,128,96,8,128,1,float16,float16,0,0.11551840305328369
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,128,96,8,128,1,float16,fp8,0,0.11345119476318359
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,128,96,8,128,1,fp8,fp8,0,0.1129263997077942
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,128,96,96,128,1,float16,float16,0,0.09374399781227112
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,128,96,96,128,1,float16,fp8,0,0.0869983971118927
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,128,96,96,128,1,fp8,fp8,0,0.08647199869155883
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,128,96,1,128,1,float16,fp8,0,0.061694401502609256
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,128,96,8,128,1,float16,float16,0,0.42212958335876466
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,128,96,1,128,1,fp8,fp8,0,0.061849600076675414
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,128,96,2,128,1,float16,float16,0,0.06233279705047608
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,128,96,2,128,1,float16,fp8,0,0.06167839765548706
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,128,96,8,128,1,fp8,fp8,0,0.42244157791137693
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,128,96,4,128,1,float16,fp8,0,0.061617600917816165
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,128,96,1,128,1,fp8,fp8,0,0.21774399280548096
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,128,96,4,128,1,fp8,fp8,0,0.06170079708099365
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,128,96,8,128,1,float16,float16,0,0.06394559741020203
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,128,96,4,128,1,float16,float16,0,0.06337119936943054
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,128,96,8,128,1,float16,fp8,0,0.06188480257987976
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,128,96,96,128,1,float16,float16,0,0.05292320251464844
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,128,96,96,128,1,fp8,fp8,0,0.04920800030231476
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,128,96,1,128,1,float16,float16,0,0.036899200081825255
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,128,96,8,128,1,fp8,fp8,0,0.0619488000869751
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,128,96,1,128,1,float16,fp8,0,0.03705120086669922
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,128,96,2,128,1,float16,fp8,0,0.8338848114013672
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,128,96,2,128,1,float16,float16,0,0.036180800199508666
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,128,96,2,128,1,float16,fp8,0,0.03701280057430267
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,128,96,2,128,1,fp8,fp8,0,0.037028801441192624
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,128,96,4,128,1,float16,float16,0,0.03702079951763153
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,128,96,4,128,1,float16,fp8,0,0.03690559864044189
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,128,96,96,128,1,fp8,fp8,0,0.15994880199432374
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,128,96,8,128,1,float16,float16,0,0.037041598558425905
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,128,96,8,128,1,float16,fp8,0,0.03713119924068451
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,128,96,8,128,1,fp8,fp8,0,0.03703039884567261
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,128,96,96,128,1,float16,float16,0,0.028772801160812378
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,128,96,96,128,1,float16,fp8,0,0.02747200131416321
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,128,96,96,128,1,fp8,fp8,0,0.02871519923210144
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,128,96,1,128,1,float16,float16,0,0.022601599991321563
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,128,96,1,128,1,float16,fp8,0,0.0227743998169899
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,128,96,1,128,1,fp8,fp8,0,0.022672000527381896
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,128,96,2,128,1,float16,float16,0,0.022729599475860597
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,128,96,2,128,1,float16,fp8,0,0.022672000527381896
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,128,96,2,128,1,fp8,fp8,0,0.022755199670791627
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,128,96,4,128,1,float16,float16,0,0.022668799757957457
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,128,96,4,128,1,float16,fp8,0,0.022758400440216063
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,128,96,4,128,1,fp8,fp8,0,0.022694399952888487
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,128,96,8,128,1,float16,float16,0,0.022724799811840057
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,128,96,8,128,1,fp8,fp8,0,0.02269120067358017
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,128,96,96,128,1,float16,float16,0,0.018643200397491455
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,128,96,96,128,1,float16,fp8,0,0.019016000628471374
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,128,96,8,128,1,float16,fp8,0,0.022732800245285033
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,128,96,96,128,1,fp8,fp8,0,0.018716800212860107
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,128,96,1,128,1,float16,float16,0,0.016577599942684172
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,128,96,1,128,1,float16,fp8,0,0.016651199758052827
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,128,96,1,128,1,float16,float16,0,0.062299197912216185
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,128,96,1,128,1,fp8,fp8,0,0.01656319946050644
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,128,96,2,128,1,float16,float16,0,0.016627199947834015
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,128,96,2,128,1,fp8,fp8,0,0.016672000288963318
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,128,96,4,128,1,float16,float16,0,0.0166143998503685
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,128,96,4,128,1,float16,fp8,0,0.01653759926557541
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,128,96,4,128,1,fp8,fp8,0,0.016648000478744505
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,128,96,8,128,1,float16,float16,0,0.016516800224781036
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,128,96,8,128,1,float16,fp8,0,0.016612799465656282
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,128,96,8,128,1,fp8,fp8,0,0.01656000018119812
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,128,96,96,128,1,float16,float16,0,0.014609600603580474
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,128,96,2,128,1,fp8,fp8,0,0.061641597747802736
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,128,96,96,128,1,float16,fp8,0,0.012508800625801087
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,128,96,1,128,1,float16,float16,0,0.01130400002002716
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,128,96,1,128,1,float16,fp8,0,0.011553599685430526
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,128,96,1,128,1,fp8,fp8,0,0.011611200124025344
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,128,96,2,128,1,float16,float16,0,0.011534400284290314
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,128,96,2,128,1,float16,fp8,0,0.01220960021018982
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,128,96,2,128,1,fp8,fp8,0,0.01197120025753975
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,128,96,4,128,1,float16,float16,0,0.011648000031709672
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,128,96,4,128,1,float16,fp8,0,0.012243200093507767
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,128,96,4,128,1,fp8,fp8,0,0.012542399764060973
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,128,96,8,128,1,float16,float16,0,0.01241919994354248
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,128,96,96,128,1,float16,fp8,0,0.04833599925041199
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,128,96,8,128,1,fp8,fp8,0,0.0124719999730587
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,128,96,1,128,1,fp8,fp8,0,0.037003201246261594
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,64,96,1,128,1,float16,float16,0,0.6202320098876953
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,128,96,4,128,1,fp8,fp8,0,0.037006399035453795
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,64,96,1,128,1,float16,fp8,0,0.6339151859283447
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,64,96,1,128,1,fp8,fp8,0,0.6337007999420166
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,64,96,2,128,1,float16,float16,0,0.620201587677002
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,64,96,2,128,1,float16,fp8,0,0.6335472106933594
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,64,96,2,128,1,fp8,fp8,0,0.6325823783874511
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,64,96,4,128,1,float16,float16,0,0.6247536182403565
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,64,96,4,128,1,float16,fp8,0,0.6321248054504395
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,128,96,2,128,1,float16,fp8,0,0.01660960018634796
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,64,96,4,128,1,fp8,fp8,0,0.633241605758667
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,128,96,96,128,1,fp8,fp8,0,0.013046400249004364
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,64,96,8,128,1,float16,float16,0,0.6340432167053223
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,64,96,8,128,1,float16,fp8,0,0.6315824031829834
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,64,96,96,128,1,float16,float16,0,0.43688321113586426
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,64,96,96,128,1,float16,fp8,0,0.41519842147827146
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,64,96,8,128,1,fp8,fp8,0,0.6316431999206543
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,64,96,1,128,1,float16,float16,0,0.31650240421295167
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,64,96,1,128,1,float16,fp8,0,0.3222559928894043
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,128,96,8,128,1,float16,fp8,0,0.01255359947681427
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,64,96,96,128,1,fp8,fp8,0,0.4134704113006592
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,64,96,2,128,1,float16,fp8,0,0.3220128059387207
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,64,96,2,128,1,fp8,fp8,0,0.3220367908477783
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,64,96,4,128,1,float16,fp8,0,0.3217583894729614
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,64,96,4,128,1,fp8,fp8,0,0.3219327926635742
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,64,96,8,128,1,float16,float16,0,0.32338879108428953
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,64,96,8,128,1,float16,fp8,0,0.3222527980804443
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,64,96,96,128,1,float16,float16,0,0.22391679286956787
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,64,96,96,128,1,float16,fp8,0,0.2128351926803589
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,64,96,96,128,1,fp8,fp8,0,0.21313760280609131
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,64,96,1,128,1,float16,float16,0,0.16503039598464966
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,64,96,1,128,1,float16,fp8,0,0.16649919748306274
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,64,96,1,128,1,fp8,fp8,0,0.1669152021408081
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,64,96,2,128,1,float16,float16,0,0.1653615951538086
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,64,96,2,128,1,fp8,fp8,0,0.166428804397583
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,64,96,4,128,1,float16,float16,0,0.16617439985275267
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,64,96,4,128,1,float16,fp8,0,0.16660480499267577
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,64,96,4,128,1,fp8,fp8,0,0.1664896011352539
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,64,96,8,128,1,float16,float16,0,0.16886080503463746
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,64,96,8,128,1,float16,fp8,0,0.16667040586471557
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,64,96,96,128,1,float16,float16,0,0.11813600063323974
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,64,96,8,128,1,fp8,fp8,0,0.1672160029411316
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,64,96,96,128,1,float16,fp8,0,0.11149439811706544
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,64,96,96,128,1,fp8,fp8,0,0.11180800199508667
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,64,96,1,128,1,float16,float16,0,0.08900160193443299
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,64,96,1,128,1,float16,fp8,0,0.08727840185165406
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,64,96,1,128,1,fp8,fp8,0,0.08728320002555848
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,64,96,2,128,1,float16,fp8,0,0.08711360096931457
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,64,96,2,128,1,fp8,fp8,0,0.08703839778900146
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,64,96,2,128,1,float16,float16,0,0.08902400135993957
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,64,96,4,128,1,float16,float16,0,0.08944000005722046
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,64,96,1,128,1,fp8,fp8,0,0.3222352027893066
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,64,96,4,128,1,fp8,fp8,0,0.08692799806594849
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,64,96,8,128,1,float16,float16,0,0.09134399890899658
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,64,96,4,128,1,float16,float16,0,0.3187903881072998
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,64,96,4,128,1,float16,fp8,0,0.08750240206718445
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,64,96,8,128,1,fp8,fp8,0,0.08717280030250549
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,64,96,96,128,1,float16,fp8,0,0.06054239869117737
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,64,96,96,128,1,fp8,fp8,0,0.05987039804458618
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,64,96,1,128,1,float16,float16,0,0.049379199743270874
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,64,96,1,128,1,float16,fp8,0,0.04904159903526306
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,64,96,2,128,1,float16,float16,0,0.049446401000022885
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,64,96,2,128,1,float16,fp8,0,0.049272000789642334
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,64,96,8,128,1,fp8,fp8,0,0.32188479900360106
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,64,96,4,128,1,float16,float16,0,0.049296000599861146
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,64,96,4,128,1,float16,fp8,0,0.04864639937877655
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,64,96,4,128,1,fp8,fp8,0,0.04875519871711731
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,64,96,8,128,1,float16,float16,0,0.049446401000022885
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,64,96,8,128,1,float16,fp8,0,0.04890399873256683
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,64,96,8,128,1,fp8,fp8,0,0.04864639937877655
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,64,96,96,128,1,float16,float16,0,0.03497599959373474
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,64,96,2,128,1,float16,fp8,0,0.16629120111465454
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,64,96,96,128,1,float16,fp8,0,0.03508639931678772
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,64,96,96,128,1,fp8,fp8,0,0.035025599598884585
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,64,96,1,128,1,float16,float16,0,0.02940959930419922
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,64,96,1,128,1,float16,fp8,0,0.028908801078796387
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,64,96,1,128,1,fp8,fp8,0,0.028916800022125246
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,64,96,2,128,1,float16,float16,0,0.02913280129432678
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,64,96,2,128,1,float16,fp8,0,0.029049599170684816
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,64,96,2,128,1,fp8,fp8,0,0.028918400406837463
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,64,96,4,128,1,float16,float16,0,0.029257598519325256
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,64,96,4,128,1,float16,fp8,0,0.029411199688911437
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,64,96,4,128,1,fp8,fp8,0,0.028958401083946227
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,64,96,8,128,1,float16,float16,0,0.02980639934539795
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,64,96,8,128,1,float16,fp8,0,0.02908959984779358
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,64,96,8,128,1,fp8,fp8,0,0.029043200612068176
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,64,96,96,128,1,float16,fp8,0,0.02191839963197708
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,64,96,96,128,1,fp8,fp8,0,0.021027199923992157
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,64,96,1,128,1,float16,float16,0,0.01863359957933426
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,64,96,1,128,1,float16,fp8,0,0.018615999817848207
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,64,96,1,128,1,fp8,fp8,0,0.01866399943828583
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,64,96,2,128,1,float16,fp8,0,0.01865759938955307
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,64,96,2,128,1,fp8,fp8,0,0.018649600446224213
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,64,96,4,128,1,float16,float16,0,0.018622399866580965
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,64,96,4,128,1,float16,fp8,0,0.01860959976911545
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,64,96,4,128,1,fp8,fp8,0,0.018675200641155243
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,64,96,8,128,1,float16,float16,0,0.018636800348758698
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,64,96,8,128,1,float16,fp8,0,0.018688000738620758
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,64,96,2,128,1,float16,float16,0,0.31653120517730715
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,64,96,8,128,1,fp8,fp8,0,0.018617600202560425
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,64,96,8,128,1,float16,fp8,0,0.08730400204658509
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,64,96,96,128,1,float16,float16,0,0.01661120057106018
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,64,96,96,128,1,float16,fp8,0,0.016624000668525696
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,64,96,96,128,1,fp8,fp8,0,0.01658399999141693
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,64,96,1,128,1,float16,float16,0,0.014531199634075165
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,64,96,1,128,1,float16,fp8,0,0.014451199769973755
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,64,96,96,128,1,float16,float16,0,0.0652671992778778
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,64,96,1,128,1,fp8,fp8,0,0.014580799639225006
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,64,96,2,128,1,float16,float16,0,0.014555199444293976
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,64,96,1,128,1,fp8,fp8,0,0.049116799235343934
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,64,96,2,128,1,fp8,fp8,0,0.014579200744628906
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,64,96,4,128,1,float16,fp8,0,0.01451839953660965
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,64,96,2,128,1,fp8,fp8,0,0.049379199743270874
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,64,96,4,128,1,fp8,fp8,0,0.014580799639225006
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,64,96,8,128,1,float16,float16,0,0.0144896000623703
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,64,96,8,128,1,float16,fp8,0,0.014534400403499603
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,64,96,96,128,1,float16,float16,0,0.012515200674533844
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,64,96,96,128,1,fp8,fp8,0,0.012316799908876418
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,64,96,1,128,1,float16,float16,0,0.010414399951696397
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,64,96,1,128,1,float16,fp8,0,0.010393600165843963
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,64,96,96,128,1,float16,fp8,0,0.010505600273609162
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,64,96,2,128,1,float16,float16,0,0.010449600219726563
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,64,96,2,128,1,float16,fp8,0,0.010513599961996078
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,64,96,2,128,1,fp8,fp8,0,0.010478399693965912
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,64,96,4,128,1,float16,float16,0,0.010449600219726563
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,64,96,4,128,1,float16,fp8,0,0.01050880029797554
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,64,96,96,128,1,float16,float16,0,0.020854400098323823
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,64,96,8,128,1,float16,float16,0,0.010393600165843963
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,64,96,2,128,1,float16,float16,0,0.018641600012779237
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,64,96,8,128,1,float16,fp8,0,0.010487999767065048
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,64,96,8,128,1,fp8,fp8,0,0.010494399815797806
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,32,96,1,128,1,float16,float16,0,0.5333055973052978
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,32,96,1,128,1,float16,fp8,0,0.539134407043457
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,32,96,1,128,1,fp8,fp8,0,0.539740800857544
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,32,96,2,128,1,float16,float16,0,0.5325727939605713
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,32,96,2,128,1,float16,fp8,0,0.5392303943634034
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,64,96,2,128,1,float16,fp8,0,0.014587199687957764
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,64,96,8,128,1,fp8,fp8,0,0.014567999541759491
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,32,96,4,128,1,float16,float16,0,0.535368013381958
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,32,96,4,128,1,float16,fp8,0,0.5382944107055664
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,64,96,1,128,1,fp8,fp8,0,0.010467199981212616
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,32,96,4,128,1,fp8,fp8,0,0.5382880210876465
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,32,96,8,128,1,float16,float16,0,0.5391183853149414
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,32,96,8,128,1,float16,fp8,0,0.5381904125213623
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,32,96,96,128,1,float16,float16,0,0.3320303916931152
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,64,96,4,128,1,fp8,fp8,0,0.010460799932479859
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,32,96,96,128,1,float16,fp8,0,0.32166399955749514
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,32,96,1,128,1,float16,float16,0,0.2736768007278442
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,32,96,1,128,1,float16,fp8,0,0.27489919662475587
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,32,96,96,128,1,fp8,fp8,0,0.32151041030883787
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,32,96,1,128,1,fp8,fp8,0,0.27540318965911864
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,32,96,2,128,1,float16,fp8,0,0.2755104064941406
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,32,96,2,128,1,fp8,fp8,0,0.2742703914642334
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,32,96,4,128,1,float16,float16,0,0.27468318939208985
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,32,96,4,128,1,float16,fp8,0,0.27551679611206054
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,32,96,4,128,1,fp8,fp8,0,0.27444159984588623
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,32,96,8,128,1,float16,float16,0,0.2764240026473999
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,32,96,8,128,1,float16,fp8,0,0.27537920475006106
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,32,96,96,128,1,float16,float16,0,0.17402880191802977
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,32,96,8,128,1,fp8,fp8,0,0.2747056007385254
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,32,96,96,128,1,float16,fp8,0,0.16660319566726683
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,32,96,1,128,1,float16,float16,0,0.1452415943145752
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,32,96,1,128,1,float16,fp8,0,0.14241600036621094
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,32,96,1,128,1,fp8,fp8,0,0.14224640130996705
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,32,96,2,128,1,float16,float16,0,0.1447551965713501
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,32,96,2,128,1,float16,fp8,0,0.14255199432373047
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,32,96,2,128,1,fp8,fp8,0,0.5393455982208252
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,64,96,4,128,1,float16,float16,0,0.014569599926471711
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,32,96,2,128,1,fp8,fp8,0,0.1421056032180786
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,32,96,4,128,1,float16,float16,0,0.14529279470443726
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,32,96,4,128,1,fp8,fp8,0,0.14225280284881592
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,32,96,8,128,1,float16,float16,0,0.14506560564041138
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,32,96,8,128,1,float16,fp8,0,0.1423616051673889
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,32,96,8,128,1,fp8,fp8,0,0.1425279974937439
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,32,96,96,128,1,float16,fp8,0,0.08825600147247314
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,32,96,96,128,1,fp8,fp8,0,0.0879855990409851
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,32,96,1,128,1,float16,float16,0,0.0771120011806488
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,32,96,1,128,1,float16,fp8,0,0.07609279751777649
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,32,96,1,128,1,fp8,fp8,0,0.07543839812278748
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,32,96,2,128,1,float16,float16,0,0.07722880244255066
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,32,96,2,128,1,fp8,fp8,0,0.07530239820480347
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,32,96,4,128,1,float16,float16,0,0.07735679745674133
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,32,96,4,128,1,float16,fp8,0,0.07540959715843201
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,32,96,2,128,1,float16,fp8,0,0.07572000026702881
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,32,96,4,128,1,fp8,fp8,0,0.07598879933357239
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,32,96,8,128,1,float16,fp8,0,0.07608000040054322
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,32,96,8,128,1,fp8,fp8,0,0.07580479979515076
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,32,96,8,128,1,fp8,fp8,0,0.5382319927215576
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,32,96,96,128,1,float16,float16,0,0.050276798009872434
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,32,96,96,128,1,fp8,fp8,0,0.048728001117706296
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,32,96,96,128,1,float16,fp8,0,0.048644798994064334
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,32,96,1,128,1,float16,float16,0,0.04381760060787201
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,32,96,1,128,1,float16,fp8,0,0.04324800074100495
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,32,96,2,128,1,float16,float16,0,0.04404639899730682
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,32,96,1,128,1,fp8,fp8,0,0.04323039948940277
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,32,96,2,128,1,float16,fp8,0,0.043110400438308716
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,32,96,2,128,1,fp8,fp8,0,0.0430976003408432
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,32,96,4,128,1,float16,float16,0,0.04421280026435852
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,32,96,4,128,1,float16,fp8,0,0.04294399917125702
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,32,96,4,128,1,fp8,fp8,0,0.04324800074100495
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,32,96,2,128,1,float16,float16,0,0.273528003692627
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,32,96,8,128,1,float16,float16,0,0.043968001008033754
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,32,96,8,128,1,float16,fp8,0,0.04312160015106201
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,32,96,8,128,1,fp8,fp8,0,0.04294559955596924
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,32,96,96,128,1,float16,fp8,0,0.028880000114440918
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,32,96,96,128,1,fp8,fp8,0,0.028915199637413024
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,32,96,1,128,1,float16,float16,0,0.026836800575256347
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,32,96,1,128,1,float16,fp8,0,0.02686559855937958
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,32,96,1,128,1,fp8,fp8,0,0.026715201139450074
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,32,96,2,128,1,float16,float16,0,0.026881599426269533
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,32,96,96,128,1,float16,float16,0,0.028961598873138428
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,32,96,2,128,1,float16,fp8,0,0.02661440074443817
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,32,96,2,128,1,fp8,fp8,0,0.026881599426269533
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,32,96,4,128,1,float16,float16,0,0.02678399980068207
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,32,96,4,128,1,fp8,fp8,0,0.026876801252365114
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,32,96,8,128,1,float16,float16,0,0.026782399415969847
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,32,96,8,128,1,float16,fp8,0,0.026849600672721862
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,32,96,8,128,1,fp8,fp8,0,0.02677760124206543
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,32,96,96,128,1,float16,float16,0,0.020499199628829956
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,32,96,96,128,1,float16,fp8,0,0.018595199286937713
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,32,96,96,128,1,fp8,fp8,0,0.16722559928894043
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,32,96,96,128,1,fp8,fp8,0,0.01871200054883957
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,32,96,1,128,1,float16,float16,0,0.01671999990940094
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,32,96,1,128,1,float16,fp8,0,0.0166143998503685
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,32,96,1,128,1,fp8,fp8,0,0.016607999801635742
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,32,96,2,128,1,float16,float16,0,0.016553600132465363
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,32,96,2,128,1,float16,fp8,0,0.016607999801635742
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,32,96,4,128,1,float16,float16,0,0.016607999801635742
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,32,96,2,128,1,fp8,fp8,0,0.01658719927072525
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,32,96,4,128,1,float16,fp8,0,0.01661120057106018
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,32,96,4,128,1,fp8,fp8,0,0.01661760061979294
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,32,96,8,128,1,float16,float16,0,0.016872000694274903
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,32,96,8,128,1,float16,fp8,0,0.01663520038127899
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,32,96,8,128,1,fp8,fp8,0,0.016568000614643096
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,32,96,4,128,1,float16,fp8,0,0.14269280433654785
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,32,96,1,128,1,float16,float16,0,0.013131199777126313
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,32,96,96,128,1,float16,float16,0,0.09181439876556396
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,32,96,1,128,1,float16,fp8,0,0.013264000415802002
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,32,96,1,128,1,fp8,fp8,0,0.013758400082588195
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,32,96,96,128,1,float16,fp8,0,0.014584000408649444
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,32,96,2,128,1,float16,float16,0,0.01268800050020218
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,32,96,2,128,1,float16,fp8,0,0.01430879980325699
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,32,96,2,128,1,fp8,fp8,0,0.012771199643611907
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,32,96,4,128,1,float16,fp8,0,0.01419840008020401
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,32,96,4,128,1,fp8,fp8,0,0.012814399600028992
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,32,96,8,128,1,float16,float16,0,0.013129599392414093
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,32,96,8,128,1,float16,fp8,0,0.01281760036945343
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,32,96,8,128,1,fp8,fp8,0,0.013788799941539764
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,32,96,96,128,1,float16,float16,0,0.01252480000257492
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,32,96,96,128,1,float16,fp8,0,0.010523200035095215
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,32,96,96,128,1,fp8,fp8,0,0.010518400371074677
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,32,96,1,128,1,float16,float16,0,0.010515200346708298
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,32,96,1,128,1,float16,fp8,0,0.01048320010304451
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,32,96,8,128,1,float16,float16,0,0.07827039957046508
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,32,96,1,128,1,fp8,fp8,0,0.010364799946546554
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,32,96,2,128,1,float16,fp8,0,0.01040000021457672
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,32,96,2,128,1,fp8,fp8,0,0.010416000336408614
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,32,96,4,128,1,float16,float16,0,0.010463999956846238
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,32,96,4,128,1,float16,fp8,0,0.010417599976062775
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,32,96,4,128,1,fp8,fp8,0,0.010391999781131745
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,32,96,8,128,1,float16,float16,0,0.010416000336408614
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,32,96,8,128,1,float16,fp8,0,0.010390400141477584
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,32,96,8,128,1,fp8,fp8,0,0.010424000024795533
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,16,96,1,128,1,float16,float16,0,0.5121488094329834
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,16,96,1,128,1,fp8,fp8,0,0.49233441352844237
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,32,96,4,128,1,float16,fp8,0,0.02683199942111969
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,16,96,2,128,1,float16,float16,0,0.5117616176605224
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,16,96,2,128,1,float16,fp8,0,0.49201598167419436
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,16,96,2,128,1,fp8,fp8,0,0.49204158782958984
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,16,96,4,128,1,float16,float16,0,0.5131951808929444
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,16,96,4,128,1,float16,fp8,0,0.4920639991760254
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,32,96,96,128,1,float16,float16,0,0.01658560037612915
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,32,96,96,128,1,fp8,fp8,0,0.014483200013637542
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,16,96,4,128,1,fp8,fp8,0,0.49306559562683105
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,16,96,8,128,1,float16,float16,0,0.5158512115478515
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,16,96,8,128,1,float16,fp8,0,0.49171199798583987
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,16,96,96,128,1,float16,float16,0,0.293939208984375
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,32,96,4,128,1,float16,float16,0,0.012503999471664428
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,16,96,8,128,1,fp8,fp8,0,0.49164958000183107
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,16,96,96,128,1,float16,fp8,0,0.2763184070587158
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,16,96,96,128,1,fp8,fp8,0,0.27640159130096437
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,16,96,1,128,1,float16,float16,0,0.2667455911636353
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,16,96,1,128,1,float16,fp8,0,0.2519855976104736
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,16,96,1,128,1,fp8,fp8,0,0.2504735946655273
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,16,96,2,128,1,float16,float16,0,0.26535520553588865
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,16,96,2,128,1,float16,fp8,0,0.2518496036529541
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,16,96,2,128,1,fp8,fp8,0,0.2513520002365112
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,32,96,2,128,1,float16,float16,0,0.010460799932479859
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,16,96,4,128,1,float16,float16,0,0.2665424108505249
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,16,96,4,128,1,float16,fp8,0,0.25154719352722166
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,16,96,4,128,1,fp8,fp8,0,0.2513008117675781
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,16,96,8,128,1,float16,float16,0,0.2672816038131714
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,16,96,8,128,1,float16,fp8,0,0.2524143934249878
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,16,96,96,128,1,float16,float16,0,0.15278719663619994
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,16,96,8,128,1,fp8,fp8,0,0.2514480113983154
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,16,96,96,128,1,fp8,fp8,0,0.14404480457305907
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,16,96,1,128,1,float16,float16,0,0.1380895972251892
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,16,96,1,128,1,fp8,fp8,0,0.131112003326416
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,16,96,2,128,1,float16,float16,0,0.13861919641494752
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,16,96,2,128,1,float16,fp8,0,0.13085919618606567
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,16,96,2,128,1,fp8,fp8,0,0.13148319721221924
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,16,96,4,128,1,float16,float16,0,0.13841919898986815
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,16,96,4,128,1,float16,fp8,0,0.13082879781723022
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,16,96,4,128,1,fp8,fp8,0,0.1306928038597107
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,16,96,8,128,1,float16,float16,0,0.14071199893951417
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,16,96,8,128,1,float16,fp8,0,0.13072160482406617
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,16,96,8,128,1,fp8,fp8,0,0.1304800033569336
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,16,96,96,128,1,float16,float16,0,0.08309760093688964
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,16,96,1,128,1,float16,fp8,0,0.49221439361572267
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,16,96,96,128,1,float16,fp8,0,0.07622399926185608
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,16,96,96,128,1,fp8,fp8,0,0.07596960067749023
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,16,96,1,128,1,float16,fp8,0,0.07009599804878235
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,16,96,1,128,1,fp8,fp8,0,0.07021600008010864
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,16,96,2,128,1,float16,float16,0,0.07655680179595947
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,16,96,2,128,1,float16,fp8,0,0.07052159905433655
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,16,96,4,128,1,float16,float16,0,0.07509599924087525
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,16,96,2,128,1,fp8,fp8,0,0.06997759938240052
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,16,96,4,128,1,fp8,fp8,0,0.07047680020332336
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,16,96,8,128,1,float16,float16,0,0.07523679733276367
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,16,96,8,128,1,float16,fp8,0,0.06970559954643249
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,16,96,96,128,1,float16,float16,0,0.04524959921836853
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,16,96,8,128,1,fp8,fp8,0,0.07059839963912964
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,16,96,96,128,1,float16,fp8,0,0.04303199946880341
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,16,96,96,128,1,fp8,fp8,0,0.04315199851989746
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,16,96,1,128,1,float16,float16,0,0.04325439929962158
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,16,96,1,128,1,float16,fp8,0,0.03930560052394867
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,16,96,1,128,1,fp8,fp8,0,0.03962720036506653
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,16,96,2,128,1,float16,float16,0,0.04324159920215607
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,16,96,2,128,1,float16,fp8,0,0.03936800062656402
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,16,96,4,128,1,float16,fp8,0,0.039504000544548036
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,16,96,4,128,1,float16,float16,0,0.043227198719978335
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,16,96,8,128,1,float16,float16,0,0.04314880073070526
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,16,96,8,128,1,float16,fp8,0,0.040171200037002565
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,16,96,8,128,1,fp8,fp8,0,0.03956319987773895
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,16,96,96,128,1,float16,float16,0,0.028905600309371948
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,16,96,96,128,1,float16,fp8,0,0.026764801144599913
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,16,96,96,128,1,fp8,fp8,0,0.02672959864139557
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,16,96,1,128,1,float16,float16,0,0.02621920108795166
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,16,96,1,128,1,float16,fp8,0,0.024806399643421174
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,16,96,1,128,1,fp8,fp8,0,0.024803200364112855
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,16,96,2,128,1,float16,float16,0,0.026467201113700867
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,16,96,2,128,1,float16,fp8,0,0.024803200364112855
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,16,96,2,128,1,fp8,fp8,0,0.02481279969215393
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,16,96,96,128,1,float16,fp8,0,0.14353439807891846
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,16,96,4,128,1,float16,float16,0,0.026862400770187377
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,16,96,1,128,1,float16,fp8,0,0.131167995929718
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,16,96,4,128,1,float16,fp8,0,0.024780799448490144
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,16,96,4,128,1,fp8,fp8,0,0.024843199551105498
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,16,96,8,128,1,float16,float16,0,0.026652801036834716
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,16,96,8,128,1,float16,fp8,0,0.024775999784469604
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,16,96,8,128,1,fp8,fp8,0,0.024740800261497498
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16,96,96,128,1,float16,float16,0,0.018636800348758698
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16,96,96,128,1,float16,fp8,0,0.01661760061979294
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16,96,1,128,1,float16,float16,0,0.01663520038127899
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16,96,1,128,1,float16,fp8,0,0.016624000668525696
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16,96,96,128,1,fp8,fp8,0,0.016590400040149687
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16,96,1,128,1,fp8,fp8,0,0.01653759926557541
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16,96,2,128,1,float16,fp8,0,0.016607999801635742
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16,96,2,128,1,float16,float16,0,0.016590400040149687
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,16,96,1,128,1,float16,float16,0,0.07588160037994385
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16,96,4,128,1,float16,float16,0,0.016574400663375854
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16,96,4,128,1,float16,fp8,0,0.01660960018634796
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16,96,8,128,1,float16,float16,0,0.016620799899101257
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16,96,8,128,1,float16,fp8,0,0.016752000153064727
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,16,96,4,128,1,float16,fp8,0,0.0705951988697052
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16,96,8,128,1,fp8,fp8,0,0.016627199947834015
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16,96,96,128,1,float16,float16,0,0.015060800313949584
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16,96,96,128,1,float16,fp8,0,0.012788799405097962
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16,96,96,128,1,fp8,fp8,0,0.012548799812793731
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16,96,1,128,1,float16,float16,0,0.012481600046157837
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16,96,1,128,1,float16,fp8,0,0.012491200119256973
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16,96,1,128,1,fp8,fp8,0,0.012518399953842163
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16,96,2,128,1,float16,float16,0,0.012481600046157837
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16,96,2,128,1,float16,fp8,0,0.012454400211572647
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16,96,2,128,1,fp8,fp8,0,0.012681600451469422
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16,96,4,128,1,float16,fp8,0,0.012451200187206269
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16,96,4,128,1,fp8,fp8,0,0.012529599666595458
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16,96,8,128,1,float16,fp8,0,0.012566399574279786
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,16,96,2,128,1,fp8,fp8,0,0.03970719873905182
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16,96,8,128,1,fp8,fp8,0,0.01252480000257492
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16,96,96,128,1,float16,float16,0,0.012510399520397186
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16,96,96,128,1,float16,fp8,0,0.010438399761915207
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16,96,96,128,1,fp8,fp8,0,0.010409600287675857
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16,96,1,128,1,float16,float16,0,0.010387200117111205
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16,96,1,128,1,float16,fp8,0,0.010388799756765366
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,16,96,4,128,1,fp8,fp8,0,0.03946079909801483
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16,96,2,128,1,float16,float16,0,0.010440000146627427
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16,96,1,128,1,fp8,fp8,0,0.01043360009789467
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16,96,2,128,1,float16,fp8,0,0.010422399640083313
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16,96,2,128,1,fp8,fp8,0,0.010360000282526016
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16,96,4,128,1,float16,float16,0,0.010427200049161912
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16,96,4,128,1,float16,fp8,0,0.01064639985561371
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16,96,4,128,1,fp8,fp8,0,0.010425599664449692
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16,96,8,128,1,float16,float16,0,0.010443200170993806
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16,96,8,128,1,float16,fp8,0,0.010372799634933472
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16,96,8,128,1,fp8,fp8,0,0.010391999781131745
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,1,96,1,128,1,float16,fp8,0,0.4797152042388916
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,1,96,1,128,1,fp8,fp8,0,0.47885761260986326
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,1,96,1,128,1,float16,float16,0,0.5103151798248291
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,1,96,2,128,1,float16,float16,0,0.5086991786956787
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,1,96,2,128,1,float16,fp8,0,0.47891840934753416
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,1,96,2,128,1,fp8,fp8,0,0.4798272132873535
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,1,96,4,128,1,float16,float16,0,0.5103231906890869
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16,96,2,128,1,fp8,fp8,0,0.016649599373340606
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16,96,4,128,1,fp8,fp8,0,0.016545599699020384
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,1,96,4,128,1,float16,fp8,0,0.47938880920410154
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,1,96,4,128,1,fp8,fp8,0,0.47926878929138184
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,1,96,8,128,1,float16,float16,0,0.510539197921753
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16,96,4,128,1,float16,float16,0,0.012516799569129943
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,1,96,96,128,1,float16,fp8,0,0.24654560089111327
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16,96,8,128,1,float16,float16,0,0.012577599287033081
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,1,96,96,128,1,fp8,fp8,0,0.24673280715942383
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,1,96,1,128,1,float16,float16,0,0.26403839588165284
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,1,96,1,128,1,float16,fp8,0,0.24470560550689696
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,1,96,1,128,1,fp8,fp8,0,0.24457600116729736
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,1,96,2,128,1,float16,float16,0,0.26147520542144775
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,1,96,2,128,1,float16,fp8,0,0.2447360038757324
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,1,96,2,128,1,fp8,fp8,0,0.24470720291137696
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,1,96,4,128,1,float16,float16,0,0.26272799968719485
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,1,96,4,128,1,float16,fp8,0,0.24535999298095704
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,1,96,4,128,1,fp8,fp8,0,0.24447200298309327
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,1,96,8,128,1,float16,fp8,0,0.24575679302215575
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,1,96,8,128,1,fp8,fp8,0,0.2451200008392334
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1,96,96,128,1,float16,float16,0,0.14035359621047974
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1,96,96,128,1,float16,fp8,0,0.12910239696502684
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1,96,96,128,1,fp8,fp8,0,0.12901920080184937
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1,96,1,128,1,float16,fp8,0,0.1282047986984253
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1,96,1,128,1,fp8,fp8,0,0.12810399532318115
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1,96,2,128,1,float16,fp8,0,0.12798399925231935
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1,96,2,128,1,float16,float16,0,0.13774240016937256
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1,96,4,128,1,float16,float16,0,0.13816319704055785
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1,96,4,128,1,float16,fp8,0,0.12871840000152587
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1,96,4,128,1,fp8,fp8,0,0.12883520126342773
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1,96,8,128,1,float16,float16,0,0.13783520460128784
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1,96,8,128,1,float16,fp8,0,0.12832640409469603
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1,96,96,128,1,float16,float16,0,0.07644799947738648
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1,96,8,128,1,fp8,fp8,0,0.1283552050590515
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,1,96,8,128,1,float16,fp8,0,0.4787759780883789
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,1,96,96,128,1,float16,float16,0,0.26256160736083983
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1,96,96,128,1,float16,fp8,0,0.06900799870491028
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1,96,1,128,1,float16,float16,0,0.074590402841568
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,1,96,8,128,1,fp8,fp8,0,0.47896318435668944
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1,96,1,128,1,float16,fp8,0,0.06817600131034851
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1,96,1,128,1,fp8,fp8,0,0.06824160218238831
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1,96,2,128,1,float16,float16,0,0.07458080053329467
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1,96,2,128,1,float16,fp8,0,0.06832479834556579
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1,96,2,128,1,fp8,fp8,0,0.06835839748382569
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1,96,4,128,1,float16,float16,0,0.07410079836845399
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1,96,4,128,1,float16,fp8,0,0.068886399269104
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1,96,4,128,1,fp8,fp8,0,0.06885280013084412
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1,96,8,128,1,float16,float16,0,0.07414240241050721
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1,96,96,128,1,float16,float16,0,0.04465759992599487
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1,96,8,128,1,fp8,fp8,0,0.0685375988483429
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1,96,96,128,1,float16,fp8,0,0.039238399267196654
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1,96,1,128,1,float16,float16,0,0.04235199987888336
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1,96,1,128,1,float16,fp8,0,0.03914079964160919
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1,96,1,128,1,fp8,fp8,0,0.03914560079574585
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1,96,2,128,1,float16,fp8,0,0.03916319906711578
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1,96,2,128,1,fp8,fp8,0,0.039094400405883786
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,1,96,8,128,1,float16,float16,0,0.26034400463104246
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1,96,4,128,1,float16,float16,0,0.04294080138206482
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1,96,4,128,1,float16,fp8,0,0.039099198579788205
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1,96,4,128,1,fp8,fp8,0,0.039131200313568114
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1,96,8,128,1,float16,float16,0,0.04142560064792633
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1,96,8,128,1,float16,fp8,0,0.03916960060596466
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1,96,8,128,1,fp8,fp8,0,0.03918080031871796
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1,96,1,128,1,float16,float16,0,0.13830560445785522
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1,96,96,128,1,float16,float16,0,0.028110399842262268
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1,96,96,128,1,float16,fp8,0,0.024799999594688416
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1,96,1,128,1,float16,float16,0,0.025654399394989015
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1,96,1,128,1,fp8,fp8,0,0.024820800125598907
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1,96,2,128,1,float16,float16,0,0.025633600354194642
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1,96,2,128,1,float16,fp8,0,0.024750399589538574
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1,96,2,128,1,fp8,fp8,0,0.024753600358963013
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1,96,4,128,1,float16,float16,0,0.024835200607776643
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1,96,4,128,1,float16,fp8,0,0.02468640059232712
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1,96,4,128,1,fp8,fp8,0,0.024799999594688416
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1,96,8,128,1,float16,float16,0,0.02481440007686615
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1,96,8,128,1,float16,fp8,0,0.02479040026664734
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1,96,8,128,1,fp8,fp8,0,0.024803200364112855
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1,96,96,128,1,float16,float16,0,0.01868479996919632
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1,96,96,128,1,float16,fp8,0,0.016515199840068818
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1,96,96,128,1,fp8,fp8,0,0.016616000235080718
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1,96,1,128,1,float16,float16,0,0.016622400283813475
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1,96,1,128,1,float16,fp8,0,0.016539199650287627
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1,96,1,128,1,fp8,fp8,0,0.015513600409030914
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1,96,96,128,1,fp8,fp8,0,0.06926720142364502
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1,96,2,128,1,float16,float16,0,0.01656160056591034
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1,96,2,128,1,float16,fp8,0,0.016547200083732606
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1,96,2,128,1,fp8,fp8,0,0.015724800527095795
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1,96,4,128,1,float16,float16,0,0.0165583997964859
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1,96,4,128,1,float16,fp8,0,0.015881599485874177
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1,96,8,128,1,float16,float16,0,0.01661919951438904
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1,96,8,128,1,float16,fp8,0,0.06827999949455262
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1,96,8,128,1,float16,fp8,0,0.016568000614643096
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1,96,8,128,1,fp8,fp8,0,0.01653279960155487
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1,96,96,128,1,float16,float16,0,0.014667199552059173
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1,96,96,128,1,float16,fp8,0,0.01250080019235611
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1,96,2,128,1,float16,float16,0,0.042259201407432556
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1,96,1,128,1,float16,float16,0,0.012484800070524216
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1,96,1,128,1,float16,fp8,0,0.012492799758911132
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1,96,1,128,1,fp8,fp8,0,0.012606400251388549
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1,96,2,128,1,float16,float16,0,0.012628799676895142
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1,96,2,128,1,float16,fp8,0,0.012532800436019897
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1,96,96,128,1,fp8,fp8,0,0.024804799258708952
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1,96,4,128,1,float16,float16,0,0.012540799379348756
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1,96,4,128,1,float16,fp8,0,0.012491200119256973
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1,96,4,128,1,fp8,fp8,0,0.012492799758911132
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1,96,2,128,1,fp8,fp8,0,0.12788959741592407
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1,96,8,128,1,float16,fp8,0,0.012481600046157837
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1,96,8,128,1,float16,float16,0,0.01260959953069687
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1,96,8,128,1,fp8,fp8,0,0.012531200051307678
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1,96,96,128,1,float16,float16,0,0.012462399899959564
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1,96,96,128,1,float16,fp8,0,0.010486400127410889
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1,96,1,128,1,float16,float16,0,0.01048479974269867
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1,96,1,128,1,float16,fp8,0,0.01048159971833229
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1,96,1,128,1,fp8,fp8,0,0.010420800000429154
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1,96,2,128,1,float16,float16,0,0.010422399640083313
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1,96,2,128,1,float16,fp8,0,0.010491199791431427
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1,96,2,128,1,fp8,fp8,0,0.01040000021457672
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1,96,4,128,1,float16,float16,0,0.01043199971318245
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1,96,4,128,1,float16,fp8,0,0.010480000078678131
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1,96,4,128,1,fp8,fp8,0,0.010478399693965912
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1,96,8,128,1,float16,float16,0,0.010478399693965912
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1,96,8,128,1,float16,fp8,0,0.010476800054311753
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1,96,4,128,1,fp8,fp8,0,0.016524800658226015
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1,96,8,128,1,fp8,fp8,0,0.01037919968366623
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1,96,96,128,1,fp8,fp8,0,0.03916800022125244
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1,96,96,128,1,fp8,fp8,0,0.012508800625801087
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1,96,2,128,1,fp8,fp8,0,0.012561599910259246
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1,96,1,128,1,float16,fp8,0,0.024766400456428528
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1,96,96,128,1,fp8,fp8,0,0.010476800054311753
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,16384,64,1,128,1,float16,fp8,0,23.152793884277344
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,16384,64,2,128,1,float16,fp8,0,22.930726623535158
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,16384,64,1,128,1,fp8,fp8,0,23.159028625488283
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,16384,64,2,128,1,fp8,fp8,0,22.863133239746094
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,16384,64,4,128,1,float16,fp8,0,22.993621826171875
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,16384,64,1,128,1,float16,float16,0,29.310684204101562
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,16384,64,2,128,1,float16,float16,0,29.35445556640625
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,16384,64,4,128,1,float16,float16,0,29.379244995117187
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16384,64,64,128,1,float16,fp8,0,11.832588958740235
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16384,64,64,128,1,fp8,fp8,0,11.864137268066406
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16384,64,1,128,1,float16,float16,0,14.690072631835937
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,16384,64,4,128,1,fp8,fp8,0,23.39904022216797
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,16384,64,8,128,1,float16,fp8,0,23.50640106201172
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,16384,64,8,128,1,fp8,fp8,0,23.23201904296875
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,16384,64,8,128,1,float16,float16,0,29.52545166015625
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16384,64,1,128,1,float16,fp8,0,11.785953521728516
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16384,64,1,128,1,fp8,fp8,0,11.6993408203125
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16384,64,2,128,1,float16,fp8,0,11.79767074584961
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16384,64,2,128,1,fp8,fp8,0,11.741190338134766
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16384,64,2,128,1,float16,float16,0,14.730561828613281
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16384,64,4,128,1,float16,float16,0,14.868937683105468
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16384,64,4,128,1,float16,fp8,0,11.478246307373047
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16384,64,4,128,1,fp8,fp8,0,11.6029052734375
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16384,64,64,128,1,float16,fp8,0,6.08642578125
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16384,64,8,128,1,float16,fp8,0,11.870755004882813
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16384,64,8,128,1,fp8,fp8,0,11.799632263183593
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16384,64,8,128,1,float16,float16,0,15.008990478515624
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16384,64,64,128,1,fp8,fp8,0,6.086793518066406
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16384,64,1,128,1,float16,float16,0,7.505540466308593
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16384,64,1,128,1,float16,fp8,0,5.765464019775391
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16384,64,1,128,1,fp8,fp8,0,5.886009597778321
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16384,64,2,128,1,fp8,fp8,0,5.760897445678711
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16384,64,2,128,1,float16,fp8,0,5.756966400146484
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16384,64,2,128,1,float16,float16,0,7.651267242431641
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16384,64,4,128,1,float16,fp8,0,5.795105743408203
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16384,64,4,128,1,float16,float16,0,7.462843322753907
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16384,64,4,128,1,fp8,fp8,0,5.905968093872071
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16384,64,8,128,1,float16,fp8,0,5.8058624267578125
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16384,64,8,128,1,float16,float16,0,7.570118713378906
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16384,64,64,128,1,float16,fp8,0,3.235230255126953
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16384,64,64,128,1,fp8,fp8,0,3.06790714263916
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16384,64,1,128,1,float16,float16,0,3.615755081176758
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16384,64,1,128,1,float16,fp8,0,3.1633392333984376
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16384,64,8,128,1,fp8,fp8,0,5.878862380981445
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16384,64,1,128,1,fp8,fp8,0,2.896444892883301
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16384,64,2,128,1,float16,float16,0,3.736368179321289
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16384,64,2,128,1,float16,fp8,0,3.285819244384766
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16384,64,2,128,1,fp8,fp8,0,2.8751760482788087
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16384,64,4,128,1,float16,fp8,0,2.9148096084594726
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16384,64,4,128,1,float16,float16,0,3.5146129608154295
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16384,64,4,128,1,fp8,fp8,0,3.068681526184082
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16384,64,8,128,1,float16,fp8,0,2.9190240859985352
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16384,64,8,128,1,float16,float16,0,3.624425506591797
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16384,64,8,128,1,fp8,fp8,0,2.9053472518920898
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,12288,64,1,128,1,float16,fp8,0,13.471269226074218
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,12288,64,1,128,1,fp8,fp8,0,13.337907409667968
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,12288,64,2,128,1,float16,fp8,0,13.450941467285157
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,12288,64,1,128,1,float16,float16,0,16.999479675292967
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,12288,64,2,128,1,float16,float16,0,16.84443817138672
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,12288,64,2,128,1,fp8,fp8,0,13.540939331054688
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,12288,64,4,128,1,float16,fp8,0,13.442657470703125
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,12288,64,4,128,1,fp8,fp8,0,13.374856567382812
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,12288,64,4,128,1,float16,float16,0,17.01154327392578
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,12288,64,8,128,1,float16,float16,0,17.033447265625
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,12288,64,64,128,1,float16,fp8,0,7.048980712890625
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,12288,64,8,128,1,float16,fp8,0,13.660647583007812
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,12288,64,8,128,1,fp8,fp8,0,13.464274597167968
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,12288,64,64,128,1,fp8,fp8,0,7.062907409667969
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,12288,64,1,128,1,float16,float16,0,8.490214538574218
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,12288,64,1,128,1,float16,fp8,0,6.655003356933594
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,12288,64,1,128,1,fp8,fp8,0,6.6090034484863285
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,12288,64,2,128,1,float16,float16,0,8.481825256347657
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,12288,64,2,128,1,float16,fp8,0,6.716681671142578
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,12288,64,2,128,1,fp8,fp8,0,6.7188163757324215
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,12288,64,4,128,1,float16,float16,0,8.317655944824219
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,12288,64,4,128,1,fp8,fp8,0,6.706591796875
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,12288,64,4,128,1,float16,fp8,0,6.674517059326172
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,12288,64,8,128,1,float16,float16,0,8.763625335693359
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,12288,64,8,128,1,float16,fp8,0,6.7414497375488285
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,12288,64,64,128,1,float16,fp8,0,3.668636703491211
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,12288,64,64,128,1,fp8,fp8,0,3.622140884399414
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,12288,64,8,128,1,fp8,fp8,0,6.724467468261719
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,12288,64,1,128,1,float16,float16,0,4.183358383178711
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,12288,64,1,128,1,float16,fp8,0,3.3723087310791016
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,12288,64,1,128,1,fp8,fp8,0,3.356409454345703
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,12288,64,2,128,1,float16,fp8,0,3.3311439514160157
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,12288,64,2,128,1,fp8,fp8,0,3.3708465576171873
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,12288,64,2,128,1,float16,float16,0,4.227795028686524
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,12288,64,4,128,1,float16,fp8,0,3.3733055114746096
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,12288,64,4,128,1,float16,float16,0,4.326809692382812
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,12288,64,4,128,1,fp8,fp8,0,3.557484817504883
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,12288,64,8,128,1,float16,float16,0,4.262267303466797
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,12288,64,8,128,1,fp8,fp8,0,3.2555137634277345
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,12288,64,8,128,1,float16,fp8,0,3.5934608459472654
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,12288,64,64,128,1,float16,fp8,0,1.7477855682373047
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,12288,64,64,128,1,fp8,fp8,0,1.8236799240112305
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,12288,64,1,128,1,float16,float16,0,1.9550655364990235
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,12288,64,1,128,1,float16,fp8,0,1.688528060913086
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,12288,64,2,128,1,float16,float16,0,1.9682079315185548
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,12288,64,1,128,1,fp8,fp8,0,2.0170160293579102
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,12288,64,2,128,1,fp8,fp8,0,1.6737199783325196
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,12288,64,2,128,1,float16,fp8,0,2.019436836242676
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,12288,64,4,128,1,float16,float16,0,1.9135055541992188
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,12288,64,4,128,1,float16,fp8,0,1.6969024658203125
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,12288,64,4,128,1,fp8,fp8,0,1.9866960525512696
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,12288,64,8,128,1,float16,float16,0,1.9900735855102538
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,12288,64,8,128,1,float16,fp8,0,1.688420867919922
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,12288,64,8,128,1,fp8,fp8,0,2.007441520690918
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,10240,64,1,128,1,float16,fp8,0,9.504073333740234
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,10240,64,1,128,1,float16,float16,0,11.945654296875
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,10240,64,1,128,1,fp8,fp8,0,9.568736267089843
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,12288,64,64,128,1,float16,float16,0,4.261932754516602
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,12288,64,64,128,1,float16,float16,0,2.063212776184082
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16384,64,64,128,1,float16,float16,0,3.711590576171875
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,10240,64,2,128,1,float16,float16,0,12.007061004638672
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16384,64,64,128,1,float16,float16,0,7.575873565673828
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,10240,64,2,128,1,float16,fp8,0,9.580830383300782
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16384,64,64,128,1,float16,float16,0,15.014993286132812
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,12288,64,64,128,1,float16,float16,0,8.830912017822266
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,10240,64,2,128,1,fp8,fp8,0,9.158753967285156
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,10240,64,4,128,1,float16,fp8,0,9.180184173583985
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,10240,64,4,128,1,fp8,fp8,0,9.397077178955078
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,10240,64,4,128,1,float16,float16,0,11.645123291015626
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,10240,64,8,128,1,float16,float16,0,12.084107208251954
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,10240,64,64,128,1,float16,fp8,0,4.966390228271484
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,10240,64,64,128,1,fp8,fp8,0,4.917275238037109
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,10240,64,64,128,1,float16,float16,0,6.30306396484375
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,10240,64,8,128,1,float16,fp8,0,9.648953247070313
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,10240,64,1,128,1,float16,fp8,0,5.0312049865722654
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,10240,64,1,128,1,float16,float16,0,5.98559341430664
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,10240,64,8,128,1,fp8,fp8,0,9.373403167724609
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,10240,64,1,128,1,fp8,fp8,0,4.705660629272461
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,10240,64,2,128,1,float16,fp8,0,4.631766510009766
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,10240,64,2,128,1,float16,float16,0,6.080614471435547
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,10240,64,2,128,1,fp8,fp8,0,4.951729583740234
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,10240,64,4,128,1,float16,fp8,0,4.6888782501220705
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,10240,64,4,128,1,fp8,fp8,0,4.767804718017578
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,10240,64,4,128,1,float16,float16,0,6.1348224639892575
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,10240,64,64,128,1,float16,fp8,0,2.4909055709838865
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,10240,64,64,128,1,float16,float16,0,3.1948383331298826
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,10240,64,64,128,1,fp8,fp8,0,2.468177604675293
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,10240,64,8,128,1,float16,fp8,0,4.6615039825439455
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,10240,64,8,128,1,fp8,fp8,0,4.726473617553711
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,10240,64,8,128,1,float16,float16,0,5.931051254272461
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,10240,64,1,128,1,float16,float16,0,2.918075180053711
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,10240,64,1,128,1,float16,fp8,0,2.675796890258789
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,10240,64,1,128,1,fp8,fp8,0,2.3498064041137696
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,10240,64,2,128,1,float16,fp8,0,2.3573551177978516
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,10240,64,2,128,1,fp8,fp8,0,2.3420528411865233
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,10240,64,2,128,1,float16,float16,0,2.948084831237793
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,10240,64,4,128,1,float16,float16,0,2.910215950012207
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,10240,64,4,128,1,float16,fp8,0,2.5834800720214846
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,10240,64,4,128,1,fp8,fp8,0,2.762623977661133
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,10240,64,8,128,1,float16,float16,0,2.8937328338623045
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,10240,64,64,128,1,float16,float16,0,1.467238426208496
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,10240,64,8,128,1,float16,fp8,0,2.3557167053222656
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,10240,64,64,128,1,float16,fp8,0,1.6200592041015625
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,10240,64,1,128,1,float16,float16,0,1.4029279708862306
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,10240,64,8,128,1,fp8,fp8,0,2.3509296417236327
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,10240,64,1,128,1,float16,fp8,0,1.1808735847473144
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,10240,64,64,128,1,fp8,fp8,0,1.643132781982422
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,10240,64,1,128,1,fp8,fp8,0,1.4410176277160645
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,10240,64,2,128,1,float16,float16,0,1.4774800300598145
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,10240,64,2,128,1,float16,fp8,0,1.205668830871582
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,10240,64,4,128,1,float16,fp8,0,1.2127039909362793
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,10240,64,2,128,1,fp8,fp8,0,1.2201456069946288
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,10240,64,8,128,1,float16,float16,0,1.3902463912963867
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,10240,64,8,128,1,float16,fp8,0,1.317140769958496
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,10240,64,8,128,1,fp8,fp8,0,1.3672240257263184
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,10240,64,4,128,1,fp8,fp8,0,1.1718688011169434
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,10240,64,4,128,1,float16,float16,0,1.4858639717102051
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,8192,64,1,128,1,float16,fp8,0,12.417675018310547
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,8192,64,1,128,1,fp8,fp8,0,12.221393585205078
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,8192,64,2,128,1,float16,fp8,0,12.31588134765625
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,8192,64,1,128,1,float16,float16,0,15.742948913574219
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,8192,64,2,128,1,fp8,fp8,0,12.078947448730469
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,8192,64,2,128,1,float16,float16,0,15.754617309570312
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,8192,64,4,128,1,float16,float16,0,15.197027587890625
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,8192,64,4,128,1,float16,fp8,0,12.050611114501953
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,8192,64,64,128,1,float16,fp8,0,6.750335693359375
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,8192,64,64,128,1,float16,float16,0,8.65323486328125
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,8192,64,4,128,1,fp8,fp8,0,12.436420440673828
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,8192,64,8,128,1,float16,fp8,0,12.450514984130859
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,8192,64,64,128,1,fp8,fp8,0,6.67431869506836
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,8192,64,1,128,1,float16,float16,0,7.885816192626953
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,8192,64,8,128,1,fp8,fp8,0,12.382810974121094
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,8192,64,8,128,1,float16,float16,0,15.889122009277344
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,8192,64,1,128,1,float16,fp8,0,6.1361854553222654
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,8192,64,1,128,1,fp8,fp8,0,6.229702377319336
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,8192,64,2,128,1,float16,fp8,0,6.182129669189453
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,8192,64,2,128,1,fp8,fp8,0,6.018230438232422
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,8192,64,2,128,1,float16,float16,0,7.861354827880859
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,8192,64,4,128,1,float16,fp8,0,6.186665725708008
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,8192,64,4,128,1,fp8,fp8,0,6.201561737060547
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,8192,64,4,128,1,float16,float16,0,7.856304168701172
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,8192,64,64,128,1,float16,fp8,0,3.5017776489257812
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,8192,64,64,128,1,float16,float16,0,4.1360126495361325
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,8192,64,8,128,1,float16,fp8,0,6.126412963867187
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,8192,64,64,128,1,fp8,fp8,0,3.400980758666992
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,8192,64,8,128,1,fp8,fp8,0,6.158379364013672
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,8192,64,1,128,1,float16,fp8,0,3.1990640640258787
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,8192,64,1,128,1,float16,float16,0,3.6797279357910155
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,8192,64,1,128,1,fp8,fp8,0,3.0781551361083985
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,8192,64,2,128,1,float16,float16,0,3.9359249114990233
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,8192,64,2,128,1,float16,fp8,0,3.029363250732422
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,8192,64,2,128,1,fp8,fp8,0,3.1210527420043945
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,8192,64,8,128,1,float16,float16,0,8.10080337524414
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,8192,64,4,128,1,float16,fp8,0,2.9436576843261717
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,8192,64,4,128,1,float16,float16,0,3.8126880645751955
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,8192,64,4,128,1,fp8,fp8,0,3.253996658325195
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,8192,64,64,128,1,float16,fp8,0,1.615065574645996
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,8192,64,8,128,1,float16,float16,0,3.8908031463623045
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,8192,64,64,128,1,fp8,fp8,0,1.6605920791625977
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,8192,64,8,128,1,fp8,fp8,0,3.1141647338867187
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,8192,64,8,128,1,float16,fp8,0,3.2846065521240235
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,8192,64,1,128,1,float16,fp8,0,1.530675220489502
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,8192,64,1,128,1,float16,float16,0,1.9968832015991211
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,8192,64,1,128,1,fp8,fp8,0,1.5088560104370117
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,8192,64,2,128,1,float16,fp8,0,1.5641136169433594
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,8192,64,2,128,1,float16,float16,0,1.9911184310913086
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,8192,64,2,128,1,fp8,fp8,0,1.557804775238037
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,8192,64,4,128,1,float16,fp8,0,1.5267760276794433
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,8192,64,4,128,1,float16,float16,0,1.7352544784545898
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,8192,64,4,128,1,fp8,fp8,0,1.7626239776611328
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,8192,64,64,128,1,float16,fp8,0,0.8554592132568359
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,8192,64,8,128,1,float16,float16,0,1.8189407348632813
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,8192,64,64,128,1,float16,float16,0,1.0026063919067383
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,8192,64,8,128,1,float16,fp8,0,1.520032024383545
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,8192,64,64,128,1,fp8,fp8,0,1.0373583793640138
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,8192,64,64,128,1,float16,float16,0,2.005855941772461
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,8192,64,1,128,1,float16,float16,0,0.9008336067199707
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,8192,64,8,128,1,fp8,fp8,0,1.8486303329467773
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,8192,64,1,128,1,float16,fp8,0,0.7904767990112305
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,8192,64,2,128,1,float16,fp8,0,0.7920303821563721
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,8192,64,1,128,1,fp8,fp8,0,0.9390591621398926
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,8192,64,2,128,1,float16,float16,0,0.9813823699951172
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,8192,64,2,128,1,fp8,fp8,0,0.8774160385131836
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,8192,64,4,128,1,float16,float16,0,0.8956447601318359
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,8192,64,4,128,1,fp8,fp8,0,0.8328127861022949
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,8192,64,8,128,1,float16,float16,0,0.9038847923278809
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,8192,64,8,128,1,float16,fp8,0,0.7786704063415527
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,8192,64,8,128,1,fp8,fp8,0,0.8876848220825195
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,8192,64,4,128,1,float16,fp8,0,0.8559103965759277
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,6144,64,1,128,1,float16,fp8,0,7.0344383239746096
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,6144,64,1,128,1,fp8,fp8,0,7.074308776855469
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,6144,64,2,128,1,fp8,fp8,0,7.156972503662109
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,6144,64,2,128,1,float16,fp8,0,7.179695892333984
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,6144,64,1,128,1,float16,float16,0,9.092955017089844
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,6144,64,2,128,1,float16,float16,0,9.099662780761719
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,6144,64,4,128,1,float16,float16,0,9.050732421875
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,6144,64,4,128,1,float16,fp8,0,7.005225372314453
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,6144,64,64,128,1,float16,fp8,0,4.013971328735352
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,6144,64,64,128,1,float16,float16,0,5.026094436645508
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,6144,64,64,128,1,fp8,fp8,0,4.140908813476562
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,6144,64,4,128,1,fp8,fp8,0,7.1899871826171875
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,6144,64,8,128,1,fp8,fp8,0,7.1697853088378904
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,6144,64,8,128,1,float16,fp8,0,7.2465568542480465
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,6144,64,1,128,1,float16,float16,0,4.35701904296875
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,6144,64,8,128,1,float16,float16,0,9.120006561279297
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,6144,64,1,128,1,float16,fp8,0,3.5500816345214843
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,6144,64,2,128,1,float16,fp8,0,3.555526351928711
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,6144,64,1,128,1,fp8,fp8,0,3.9422767639160154
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,6144,64,2,128,1,fp8,fp8,0,3.58996467590332
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,6144,64,2,128,1,float16,float16,0,4.572387313842773
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,6144,64,4,128,1,float16,fp8,0,3.7440254211425783
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,6144,64,4,128,1,float16,float16,0,4.566939163208008
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,6144,64,4,128,1,fp8,fp8,0,3.894303894042969
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,6144,64,64,128,1,float16,float16,0,2.344897651672363
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,6144,64,64,128,1,fp8,fp8,0,1.9786239624023438
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,6144,64,64,128,1,float16,fp8,0,2.4373231887817384
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,6144,64,8,128,1,float16,float16,0,4.533631896972656
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,6144,64,8,128,1,float16,fp8,0,3.576283264160156
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,6144,64,8,128,1,fp8,fp8,0,3.43298225402832
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,6144,64,1,128,1,float16,float16,0,2.1529680252075196
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,6144,64,1,128,1,float16,fp8,0,2.0561279296875
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,6144,64,1,128,1,fp8,fp8,0,1.7773199081420898
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,6144,64,2,128,1,fp8,fp8,0,1.8017711639404297
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,6144,64,2,128,1,float16,float16,0,2.117359924316406
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,6144,64,2,128,1,float16,fp8,0,2.1399023056030275
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,6144,64,4,128,1,float16,float16,0,2.0054176330566404
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,6144,64,4,128,1,float16,fp8,0,2.049286460876465
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,6144,64,4,128,1,fp8,fp8,0,1.706163215637207
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,6144,64,64,128,1,float16,fp8,0,1.045584011077881
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,6144,64,64,128,1,float16,float16,0,1.3498784065246583
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,6144,64,8,128,1,float16,fp8,0,1.7685312271118163
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,6144,64,64,128,1,fp8,fp8,0,1.2171024322509765
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,6144,64,8,128,1,float16,float16,0,2.1077951431274413
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,6144,64,8,128,1,fp8,fp8,0,1.7956367492675782
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,6144,64,1,128,1,float16,float16,0,1.0225279808044434
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,6144,64,1,128,1,float16,fp8,0,0.8798048019409179
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,6144,64,1,128,1,fp8,fp8,0,0.9067248344421387
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,6144,64,2,128,1,float16,fp8,0,0.9344160079956054
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,6144,64,2,128,1,float16,float16,0,1.1637264251708985
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,6144,64,2,128,1,fp8,fp8,0,0.9995887756347657
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,6144,64,4,128,1,float16,fp8,0,0.915556812286377
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,6144,64,4,128,1,float16,float16,0,1.2282976150512694
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,6144,64,4,128,1,fp8,fp8,0,1.0812095642089843
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,6144,64,8,128,1,float16,fp8,0,0.8927871704101562
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,6144,64,64,128,1,float16,fp8,0,0.5419583797454834
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,6144,64,64,128,1,float16,float16,0,0.6433887958526612
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,6144,64,8,128,1,fp8,fp8,0,0.9057503700256347
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,6144,64,1,128,1,float16,float16,0,0.651259183883667
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,6144,64,1,128,1,float16,fp8,0,0.5186816215515136
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,6144,64,1,128,1,fp8,fp8,0,0.47120962142944334
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,6144,64,2,128,1,float16,float16,0,0.5269440174102783
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,6144,64,2,128,1,fp8,fp8,0,0.47934880256652834
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,6144,64,2,128,1,float16,fp8,0,0.5522496223449707
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,6144,64,4,128,1,float16,fp8,0,0.492464017868042
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,6144,64,4,128,1,fp8,fp8,0,0.4686607837677002
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,6144,64,4,128,1,float16,float16,0,0.634388780593872
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,6144,64,8,128,1,float16,float16,0,0.5299312114715576
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,6144,64,8,128,1,float16,fp8,0,0.46927838325500487
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,6144,64,8,128,1,fp8,fp8,0,0.5542895793914795
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,6144,64,8,128,1,float16,float16,0,1.038742446899414
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,6144,64,64,128,1,fp8,fp8,0,0.5307216167449951
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,4096,64,1,128,1,float16,fp8,0,6.660240173339844
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,4096,64,1,128,1,fp8,fp8,0,6.704443359375
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,4096,64,2,128,1,fp8,fp8,0,6.734951782226562
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,4096,64,1,128,1,float16,float16,0,8.464756774902344
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,4096,64,2,128,1,float16,float16,0,8.660982513427735
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,4096,64,4,128,1,float16,fp8,0,6.619617462158203
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,4096,64,2,128,1,float16,fp8,0,6.781620788574219
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,4096,64,4,128,1,float16,float16,0,8.339801788330078
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,4096,64,4,128,1,fp8,fp8,0,6.761883544921875
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,4096,64,64,128,1,float16,float16,0,4.911048126220703
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,4096,64,8,128,1,float16,fp8,0,6.774018859863281
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,4096,64,64,128,1,fp8,fp8,0,3.80633430480957
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,4096,64,1,128,1,float16,float16,0,4.0679279327392575
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,4096,64,8,128,1,float16,float16,0,8.626361846923828
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,4096,64,8,128,1,fp8,fp8,0,6.9341987609863285
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,4096,64,1,128,1,float16,fp8,0,3.4290081024169923
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,4096,64,64,128,1,float16,fp8,0,3.826923370361328
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,4096,64,1,128,1,fp8,fp8,0,3.442931365966797
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,4096,64,2,128,1,float16,float16,0,4.25097770690918
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,4096,64,2,128,1,float16,fp8,0,3.330187225341797
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,4096,64,2,128,1,fp8,fp8,0,3.590419387817383
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,4096,64,4,128,1,float16,fp8,0,3.4165409088134764
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,4096,64,4,128,1,float16,float16,0,4.195547103881836
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,4096,64,4,128,1,fp8,fp8,0,3.413220977783203
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,4096,64,64,128,1,float16,fp8,0,1.9335599899291993
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,4096,64,64,128,1,float16,float16,0,2.2851696014404297
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,4096,64,8,128,1,float16,fp8,0,3.252374267578125
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,4096,64,8,128,1,float16,float16,0,3.914825439453125
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,4096,64,64,128,1,fp8,fp8,0,1.9831167221069337
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,4096,64,1,128,1,float16,float16,0,2.1572591781616213
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,4096,64,8,128,1,fp8,fp8,0,3.58883056640625
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,4096,64,1,128,1,float16,fp8,0,1.6781440734863282
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,4096,64,1,128,1,fp8,fp8,0,1.692411231994629
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,4096,64,2,128,1,float16,fp8,0,1.6828863143920898
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,4096,64,2,128,1,float16,float16,0,2.0485584259033205
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,4096,64,2,128,1,fp8,fp8,0,1.8704048156738282
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,4096,64,4,128,1,float16,float16,0,1.9752799987792968
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,4096,64,4,128,1,fp8,fp8,0,1.6832944869995117
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,4096,64,4,128,1,float16,fp8,0,2.005672073364258
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,4096,64,64,128,1,float16,fp8,0,0.9846495628356934
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,4096,64,8,128,1,float16,float16,0,2.0221584320068358
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,4096,64,64,128,1,float16,float16,0,1.2085712432861329
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,4096,64,8,128,1,float16,fp8,0,1.6706592559814453
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,4096,64,64,128,1,fp8,fp8,0,1.1753727912902832
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,4096,64,1,128,1,float16,float16,0,0.9463552474975586
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,4096,64,1,128,1,float16,fp8,0,0.878052806854248
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,4096,64,1,128,1,fp8,fp8,0,0.8380928039550781
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,4096,64,2,128,1,fp8,fp8,0,0.8671296119689942
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,4096,64,2,128,1,float16,fp8,0,0.9983183860778808
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,4096,64,2,128,1,float16,float16,0,1.1305567741394043
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,4096,64,4,128,1,float16,fp8,0,0.8530287742614746
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,4096,64,4,128,1,fp8,fp8,0,0.8440976142883301
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,4096,64,8,128,1,float16,float16,0,0.9574671745300293
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,4096,64,64,128,1,float16,float16,0,0.6808720111846924
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,4096,64,8,128,1,float16,fp8,0,0.8388463973999023
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,4096,64,64,128,1,float16,fp8,0,0.5020527839660645
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,4096,64,8,128,1,fp8,fp8,0,0.9909551620483399
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,4096,64,64,128,1,fp8,fp8,0,0.6068560123443604
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,4096,64,1,128,1,float16,fp8,0,0.4388864040374756
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,4096,64,1,128,1,float16,float16,0,0.5627520084381104
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,4096,64,1,128,1,fp8,fp8,0,0.4370751857757568
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,4096,64,2,128,1,float16,float16,0,0.497324800491333
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,4096,64,2,128,1,fp8,fp8,0,0.4385727882385254
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,4096,64,2,128,1,float16,fp8,0,0.5295584201812744
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,4096,64,4,128,1,float16,fp8,0,0.43797760009765624
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,4096,64,4,128,1,float16,float16,0,0.5406623840332031
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,4096,64,4,128,1,fp8,fp8,0,0.47628159523010255
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,4096,64,8,128,1,float16,float16,0,0.4938864231109619
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,4096,64,8,128,1,fp8,fp8,0,1.6539648056030274
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,4096,64,64,128,1,float16,fp8,0,0.3062432050704956
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,4096,64,8,128,1,fp8,fp8,0,0.4347792148590088
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,4096,64,64,128,1,fp8,fp8,0,0.2733488082885742
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,4096,64,1,128,1,float16,float16,0,0.25614080429077146
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,4096,64,1,128,1,float16,fp8,0,0.25051040649414064
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,4096,64,2,128,1,float16,fp8,0,0.2374272108078003
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,4096,64,2,128,1,float16,float16,0,0.2602144002914429
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,4096,64,4,128,1,float16,float16,0,0.2568399906158447
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,4096,64,4,128,1,float16,fp8,0,0.24155840873718262
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,4096,64,4,128,1,float16,float16,0,0.9742416381835938
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,4096,64,4,128,1,fp8,fp8,0,0.23772480487823486
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,4096,64,8,128,1,float16,float16,0,0.2628112077713013
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,4096,64,8,128,1,float16,fp8,0,0.23796319961547852
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,4096,64,8,128,1,fp8,fp8,0,0.2384592056274414
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,4096,64,64,128,1,float16,float16,0,0.3188607931137085
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,4096,64,8,128,1,float16,fp8,0,0.46938719749450686
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,4096,64,1,128,1,fp8,fp8,0,0.23748960494995117
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,4096,64,2,128,1,fp8,fp8,0,0.251475191116333
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,3072,64,1,128,1,float16,fp8,0,3.8906528472900392
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,3072,64,1,128,1,fp8,fp8,0,3.912900924682617
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,3072,64,1,128,1,float16,float16,0,4.950457763671875
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,3072,64,2,128,1,float16,fp8,0,3.8636192321777343
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,3072,64,2,128,1,fp8,fp8,0,3.9005889892578125
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,3072,64,2,128,1,float16,float16,0,4.800187301635742
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,3072,64,4,128,1,float16,fp8,0,3.8542816162109377
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,3072,64,4,128,1,float16,float16,0,4.908417510986328
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,3072,64,4,128,1,fp8,fp8,0,3.9683807373046873
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,3072,64,8,128,1,float16,float16,0,4.987958526611328
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,3072,64,8,128,1,float16,fp8,0,4.099662399291992
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,3072,64,64,128,1,float16,float16,0,2.866352081298828
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,3072,64,64,128,1,float16,fp8,0,2.5147504806518555
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,3072,64,1,128,1,float16,fp8,0,2.0248287200927733
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,3072,64,64,128,1,fp8,fp8,0,2.360993576049805
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,3072,64,8,128,1,fp8,fp8,0,3.9240318298339845
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,3072,64,1,128,1,float16,float16,0,2.4834783554077147
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,3072,64,2,128,1,float16,float16,0,2.2948144912719726
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,3072,64,1,128,1,fp8,fp8,0,2.290452766418457
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,3072,64,2,128,1,fp8,fp8,0,1.9723344802856446
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,3072,64,2,128,1,float16,fp8,0,2.228652763366699
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,3072,64,4,128,1,float16,fp8,0,1.951348876953125
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,3072,64,4,128,1,fp8,fp8,0,1.9903984069824219
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,3072,64,4,128,1,float16,float16,0,2.327484893798828
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,3072,64,8,128,1,float16,float16,0,2.494396781921387
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,3072,64,64,128,1,float16,fp8,0,1.1944671630859376
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,3072,64,64,128,1,float16,float16,0,1.3724384307861328
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,3072,64,1,128,1,float16,float16,0,1.1215855598449707
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,3072,64,64,128,1,fp8,fp8,0,1.1828656196594238
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,3072,64,1,128,1,float16,fp8,0,1.2419008255004882
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,3072,64,8,128,1,fp8,fp8,0,1.9923471450805663
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,3072,64,8,128,1,float16,fp8,0,2.254680061340332
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,3072,64,1,128,1,fp8,fp8,0,1.2476143836975098
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,3072,64,2,128,1,float16,float16,0,1.128980827331543
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,3072,64,2,128,1,float16,fp8,0,1.140449619293213
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,3072,64,2,128,1,fp8,fp8,0,1.1126288414001464
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,3072,64,4,128,1,float16,float16,0,1.1252143859863282
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,3072,64,4,128,1,fp8,fp8,0,1.0360848426818847
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,3072,64,4,128,1,float16,fp8,0,1.0978256225585938
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,3072,64,8,128,1,float16,float16,0,1.1378607749938965
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,3072,64,64,128,1,float16,float16,0,0.6970863819122315
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,3072,64,64,128,1,float16,fp8,0,0.6382304191589355
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,3072,64,1,128,1,float16,float16,0,0.5635663986206054
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,3072,64,64,128,1,fp8,fp8,0,0.6686079978942872
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,3072,64,1,128,1,float16,fp8,0,0.5237391948699951
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,3072,64,8,128,1,fp8,fp8,0,1.0924896240234374
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,3072,64,1,128,1,fp8,fp8,0,0.6435071945190429
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,3072,64,2,128,1,float16,float16,0,0.562659215927124
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,3072,64,2,128,1,float16,fp8,0,0.5284063816070557
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,3072,64,4,128,1,float16,fp8,0,0.5192111968994141
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,3072,64,4,128,1,float16,float16,0,0.5746208190917969
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,3072,64,2,128,1,fp8,fp8,0,0.5586880207061767
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,3072,64,8,128,1,float16,float16,0,0.6094192028045654
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,3072,64,64,128,1,float16,fp8,0,0.3224463939666748
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,3072,64,64,128,1,fp8,fp8,0,0.32542879581451417
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,3072,64,8,128,1,float16,fp8,0,0.5233424186706543
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,3072,64,1,128,1,float16,fp8,0,0.27110559940338136
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,3072,64,1,128,1,fp8,fp8,0,0.2747152090072632
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,3072,64,2,128,1,float16,float16,0,0.2980128049850464
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,3072,64,2,128,1,float16,fp8,0,0.27154719829559326
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,3072,64,2,128,1,fp8,fp8,0,0.2746687889099121
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,3072,64,4,128,1,float16,float16,0,0.29495360851287844
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,3072,64,4,128,1,float16,fp8,0,0.27100000381469724
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,3072,64,4,128,1,fp8,fp8,0,0.2716304063796997
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,3072,64,8,128,1,float16,float16,0,0.2992959976196289
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,3072,64,8,128,1,float16,fp8,0,0.2694416046142578
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,3072,64,8,128,1,fp8,fp8,0,0.2763999938964844
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,3072,64,64,128,1,float16,float16,0,0.19171040058135985
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,3072,64,64,128,1,float16,fp8,0,0.18024959564208984
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,3072,64,64,128,1,fp8,fp8,0,0.17548160552978515
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,3072,64,1,128,1,float16,float16,0,0.1628224015235901
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,3072,64,1,128,1,float16,fp8,0,0.15000159740448
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,3072,64,1,128,1,fp8,fp8,0,0.15156480073928832
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,3072,64,8,128,1,float16,fp8,0,1.113432025909424
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,3072,64,2,128,1,float16,float16,0,0.15944479703903197
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,3072,64,2,128,1,float16,fp8,0,0.15149439573287965
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,3072,64,2,128,1,fp8,fp8,0,0.14897919893264772
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,3072,64,4,128,1,float16,float16,0,0.1615231990814209
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,3072,64,4,128,1,float16,fp8,0,0.15042879581451415
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,3072,64,4,128,1,fp8,fp8,0,0.14973440170288085
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,3072,64,8,128,1,float16,float16,0,0.16439199447631836
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,3072,64,8,128,1,float16,fp8,0,0.1498255968093872
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,3072,64,4,128,1,fp8,fp8,0,0.5500815868377685
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,3072,64,8,128,1,fp8,fp8,0,0.1500048041343689
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,3072,64,64,128,1,float16,float16,0,0.3575680017471313
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,3072,64,8,128,1,fp8,fp8,0,0.5303887844085693
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,3072,64,1,128,1,float16,float16,0,0.3211040019989014
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,2048,64,1,128,1,float16,fp8,0,3.8226016998291015
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,2048,64,1,128,1,fp8,fp8,0,3.839691162109375
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,2048,64,2,128,1,float16,fp8,0,3.904332733154297
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,2048,64,2,128,1,fp8,fp8,0,3.851023864746094
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,2048,64,1,128,1,float16,float16,0,4.848369598388672
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,2048,64,2,128,1,float16,float16,0,4.779243087768554
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,2048,64,4,128,1,float16,float16,0,4.607798385620117
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,2048,64,4,128,1,float16,fp8,0,3.8374366760253906
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,2048,64,64,128,1,float16,fp8,0,2.608492851257324
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,2048,64,4,128,1,fp8,fp8,0,3.8944671630859373
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,2048,64,64,128,1,float16,float16,0,3.0518640518188476
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,2048,64,64,128,1,fp8,fp8,0,2.5832096099853517
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,2048,64,8,128,1,float16,fp8,0,3.9301055908203124
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,2048,64,8,128,1,fp8,fp8,0,3.934543991088867
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,2048,64,8,128,1,float16,float16,0,4.916561508178711
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,2048,64,1,128,1,float16,float16,0,2.208675193786621
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,2048,64,1,128,1,float16,fp8,0,1.9491264343261718
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,2048,64,1,128,1,fp8,fp8,0,1.9448207855224608
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,2048,64,2,128,1,float16,fp8,0,1.9310880661010743
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,2048,64,2,128,1,float16,float16,0,2.417830467224121
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,2048,64,2,128,1,fp8,fp8,0,1.9709295272827148
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,2048,64,4,128,1,float16,float16,0,2.2356639862060548
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,2048,64,4,128,1,float16,fp8,0,2.1088336944580077
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,2048,64,4,128,1,fp8,fp8,0,2.1573808670043944
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,2048,64,8,128,1,float16,fp8,0,1.980169677734375
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,2048,64,64,128,1,float16,fp8,0,1.2585264205932618
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,2048,64,8,128,1,float16,float16,0,2.294937515258789
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,2048,64,64,128,1,float16,float16,0,1.5849311828613282
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,2048,64,1,128,1,float16,float16,0,1.0919008255004883
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,2048,64,1,128,1,float16,fp8,0,0.9821488380432128
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,2048,64,8,128,1,fp8,fp8,0,1.9548608779907226
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,2048,64,64,128,1,fp8,fp8,0,1.4238256454467773
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,2048,64,1,128,1,fp8,fp8,0,0.9975440025329589
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,2048,64,2,128,1,float16,float16,0,1.092300796508789
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,2048,64,4,128,1,float16,fp8,0,0.9735088348388672
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,2048,64,2,128,1,fp8,fp8,0,1.131003189086914
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,2048,64,2,128,1,float16,fp8,0,1.196945571899414
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,2048,64,4,128,1,float16,float16,0,1.1887215614318847
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,2048,64,8,128,1,float16,float16,0,1.1106816291809083
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,2048,64,64,128,1,float16,float16,0,0.7204207897186279
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,2048,64,64,128,1,float16,fp8,0,0.6581967830657959
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,2048,64,1,128,1,float16,float16,0,0.6285935878753662
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,2048,64,64,128,1,fp8,fp8,0,0.6708992004394532
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,2048,64,8,128,1,fp8,fp8,0,0.9874239921569824
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,2048,64,1,128,1,float16,fp8,0,0.574283218383789
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,2048,64,1,128,1,fp8,fp8,0,0.4960783958435059
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,2048,64,2,128,1,float16,float16,0,0.5544864177703858
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,2048,64,2,128,1,fp8,fp8,0,0.5136032104492188
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,2048,64,2,128,1,float16,fp8,0,0.6119040012359619
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,2048,64,4,128,1,float16,float16,0,0.5659567832946777
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,2048,64,4,128,1,fp8,fp8,0,0.49533920288085936
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,2048,64,64,128,1,float16,float16,0,0.375163197517395
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,2048,64,8,128,1,float16,float16,0,0.5755360126495361
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,2048,64,8,128,1,fp8,fp8,0,0.6019599914550782
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,2048,64,64,128,1,float16,fp8,0,0.32361760139465334
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,2048,64,1,128,1,float16,float16,0,0.2930191993713379
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,2048,64,64,128,1,fp8,fp8,0,0.3608527898788452
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,2048,64,1,128,1,float16,fp8,0,0.3166415929794312
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,2048,64,1,128,1,fp8,fp8,0,0.2604624032974243
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,2048,64,2,128,1,float16,float16,0,0.31271839141845703
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,2048,64,2,128,1,float16,fp8,0,0.29361441135406496
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,2048,64,2,128,1,fp8,fp8,0,0.2716399908065796
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,2048,64,4,128,1,float16,float16,0,0.28593759536743163
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,2048,64,4,128,1,float16,fp8,0,0.2875744104385376
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,2048,64,4,128,1,fp8,fp8,0,0.2799488067626953
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,2048,64,8,128,1,float16,float16,0,0.2906991958618164
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,2048,64,8,128,1,float16,fp8,0,0.26150240898132326
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,2048,64,64,128,1,float16,float16,0,0.20245120525360108
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,2048,64,4,128,1,fp8,fp8,0,1.0012528419494628
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,2048,64,64,128,1,fp8,fp8,0,0.17547199726104737
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,2048,64,1,128,1,float16,float16,0,0.15658400058746338
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,2048,64,1,128,1,float16,fp8,0,0.14262239933013915
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,2048,64,1,128,1,fp8,fp8,0,0.14280639886856078
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,2048,64,2,128,1,float16,float16,0,0.15552799701690673
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,2048,64,2,128,1,float16,fp8,0,0.14285759925842284
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,2048,64,2,128,1,fp8,fp8,0,0.14263839721679689
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,2048,64,4,128,1,float16,float16,0,0.15487680435180665
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,2048,64,8,128,1,float16,fp8,0,0.9878399848937989
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,2048,64,4,128,1,float16,fp8,0,0.14293279647827148
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,2048,64,4,128,1,fp8,fp8,0,0.14326080083847045
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,2048,64,8,128,1,float16,fp8,0,0.14407680034637452
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,2048,64,8,128,1,fp8,fp8,0,0.14436160326004027
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,2048,64,64,128,1,float16,float16,0,0.11145279407501221
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,2048,64,64,128,1,float16,fp8,0,0.10050560235977173
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,2048,64,64,128,1,fp8,fp8,0,0.10087360143661499
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,2048,64,1,128,1,float16,float16,0,0.0887391984462738
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,2048,64,4,128,1,float16,fp8,0,0.5449391841888428
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,2048,64,1,128,1,float16,fp8,0,0.08411679863929748
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,2048,64,1,128,1,fp8,fp8,0,0.08351200222969055
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,2048,64,2,128,1,float16,float16,0,0.08887680172920227
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,2048,64,2,128,1,float16,fp8,0,0.08368480205535889
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,2048,64,2,128,1,fp8,fp8,0,0.08359519839286804
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,2048,64,4,128,1,float16,float16,0,0.08943520188331604
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,2048,64,4,128,1,float16,fp8,0,0.08398720026016235
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,2048,64,8,128,1,float16,fp8,0,0.5068352222442627
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,2048,64,4,128,1,fp8,fp8,0,0.08353440165519714
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,2048,64,8,128,1,float16,float16,0,0.09071040153503418
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,2048,64,8,128,1,fp8,fp8,0,0.08377439975738525
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,2048,64,8,128,1,fp8,fp8,0,0.2866431951522827
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1536,64,1,128,1,float16,fp8,0,2.3670448303222655
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,2048,64,8,128,1,float16,float16,0,0.15815680027008056
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1536,64,1,128,1,fp8,fp8,0,2.388972854614258
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1536,64,1,128,1,float16,float16,0,2.7816991806030273
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,2048,64,8,128,1,float16,fp8,0,0.08410239815711976
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1536,64,2,128,1,float16,float16,0,2.675606346130371
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1536,64,2,128,1,float16,fp8,0,2.3921087265014647
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,2048,64,64,128,1,float16,fp8,0,0.17944639921188354
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1536,64,4,128,1,float16,float16,0,2.83929443359375
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1536,64,4,128,1,float16,fp8,0,2.65600643157959
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1536,64,4,128,1,fp8,fp8,0,2.363991928100586
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1536,64,64,128,1,float16,float16,0,1.8574384689331054
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1536,64,8,128,1,float16,fp8,0,2.392523193359375
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1536,64,8,128,1,float16,float16,0,3.0319887161254884
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1536,64,8,128,1,fp8,fp8,0,2.4018335342407227
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1536,64,1,128,1,float16,float16,0,1.3211039543151855
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1536,64,64,128,1,float16,fp8,0,1.982703971862793
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1536,64,2,128,1,fp8,fp8,0,2.3625215530395507
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1536,64,1,128,1,fp8,fp8,0,1.2374704360961915
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1536,64,1,128,1,float16,fp8,0,1.3965999603271484
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1536,64,2,128,1,float16,fp8,0,1.2149776458740233
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1536,64,2,128,1,fp8,fp8,0,1.3399392127990724
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1536,64,2,128,1,float16,float16,0,1.4254847526550294
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1536,64,4,128,1,float16,float16,0,1.36735200881958
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1536,64,4,128,1,float16,fp8,0,1.192033576965332
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1536,64,4,128,1,fp8,fp8,0,1.2056431770324707
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1536,64,8,128,1,float16,float16,0,1.377732753753662
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1536,64,64,128,1,float16,float16,0,0.9713456153869628
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1536,64,8,128,1,float16,fp8,0,1.208516788482666
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1536,64,64,128,1,float16,fp8,0,0.9705007553100586
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1536,64,8,128,1,fp8,fp8,0,1.2259296417236327
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1536,64,1,128,1,float16,float16,0,0.6727824211120605
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1536,64,64,128,1,fp8,fp8,0,0.793555212020874
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1536,64,1,128,1,float16,fp8,0,0.7472479820251465
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1536,64,2,128,1,float16,float16,0,0.6670703887939453
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1536,64,1,128,1,fp8,fp8,0,0.6973455905914306
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1536,64,64,128,1,fp8,fp8,0,1.610985565185547
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1536,64,2,128,1,fp8,fp8,0,0.6167503833770752
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1536,64,2,128,1,float16,fp8,0,0.7105279922485351
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1536,64,4,128,1,float16,float16,0,0.7033215999603272
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1536,64,4,128,1,float16,fp8,0,0.6062911987304688
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1536,64,64,128,1,float16,float16,0,0.466096019744873
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1536,64,64,128,1,float16,fp8,0,0.45288958549499514
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1536,64,4,128,1,fp8,fp8,0,0.6740863800048829
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1536,64,8,128,1,fp8,fp8,0,0.6094016075134278
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1536,64,8,128,1,float16,float16,0,0.6929935932159423
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1536,64,1,128,1,float16,float16,0,0.3377520084381104
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1536,64,64,128,1,fp8,fp8,0,0.4544879913330078
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1536,64,1,128,1,float16,fp8,0,0.3145792007446289
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1536,64,1,128,1,fp8,fp8,0,0.3210288047790527
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1536,64,2,128,1,float16,fp8,0,0.31517601013183594
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1536,64,4,128,1,float16,float16,0,0.3428976058959961
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1536,64,4,128,1,float16,fp8,0,0.3482032060623169
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1536,64,4,128,1,fp8,fp8,0,0.3145616054534912
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1536,64,8,128,1,float16,float16,0,0.3578959941864014
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1536,64,8,128,1,float16,fp8,0,0.3149647951126099
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1536,64,8,128,1,fp8,fp8,0,0.3146048069000244
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1536,64,64,128,1,float16,fp8,0,0.21660799980163575
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1536,64,64,128,1,fp8,fp8,0,0.21828320026397705
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1536,64,1,128,1,float16,float16,0,0.17692480087280274
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1536,64,1,128,1,float16,fp8,0,0.16808799505233765
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1536,64,2,128,1,float16,float16,0,0.1796031951904297
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1536,64,2,128,1,float16,fp8,0,0.16912959814071654
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1536,64,2,128,1,fp8,fp8,0,0.16819839477539061
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1536,64,4,128,1,float16,float16,0,0.18167680501937866
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1536,64,4,128,1,float16,fp8,0,0.16883360147476195
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1536,64,4,128,1,fp8,fp8,0,0.16830079555511473
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1536,64,8,128,1,float16,float16,0,0.1854640007019043
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1536,64,8,128,1,float16,fp8,0,0.16898080110549926
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1536,64,8,128,1,fp8,fp8,0,0.16994719505310057
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1536,64,64,128,1,float16,float16,0,0.1301535964012146
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1536,64,64,128,1,fp8,fp8,0,0.11984319686889648
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1536,64,1,128,1,float16,float16,0,0.09904320240020752
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1536,64,1,128,1,float16,fp8,0,0.09447519779205323
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1536,64,1,128,1,fp8,fp8,0,0.09422879815101623
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1536,64,2,128,1,float16,float16,0,0.10114879608154297
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1536,64,2,128,1,float16,fp8,0,0.09441279768943786
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1536,64,2,128,1,fp8,fp8,0,0.09483039975166321
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1536,64,4,128,1,float16,float16,0,0.10058560371398925
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1536,64,4,128,1,float16,fp8,0,0.09426559805870056
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1536,64,4,128,1,fp8,fp8,0,0.09445599913597107
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1536,64,8,128,1,float16,float16,0,0.10335040092468262
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1536,64,8,128,1,float16,fp8,0,0.6363359928131104
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1536,64,8,128,1,float16,fp8,0,0.09473119974136353
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1536,64,8,128,1,fp8,fp8,0,0.09423840045928955
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1536,64,64,128,1,float16,float16,0,0.07593920230865478
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1536,64,64,128,1,float16,fp8,0,0.06936320066452026
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1536,64,64,128,1,fp8,fp8,0,0.0686464011669159
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1536,64,2,128,1,float16,float16,0,0.34191360473632815
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1536,64,1,128,1,float16,float16,0,0.06038399934768677
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1536,64,1,128,1,float16,fp8,0,0.057257598638534545
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1536,64,1,128,1,fp8,fp8,0,0.05714719891548157
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1536,64,2,128,1,float16,fp8,0,0.05728800296783447
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1536,64,2,128,1,fp8,fp8,0,0.05736799836158753
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1536,64,4,128,1,float16,fp8,0,0.05738239884376526
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1536,64,4,128,1,fp8,fp8,0,0.05779839754104614
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1536,64,8,128,1,float16,float16,0,0.06228160262107849
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1536,64,1,128,1,fp8,fp8,0,0.16914880275726318
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1536,64,8,128,1,float16,fp8,0,0.057948797941207886
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1536,64,8,128,1,fp8,fp8,0,0.05767679810523987
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1536,64,64,128,1,float16,fp8,0,0.11988799571990967
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1536,64,2,128,1,fp8,fp8,0,0.3305743932723999
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,1024,64,1,128,1,float16,fp8,0,2.5025632858276365
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1536,64,2,128,1,float16,float16,0,0.06077280044555664
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,1024,64,1,128,1,float16,float16,0,2.853547286987305
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1536,64,4,128,1,float16,float16,0,0.061059200763702394
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1536,64,64,128,1,float16,float16,0,0.250382399559021
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,1024,64,1,128,1,fp8,fp8,0,2.507809638977051
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,1024,64,2,128,1,float16,fp8,0,2.521670341491699
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,1024,64,2,128,1,fp8,fp8,0,2.5086288452148438
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,1024,64,2,128,1,float16,float16,0,2.8057535171508787
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,1024,64,4,128,1,float16,fp8,0,2.504408073425293
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,1024,64,4,128,1,fp8,fp8,0,2.530240058898926
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,1024,64,4,128,1,float16,float16,0,3.021774482727051
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,1024,64,8,128,1,float16,float16,0,3.02042236328125
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,1024,64,8,128,1,float16,fp8,0,2.5143455505371093
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1024,64,1,128,1,float16,float16,0,1.36517276763916
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1024,64,1,128,1,float16,fp8,0,1.5300559997558594
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1024,64,64,128,1,float16,float16,0,2.1091264724731444
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1024,64,64,128,1,float16,fp8,0,1.9810047149658203
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1024,64,1,128,1,fp8,fp8,0,1.464838409423828
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,1024,64,8,128,1,fp8,fp8,0,2.548244857788086
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1024,64,2,128,1,float16,float16,0,1.356655979156494
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1024,64,2,128,1,float16,fp8,0,1.3990544319152831
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1024,64,2,128,1,fp8,fp8,0,1.2918992042541504
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1024,64,4,128,1,float16,fp8,0,1.2683903694152832
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1024,64,4,128,1,float16,float16,0,1.5155759811401368
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1024,64,4,128,1,fp8,fp8,0,1.2928432464599608
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1024,64,8,128,1,float16,float16,0,1.4303327560424806
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1024,64,8,128,1,float16,fp8,0,1.422267246246338
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1024,64,64,128,1,float16,float16,0,1.0388832092285156
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1024,64,64,128,1,float16,fp8,0,0.8876607894897461
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1024,64,8,128,1,fp8,fp8,0,1.3000639915466308
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1024,64,1,128,1,float16,float16,0,0.685697603225708
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1024,64,1,128,1,float16,fp8,0,0.730785608291626
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1024,64,64,128,1,fp8,fp8,0,1.0080495834350587
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1024,64,1,128,1,fp8,fp8,0,0.6769743919372558
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1024,64,2,128,1,float16,float16,0,0.7315343856811524
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1024,64,2,128,1,float16,fp8,0,0.6384975910186768
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1024,64,2,128,1,fp8,fp8,0,0.6517327785491943
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1024,64,4,128,1,float16,float16,0,0.7036367893218994
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1024,64,64,128,1,fp8,fp8,0,1.7716047286987304
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1024,64,4,128,1,float16,fp8,0,0.6948063850402832
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1024,64,4,128,1,fp8,fp8,0,0.690124797821045
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1024,64,8,128,1,float16,float16,0,0.7140431880950928
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1024,64,64,128,1,float16,fp8,0,0.45223679542541506
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1024,64,64,128,1,float16,float16,0,0.5203728199005127
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1024,64,8,128,1,fp8,fp8,0,0.6399231910705566
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1024,64,1,128,1,float16,fp8,0,0.33933439254760744
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1024,64,8,128,1,float16,fp8,0,0.7246592044830322
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1024,64,1,128,1,fp8,fp8,0,0.33319520950317383
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1024,64,2,128,1,float16,float16,0,0.3504784107208252
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1024,64,2,128,1,float16,fp8,0,0.3294975996017456
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1024,64,2,128,1,fp8,fp8,0,0.32871840000152586
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1024,64,4,128,1,float16,float16,0,0.3559216022491455
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1024,64,4,128,1,float16,fp8,0,0.34928159713745116
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1024,64,4,128,1,fp8,fp8,0,0.33112320899963377
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1024,64,8,128,1,float16,float16,0,0.364139199256897
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1024,64,64,128,1,float16,float16,0,0.26739680767059326
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1024,64,8,128,1,fp8,fp8,0,0.32944159507751464
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1024,64,1,128,1,float16,float16,0,0.18235679864883422
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1024,64,64,128,1,fp8,fp8,0,0.23761439323425293
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1024,64,1,128,1,float16,fp8,0,0.17673759460449218
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1024,64,1,128,1,fp8,fp8,0,0.17359520196914674
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1024,64,2,128,1,float16,float16,0,0.18154720067977906
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1024,64,2,128,1,float16,fp8,0,0.17413599491119386
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1024,64,2,128,1,fp8,fp8,0,0.17711520195007324
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1024,64,4,128,1,float16,float16,0,0.18186880350112916
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1024,64,4,128,1,float16,fp8,0,0.17602399587631226
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1024,64,4,128,1,fp8,fp8,0,0.17424800395965576
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1024,64,8,128,1,float16,float16,0,0.18791359663009644
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1024,64,8,128,1,float16,fp8,0,0.1738927960395813
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1024,64,8,128,1,fp8,fp8,0,0.17572959661483764
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1024,64,64,128,1,float16,fp8,0,0.1270576000213623
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1024,64,1,128,1,float16,float16,0,0.10140800476074219
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1024,64,1,128,1,float16,fp8,0,0.09516320228576661
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1024,64,1,128,1,fp8,fp8,0,0.0957759976387024
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1024,64,2,128,1,float16,float16,0,0.1003216028213501
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1024,64,2,128,1,float16,fp8,0,0.09596160054206848
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1024,64,2,128,1,fp8,fp8,0,0.09598079919815064
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1024,64,4,128,1,float16,float16,0,0.10267039537429809
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1024,64,4,128,1,float16,fp8,0,0.09617599844932556
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1024,64,4,128,1,fp8,fp8,0,0.09616479873657227
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1024,64,8,128,1,float16,float16,0,0.10467840433120727
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1024,64,8,128,1,float16,fp8,0,0.09567360281944275
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1024,64,64,128,1,fp8,fp8,0,0.45181918144226074
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1024,64,8,128,1,fp8,fp8,0,0.09586880207061768
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1024,64,64,128,1,float16,float16,0,0.08009920120239258
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1024,64,64,128,1,float16,fp8,0,0.0719488024711609
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1024,64,64,128,1,fp8,fp8,0,0.07195039987564086
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1024,64,1,128,1,float16,fp8,0,0.05565279722213745
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1024,64,8,128,1,float16,fp8,0,0.33063199520111086
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1024,64,64,128,1,float16,fp8,0,0.24998080730438232
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1024,64,2,128,1,float16,float16,0,0.05866559743881226
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1024,64,2,128,1,float16,fp8,0,0.05568640232086182
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1024,64,1,128,1,fp8,fp8,0,0.05555840134620667
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1024,64,2,128,1,fp8,fp8,0,0.05543360114097595
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1024,64,4,128,1,float16,float16,0,0.05883520245552063
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1024,64,4,128,1,float16,fp8,0,0.055529600381851195
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1024,64,8,128,1,float16,float16,0,0.06061440110206604
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1024,64,8,128,1,float16,fp8,0,0.055743998289108275
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1024,64,8,128,1,fp8,fp8,0,0.055615997314453124
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1024,64,64,128,1,float16,float16,0,0.045396798849105836
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1024,64,64,128,1,float16,fp8,0,0.04322879910469055
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1024,64,64,128,1,fp8,fp8,0,0.04326559901237488
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1024,64,1,128,1,float16,float16,0,0.03740479946136475
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1024,64,1,128,1,float16,fp8,0,0.03513759970664978
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1024,64,1,128,1,fp8,fp8,0,0.03546079993247986
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1024,64,64,128,1,fp8,fp8,0,0.12769919633865356
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1024,64,2,128,1,float16,float16,0,0.03786399960517883
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1024,64,2,128,1,float16,fp8,0,0.03534559905529022
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1024,64,2,128,1,fp8,fp8,0,0.035806399583816526
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1024,64,4,128,1,float16,float16,0,0.03802880048751831
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1024,64,4,128,1,float16,fp8,0,0.03522399961948395
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1024,64,4,128,1,fp8,fp8,0,0.03569439947605133
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1024,64,8,128,1,float16,float16,0,0.03886559903621674
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1024,64,8,128,1,float16,fp8,0,0.035550400614738464
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1024,64,8,128,1,fp8,fp8,0,0.03583039939403534
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1024,64,1,128,1,float16,float16,0,0.34982240200042725
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1024,64,1,128,1,float16,float16,0,0.05791680216789245
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,512,64,1,128,1,float16,fp8,0,1.86364803314209
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,512,64,1,128,1,float16,float16,0,1.9477472305297852
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,512,64,1,128,1,fp8,fp8,0,1.8622560501098633
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1024,64,4,128,1,fp8,fp8,0,0.055567997694015506
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1024,64,64,128,1,float16,float16,0,0.14170559644699096
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,512,64,2,128,1,float16,float16,0,1.967487907409668
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,512,64,2,128,1,float16,fp8,0,1.858558464050293
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,512,64,2,128,1,fp8,fp8,0,1.8605663299560546
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,512,64,4,128,1,float16,fp8,0,1.858785629272461
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,512,64,4,128,1,float16,float16,0,2.1201040267944338
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,512,64,4,128,1,fp8,fp8,0,1.8584175109863281
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,512,64,8,128,1,float16,fp8,0,1.8561744689941406
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,512,64,8,128,1,float16,float16,0,2.0895759582519533
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,512,64,1,128,1,float16,float16,0,1.0775568008422851
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,512,64,1,128,1,float16,fp8,0,0.9375231742858887
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,512,64,64,128,1,float16,fp8,0,1.4226256370544434
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,512,64,64,128,1,fp8,fp8,0,1.4186351776123047
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,512,64,1,128,1,fp8,fp8,0,0.9368895530700684
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,512,64,2,128,1,float16,float16,0,1.0820416450500487
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,512,64,2,128,1,float16,fp8,0,1.025443172454834
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,512,64,2,128,1,fp8,fp8,0,0.9359999656677246
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,512,64,4,128,1,float16,float16,0,0.9863984107971191
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,512,64,4,128,1,float16,fp8,0,0.9358736038208008
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,512,64,4,128,1,fp8,fp8,0,0.9349151611328125
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,512,64,8,128,1,float16,float16,0,1.1033072471618652
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,512,64,8,128,1,float16,fp8,0,0.9680543899536133
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,512,64,8,128,1,fp8,fp8,0,0.9357968330383301
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,512,64,64,128,1,fp8,fp8,0,0.7154607772827148
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,512,64,1,128,1,float16,float16,0,0.5292031764984131
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,512,64,1,128,1,float16,fp8,0,0.48358879089355467
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,512,64,2,128,1,float16,float16,0,0.48307361602783205
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,512,64,2,128,1,float16,fp8,0,0.5019824028015136
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,512,64,2,128,1,fp8,fp8,0,0.47682881355285645
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,512,64,64,128,1,float16,float16,0,1.682454490661621
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,512,64,4,128,1,float16,float16,0,0.4936960220336914
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,512,64,4,128,1,float16,fp8,0,0.4900335788726807
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,512,64,4,128,1,fp8,fp8,0,0.4759039878845215
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,512,64,8,128,1,float16,float16,0,0.5176767826080322
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,512,64,8,128,1,float16,fp8,0,0.47469282150268555
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,512,64,8,128,1,fp8,fp8,0,0.479307222366333
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,512,64,64,128,1,float16,fp8,0,0.36544959545135497
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,512,64,1,128,1,float16,float16,0,0.24946401119232178
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,512,64,64,128,1,fp8,fp8,0,0.36520159244537354
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,512,64,1,128,1,float16,fp8,0,0.2444080114364624
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,512,64,1,128,1,fp8,fp8,0,0.2470463991165161
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,512,64,2,128,1,float16,float16,0,0.24788320064544678
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,512,64,2,128,1,float16,fp8,0,0.24444160461425782
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,512,64,2,128,1,fp8,fp8,0,0.24675679206848145
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,512,64,64,128,1,float16,fp8,0,0.7160352230072021
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,512,64,4,128,1,float16,fp8,0,0.2447455883026123
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,512,64,4,128,1,fp8,fp8,0,0.24667201042175294
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,512,64,8,128,1,float16,fp8,0,0.24495360851287842
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,512,64,8,128,1,float16,float16,0,0.2624111890792847
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,512,64,8,128,1,fp8,fp8,0,0.2469104051589966
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,512,64,64,128,1,float16,float16,0,0.21424319744110107
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,512,64,1,128,1,float16,float16,0,0.13321759700775146
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,512,64,64,128,1,fp8,fp8,0,0.19104479551315307
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,512,64,1,128,1,float16,fp8,0,0.13015520572662354
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,512,64,1,128,1,fp8,fp8,0,0.13017280101776124
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,512,64,8,128,1,fp8,fp8,0,1.9880624771118165
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,512,64,2,128,1,float16,float16,0,0.13260960578918457
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,512,64,2,128,1,float16,fp8,0,0.13021440505981446
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,512,64,2,128,1,fp8,fp8,0,0.13027520179748536
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,512,64,4,128,1,float16,float16,0,0.13500640392303467
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,512,64,4,128,1,fp8,fp8,0,0.1302175998687744
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,512,64,8,128,1,float16,float16,0,0.14007999897003173
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,512,64,64,128,1,float16,float16,0,0.4195119857788086
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,512,64,8,128,1,float16,fp8,0,0.1308303952217102
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,512,64,8,128,1,fp8,fp8,0,0.13015520572662354
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,512,64,64,128,1,float16,float16,0,0.11538560390472412
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,512,64,64,128,1,float16,fp8,0,0.10264159440994262
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,512,64,64,128,1,fp8,fp8,0,0.10314879417419434
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,512,64,1,128,1,float16,float16,0,0.07460319995880127
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,512,64,1,128,1,float16,fp8,0,0.07169119715690613
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,512,64,1,128,1,fp8,fp8,0,0.07128639817237854
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,512,64,2,128,1,float16,float16,0,0.07540320158004761
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,512,64,2,128,1,float16,fp8,0,0.07142239809036255
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,512,64,2,128,1,fp8,fp8,0,0.0717408001422882
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,512,64,64,128,1,float16,float16,0,0.8333264350891113
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,512,64,4,128,1,float16,float16,0,0.25305120944976806
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,512,64,4,128,1,float16,fp8,0,0.07240960001945496
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,512,64,4,128,1,fp8,fp8,0,0.07211840152740479
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,512,64,8,128,1,float16,float16,0,0.07956799864768982
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,512,64,8,128,1,fp8,fp8,0,0.07247999906539918
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,512,64,64,128,1,float16,float16,0,0.0649616003036499
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,512,64,64,128,1,float16,fp8,0,0.057657599449157715
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,512,64,1,128,1,fp8,fp8,0,0.4755296230316162
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,512,64,1,128,1,float16,float16,0,0.043305599689483644
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,512,64,64,128,1,float16,fp8,0,0.19037599563598634
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,512,64,1,128,1,fp8,fp8,0,0.04115679860115051
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,512,64,2,128,1,float16,float16,0,0.04303680062294006
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,512,64,2,128,1,float16,fp8,0,0.04119519889354706
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,512,64,2,128,1,fp8,fp8,0,0.04110240042209625
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,512,64,4,128,1,float16,float16,0,0.043326398730278014
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,512,64,4,128,1,float16,fp8,0,0.04114879965782166
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,512,64,4,128,1,fp8,fp8,0,0.04118399918079376
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,512,64,8,128,1,float16,float16,0,0.044977599382400514
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,512,64,8,128,1,float16,fp8,0,0.041177600622177124
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,512,64,8,128,1,fp8,fp8,0,0.041249600052833554
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,512,64,64,128,1,float16,float16,0,0.036375999450683594
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,512,64,64,128,1,float16,fp8,0,0.034376001358032225
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,512,64,64,128,1,fp8,fp8,0,0.03490239977836609
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,512,64,1,128,1,float16,float16,0,0.02875039875507355
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,512,64,1,128,1,float16,fp8,0,0.02678399980068207
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,512,64,1,128,1,fp8,fp8,0,0.026785600185394286
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,512,64,2,128,1,float16,float16,0,0.028934401273727418
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,512,64,4,128,1,float16,fp8,0,0.1302448034286499
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,512,64,2,128,1,fp8,fp8,0,0.02680160105228424
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,512,64,4,128,1,float16,float16,0,0.02879680097103119
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,512,64,4,128,1,float16,fp8,0,0.026931199431419372
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,512,64,4,128,1,fp8,fp8,0,0.026790401339530943
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,512,64,8,128,1,float16,float16,0,0.028905600309371948
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,512,64,8,128,1,float16,fp8,0,0.026761600375175477
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,512,64,2,128,1,float16,fp8,0,0.02688640058040619
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,512,64,8,128,1,fp8,fp8,0,0.026815998554229736
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,512,64,64,128,1,float16,float16,0,0.020641599595546723
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,512,64,64,128,1,fp8,fp8,0,0.0207056000828743
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,512,64,1,128,1,float16,float16,0,0.018377600610256194
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,512,64,1,128,1,float16,fp8,0,0.016667200624942778
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,512,64,1,128,1,fp8,fp8,0,0.01663520038127899
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,512,64,2,128,1,float16,float16,0,0.017910400032997133
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,512,64,2,128,1,float16,fp8,0,0.01656000018119812
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,512,64,2,128,1,fp8,fp8,0,0.016612799465656282
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,512,64,64,128,1,float16,fp8,0,0.020720000565052032
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,512,64,4,128,1,float16,float16,0,0.018607999384403228
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,512,64,4,128,1,float16,fp8,0,0.017260800302028655
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,512,64,8,128,1,float16,float16,0,0.01860159933567047
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,512,64,8,128,1,float16,fp8,0,0.01735839992761612
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,512,64,8,128,1,fp8,fp8,0,0.016910399496555328
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,512,64,4,128,1,float16,float16,0,0.07629920244216919
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,512,64,8,128,1,float16,fp8,0,0.07232480049133301
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,256,64,1,128,1,float16,float16,0,0.7504208087921143
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,256,64,1,128,1,float16,fp8,0,0.7618368148803711
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,512,64,64,128,1,fp8,fp8,0,0.057548797130584715
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,512,64,1,128,1,float16,fp8,0,0.04118880033493042
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,256,64,2,128,1,float16,float16,0,0.7612768173217773
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,256,64,2,128,1,float16,fp8,0,0.758732795715332
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,256,64,2,128,1,fp8,fp8,0,0.7602303981781006
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,256,64,4,128,1,float16,float16,0,0.7790207862854004
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,256,64,4,128,1,float16,fp8,0,0.7592688083648682
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,256,64,4,128,1,fp8,fp8,0,0.7602928161621094
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,256,64,8,128,1,float16,float16,0,0.8295503616333008
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,256,64,8,128,1,float16,fp8,0,0.7587552070617676
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,256,64,64,128,1,float16,float16,0,0.7259232044219971
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,256,64,8,128,1,fp8,fp8,0,0.7744736194610595
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,256,64,64,128,1,float16,fp8,0,0.6225791931152344
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,256,64,1,128,1,float16,float16,0,0.40184640884399414
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,256,64,64,128,1,fp8,fp8,0,0.6262623786926269
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,256,64,1,128,1,float16,fp8,0,0.3861439943313599
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,256,64,1,128,1,fp8,fp8,0,0.39548640251159667
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,256,64,2,128,1,float16,float16,0,0.38432319164276124
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,256,64,2,128,1,float16,fp8,0,0.3862895965576172
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,512,64,4,128,1,fp8,fp8,0,0.016972799599170686
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,256,64,2,128,1,fp8,fp8,0,0.39578239917755126
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,256,64,4,128,1,float16,fp8,0,0.3860752105712891
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,256,64,4,128,1,fp8,fp8,0,0.39864161014556887
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,256,64,8,128,1,float16,float16,0,0.41484642028808594
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,256,64,8,128,1,float16,fp8,0,0.3862384080886841
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,256,64,8,128,1,fp8,fp8,0,0.3872143983840942
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,256,64,64,128,1,float16,float16,0,0.368120002746582
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,256,64,1,128,1,float16,float16,0,0.1990928053855896
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,256,64,64,128,1,float16,fp8,0,0.3165103912353516
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,256,64,64,128,1,fp8,fp8,0,0.3266576051712036
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,256,64,1,128,1,fp8,fp8,0,0.7623295783996582
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,256,64,2,128,1,float16,float16,0,0.20021440982818603
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,256,64,1,128,1,fp8,fp8,0,0.20119359493255615
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,256,64,2,128,1,float16,fp8,0,0.19782719612121583
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,256,64,2,128,1,fp8,fp8,0,0.2043936014175415
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,256,64,4,128,1,float16,float16,0,0.20470879077911378
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,256,64,4,128,1,float16,fp8,0,0.19950239658355712
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,256,64,8,128,1,float16,fp8,0,0.20411200523376466
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,256,64,8,128,1,fp8,fp8,0,0.1998080015182495
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,256,64,4,128,1,fp8,fp8,0,0.2007296085357666
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,256,64,64,128,1,float16,fp8,0,0.16433279514312743
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,256,64,64,128,1,fp8,fp8,0,0.16477760076522827
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,256,64,1,128,1,float16,float16,0,0.10722240209579467
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,256,64,1,128,1,float16,fp8,0,0.10411200523376465
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,256,64,1,128,1,fp8,fp8,0,0.10471839904785156
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,256,64,2,128,1,float16,float16,0,0.10644799470901489
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,256,64,2,128,1,float16,fp8,0,0.10508960485458374
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,256,64,2,128,1,fp8,fp8,0,0.1051632046699524
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,256,64,4,128,1,float16,float16,0,0.10872479677200317
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,256,64,4,128,1,float16,fp8,0,0.10477279424667359
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,256,64,4,128,1,fp8,fp8,0,0.105075204372406
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,256,64,8,128,1,float16,fp8,0,0.10557440519332886
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,256,64,8,128,1,fp8,fp8,0,0.10510879755020142
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,256,64,64,128,1,float16,fp8,0,0.08758720159530639
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,256,64,64,128,1,fp8,fp8,0,0.08740640282630921
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,256,64,1,128,1,float16,float16,0,0.059084802865982056
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,256,64,1,128,1,float16,fp8,0,0.05582879781723023
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,256,64,4,128,1,float16,float16,0,0.3944704055786133
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,256,64,1,128,1,fp8,fp8,0,0.055720001459121704
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,256,64,2,128,1,float16,float16,0,0.05984799861907959
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,256,64,2,128,1,float16,fp8,0,0.055622398853302
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,256,64,2,128,1,fp8,fp8,0,0.05626239776611328
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,256,64,4,128,1,float16,float16,0,0.06075199842453003
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,256,64,4,128,1,fp8,fp8,0,0.05664160251617432
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,256,64,8,128,1,float16,float16,0,0.06308640241622925
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,256,64,8,128,1,float16,fp8,0,0.05754079818725586
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,256,64,8,128,1,fp8,fp8,0,0.05743520259857178
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,256,64,64,128,1,float16,float16,0,0.057764798402786255
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,256,64,64,128,1,float16,fp8,0,0.05132160186767578
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,256,64,8,128,1,float16,float16,0,0.2143887996673584
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,256,64,1,128,1,float16,float16,0,0.035020801424980166
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,256,64,64,128,1,float16,float16,0,0.18849920034408568
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,256,64,1,128,1,float16,fp8,0,0.03493599891662598
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,256,64,2,128,1,float16,float16,0,0.03526720106601715
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,256,64,2,128,1,float16,fp8,0,0.03498719930648804
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,256,64,2,128,1,fp8,fp8,0,0.034708800911903384
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,256,64,4,128,1,float16,float16,0,0.036689600348472594
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,256,64,4,128,1,float16,fp8,0,0.03457759916782379
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,256,64,4,128,1,fp8,fp8,0,0.035020801424980166
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,256,64,8,128,1,float16,float16,0,0.037041598558425905
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,256,64,8,128,1,float16,fp8,0,0.03493120074272156
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,256,64,8,128,1,fp8,fp8,0,0.03458879888057709
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,256,64,64,128,1,float16,float16,0,0.029417601227760316
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,256,64,64,128,1,fp8,fp8,0,0.02886880040168762
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,256,64,1,128,1,float16,float16,0,0.021143999695777894
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,256,64,8,128,1,float16,float16,0,0.11395200490951538
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,256,64,1,128,1,float16,fp8,0,0.020691199600696562
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,256,64,1,128,1,fp8,fp8,0,0.02067680060863495
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,256,64,64,128,1,float16,float16,0,0.10082240104675293
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,256,64,2,128,1,float16,float16,0,0.02266400009393692
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,256,64,2,128,1,float16,fp8,0,0.020707200467586517
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,256,64,64,128,1,float16,fp8,0,0.028799998760223388
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,256,64,2,128,1,fp8,fp8,0,0.02062239944934845
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,256,64,4,128,1,float16,fp8,0,0.020827199518680572
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,256,64,8,128,1,float16,float16,0,0.022675199806690215
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,256,64,8,128,1,float16,fp8,0,0.02075680047273636
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,256,64,8,128,1,fp8,fp8,0,0.020656000077724456
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,256,64,64,128,1,float16,float16,0,0.01828159987926483
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,256,64,64,128,1,float16,fp8,0,0.018619200587272643
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,256,64,64,128,1,fp8,fp8,0,0.018671999871730804
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,256,64,1,128,1,float16,float16,0,0.014496000111103058
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,256,64,1,128,1,float16,fp8,0,0.014582400023937226
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,256,64,1,128,1,fp8,fp8,0,0.01456640064716339
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,256,64,2,128,1,float16,float16,0,0.014555199444293976
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,256,64,2,128,1,float16,fp8,0,0.01451680064201355
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,256,64,4,128,1,float16,float16,0,0.014481599628925323
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,256,64,4,128,1,float16,fp8,0,0.014569599926471711
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,256,64,4,128,1,fp8,fp8,0,0.014494399726390838
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,256,64,8,128,1,float16,float16,0,0.014664000272750855
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,256,64,8,128,1,float16,fp8,0,0.014580799639225006
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,256,64,8,128,1,fp8,fp8,0,0.014553600549697876
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,256,64,64,128,1,float16,float16,0,0.014630399644374847
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,256,64,64,128,1,float16,fp8,0,0.01459839940071106
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,256,64,4,128,1,float16,fp8,0,0.05578399896621704
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,256,64,64,128,1,fp8,fp8,0,0.01459999978542328
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,256,64,1,128,1,float16,float16,0,0.013483199477195739
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,256,64,1,128,1,float16,fp8,0,0.19885920286178588
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,256,64,1,128,1,float16,fp8,0,0.012540799379348756
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,256,64,1,128,1,fp8,fp8,0,0.012510399520397186
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,256,64,2,128,1,float16,float16,0,0.014451199769973755
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,256,64,2,128,1,float16,fp8,0,0.0125231996178627
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,256,64,2,128,1,fp8,fp8,0,0.012516799569129943
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,256,64,4,128,1,float16,float16,0,0.014457599818706512
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,256,64,4,128,1,float16,fp8,0,0.01250080019235611
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,256,64,4,128,1,fp8,fp8,0,0.012583999335765839
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,256,64,8,128,1,float16,float16,0,0.014473600685596466
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,256,64,8,128,1,float16,fp8,0,0.012548799812793731
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,256,64,8,128,1,fp8,fp8,0,0.013046400249004364
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,256,64,64,128,1,fp8,fp8,0,0.0514240026473999
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,256,64,1,128,1,fp8,fp8,0,0.03445920050144195
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,128,64,1,128,1,float16,float16,0,0.4736015796661377
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,128,64,1,128,1,float16,fp8,0,0.480617618560791
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,128,64,1,128,1,fp8,fp8,0,0.4797679901123047
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,128,64,2,128,1,float16,float16,0,0.4751215934753418
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,128,64,2,128,1,float16,fp8,0,0.48053760528564454
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,128,64,2,128,1,fp8,fp8,0,0.4807472229003906
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,128,64,4,128,1,float16,float16,0,0.4837007999420166
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,128,64,4,128,1,float16,fp8,0,0.48091678619384765
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,256,64,4,128,1,fp8,fp8,0,0.020747199654579163
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,128,64,4,128,1,fp8,fp8,0,0.4799056053161621
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,128,64,8,128,1,float16,float16,0,0.5039584159851074
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,128,64,8,128,1,float16,fp8,0,0.47856478691101073
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,128,64,8,128,1,fp8,fp8,0,0.4803135871887207
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,128,64,64,128,1,float16,fp8,0,0.36401278972625734
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,128,64,1,128,1,float16,float16,0,0.24444479942321778
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,128,64,1,128,1,float16,fp8,0,0.24727199077606202
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,128,64,1,128,1,fp8,fp8,0,0.24624478816986084
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,128,64,2,128,1,float16,float16,0,0.24405760765075685
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,128,64,2,128,1,float16,fp8,0,0.24669280052185058
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,128,64,2,128,1,fp8,fp8,0,0.24609119892120362
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,128,64,4,128,1,float16,float16,0,0.2485424041748047
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,128,64,4,128,1,float16,fp8,0,0.2464576005935669
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,128,64,4,128,1,fp8,fp8,0,0.24632480144500732
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,128,64,8,128,1,float16,float16,0,0.25735840797424314
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,128,64,8,128,1,float16,fp8,0,0.24657440185546875
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,128,64,8,128,1,fp8,fp8,0,0.24623360633850097
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,128,64,64,128,1,float16,fp8,0,0.18766239881515503
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,128,64,64,128,1,fp8,fp8,0,0.18700159788131715
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,128,64,1,128,1,float16,float16,0,0.12876479625701903
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,128,64,1,128,1,float16,fp8,0,0.1278175950050354
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,128,64,1,128,1,fp8,fp8,0,0.12846399545669557
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,128,64,2,128,1,float16,float16,0,0.12873120307922364
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,128,64,2,128,1,float16,fp8,0,0.12822240591049194
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,128,64,2,128,1,fp8,fp8,0,0.1280351996421814
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,128,64,4,128,1,float16,float16,0,0.13154560327529907
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,128,64,4,128,1,float16,fp8,0,0.12819039821624756
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,128,64,4,128,1,fp8,fp8,0,0.12857600450515747
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,256,64,4,128,1,float16,float16,0,0.022784000635147093
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,128,64,8,128,1,float16,fp8,0,0.12857919931411743
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,128,64,8,128,1,fp8,fp8,0,0.12798880338668822
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,128,64,64,128,1,float16,float16,0,0.1074112057685852
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,128,64,64,128,1,float16,fp8,0,0.09852160215377807
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,128,64,64,128,1,fp8,fp8,0,0.09881280064582824
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,128,64,64,128,1,float16,float16,0,0.40058560371398927
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,256,64,2,128,1,fp8,fp8,0,0.014611199498176575
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,128,64,1,128,1,fp8,fp8,0,0.06817280054092408
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,128,64,2,128,1,float16,float16,0,0.06943039894104004
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,128,64,64,128,1,fp8,fp8,0,0.3643487930297852
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,128,64,2,128,1,float16,fp8,0,0.06825119853019715
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,128,64,1,128,1,float16,fp8,0,0.0679311990737915
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,128,64,4,128,1,float16,float16,0,0.07201120257377625
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,128,64,8,128,1,float16,float16,0,0.073716801404953
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,128,64,2,128,1,fp8,fp8,0,0.06796960234642029
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,128,64,8,128,1,float16,fp8,0,0.06865919828414917
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,128,64,64,128,1,float16,float16,0,0.05974079966545105
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,128,64,4,128,1,fp8,fp8,0,0.06835839748382569
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,128,64,64,128,1,float16,fp8,0,0.05346879959106445
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,128,64,64,128,1,fp8,fp8,0,0.053465598821640016
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,128,64,1,128,1,float16,float16,0,0.038555198907852174
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,128,64,1,128,1,float16,fp8,0,0.03710399866104126
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,128,64,8,128,1,fp8,fp8,0,0.06804320216178894
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,128,64,1,128,1,fp8,fp8,0,0.03709119856357575
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,128,64,2,128,1,float16,float16,0,0.03847520053386688
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,128,64,2,128,1,float16,fp8,0,0.03737280070781708
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,128,64,4,128,1,float16,float16,0,0.039062398672103885
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,128,64,4,128,1,float16,fp8,0,0.037176001071929934
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,128,64,64,128,1,float16,float16,0,0.20630240440368652
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,128,64,8,128,1,float16,float16,0,0.040670400857925414
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,128,64,8,128,1,float16,fp8,0,0.037264001369476316
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,128,64,8,128,1,fp8,fp8,0,0.03771679997444153
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,128,64,64,128,1,float16,fp8,0,0.03094559907913208
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,128,64,64,128,1,fp8,fp8,0,0.030980798602104186
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,128,64,1,128,1,float16,float16,0,0.02476000040769577
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,128,64,1,128,1,float16,fp8,0,0.023099200427532197
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,128,64,1,128,1,fp8,fp8,0,0.023871999979019166
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,128,64,2,128,1,float16,float16,0,0.024758400022983552
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,128,64,2,128,1,float16,fp8,0,0.023763200640678404
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,128,64,2,128,1,fp8,fp8,0,0.02380640059709549
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,128,64,4,128,1,float16,float16,0,0.024831999838352204
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,128,64,4,128,1,float16,fp8,0,0.02358720004558563
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,128,64,4,128,1,fp8,fp8,0,0.023203200101852416
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,128,64,8,128,1,float16,float16,0,0.0247296005487442
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,128,64,8,128,1,float16,fp8,0,0.023060800135135652
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,128,64,8,128,1,fp8,fp8,0,0.023574399948120116
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,128,64,8,128,1,float16,float16,0,0.13628799915313722
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,128,64,64,128,1,float16,float16,0,0.01867839992046356
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,128,64,64,128,1,float16,fp8,0,0.0186831995844841
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,128,64,64,128,1,fp8,fp8,0,0.018691200017929076
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,128,64,1,128,1,float16,float16,0,0.015779200196266174
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,128,64,1,128,1,float16,fp8,0,0.014593599736690522
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,128,64,2,128,1,float16,float16,0,0.016598400473594666
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,128,64,2,128,1,float16,fp8,0,0.01451680064201355
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,128,64,2,128,1,fp8,fp8,0,0.015779200196266174
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,128,64,4,128,1,float16,float16,0,0.015603199601173401
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,128,64,4,128,1,float16,fp8,0,0.015905599296092986
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,128,64,4,128,1,fp8,fp8,0,0.014563199877738953
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,128,64,1,128,1,float16,float16,0,0.0699184000492096
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,128,64,8,128,1,float16,float16,0,0.016551999747753142
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,128,64,8,128,1,float16,fp8,0,0.016008000075817107
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,128,64,64,128,1,float16,float16,0,0.01406240016222
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,128,64,64,128,1,float16,fp8,0,0.01252480000257492
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,128,64,64,128,1,fp8,fp8,0,0.012534399330615998
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,128,64,1,128,1,float16,float16,0,0.010503999888896942
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,128,64,4,128,1,float16,fp8,0,0.0684656023979187
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,128,64,1,128,1,float16,fp8,0,0.010475199669599533
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,128,64,1,128,1,fp8,fp8,0,0.010524799674749374
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,128,64,2,128,1,float16,float16,0,0.011025600135326385
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,128,64,2,128,1,float16,fp8,0,0.010788799822330475
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,128,64,2,128,1,fp8,fp8,0,0.010465600341558457
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,128,64,4,128,1,float16,float16,0,0.01051200032234192
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,128,64,4,128,1,float16,fp8,0,0.010571199655532836
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,128,64,4,128,1,fp8,fp8,0,0.010503999888896942
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,128,64,8,128,1,float16,float16,0,0.012417600303888322
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,128,64,8,128,1,float16,fp8,0,0.010523200035095215
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,128,64,8,128,1,fp8,fp8,0,0.010545600205659866
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,128,64,64,128,1,float16,float16,0,0.012454400211572647
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,128,64,64,128,1,float16,fp8,0,0.01048159971833229
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,128,64,64,128,1,fp8,fp8,0,0.012428800016641617
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,128,64,1,128,1,float16,float16,0,0.010460799932479859
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,128,64,1,128,1,float16,fp8,0,0.010387200117111205
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,128,64,1,128,1,fp8,fp8,0,0.01048479974269867
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,128,64,2,128,1,float16,float16,0,0.010476800054311753
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,128,64,2,128,1,float16,fp8,0,0.010471999645233154
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,128,64,2,128,1,fp8,fp8,0,0.010496000200510025
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,128,64,2,128,1,fp8,fp8,0,0.037136000394821164
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,128,64,4,128,1,float16,float16,0,0.010475199669599533
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,128,64,4,128,1,float16,fp8,0,0.010500799864530563
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,128,64,4,128,1,fp8,fp8,0,0.03713279962539673
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,128,64,8,128,1,float16,float16,0,0.010491199791431427
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,128,64,8,128,1,float16,fp8,0,0.010494399815797806
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,128,64,8,128,1,fp8,fp8,0,0.010465600341558457
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,128,64,64,128,1,float16,float16,0,0.03300319910049439
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,64,64,1,128,1,float16,float16,0,0.39839839935302734
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,64,64,1,128,1,float16,fp8,0,0.3945631980895996
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,64,64,1,128,1,fp8,fp8,0,0.3944943904876709
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,64,64,2,128,1,float16,float16,0,0.4013088226318359
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,64,64,2,128,1,float16,fp8,0,0.39423840045928954
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,64,64,2,128,1,fp8,fp8,0,0.3943615913391113
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,64,64,4,128,1,float16,float16,0,0.404204797744751
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,64,64,4,128,1,float16,fp8,0,0.3938767910003662
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,128,64,1,128,1,fp8,fp8,0,0.015723200142383577
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,64,64,4,128,1,fp8,fp8,0,0.39365758895874026
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,64,64,8,128,1,float16,float16,0,0.41321759223937987
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,64,64,8,128,1,float16,fp8,0,0.39375839233398435
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,64,64,64,128,1,float16,float16,0,0.28213279247283934
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,64,64,64,128,1,float16,fp8,0,0.26174559593200686
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,64,64,64,128,1,fp8,fp8,0,0.26245119571685793
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,128,64,8,128,1,fp8,fp8,0,0.0144896000623703
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,64,64,1,128,1,float16,fp8,0,0.20292000770568847
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,64,64,1,128,1,fp8,fp8,0,0.20264320373535155
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,64,64,2,128,1,float16,float16,0,0.2067487955093384
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,64,64,2,128,1,float16,fp8,0,0.20285120010375976
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,64,64,2,128,1,fp8,fp8,0,0.20294079780578614
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,64,64,4,128,1,float16,float16,0,0.20846080780029297
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,64,64,4,128,1,float16,fp8,0,0.20328478813171386
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,64,64,4,128,1,fp8,fp8,0,0.20295360088348388
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,64,64,8,128,1,float16,float16,0,0.21383199691772461
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,64,64,8,128,1,float16,fp8,0,0.20231199264526367
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,64,64,64,128,1,float16,float16,0,0.14673279523849486
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,64,64,64,128,1,float16,fp8,0,0.13631680011749267
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,64,64,64,128,1,fp8,fp8,0,0.1361407995223999
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,64,64,1,128,1,float16,fp8,0,0.10618400573730469
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,64,64,1,128,1,fp8,fp8,0,0.1056704044342041
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,64,64,2,128,1,float16,float16,0,0.11002240180969239
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,64,64,2,128,1,float16,fp8,0,0.10627039670944213
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,64,64,2,128,1,fp8,fp8,0,0.1061903953552246
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,128,64,4,128,1,fp8,fp8,0,0.010478399693965912
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,64,64,4,128,1,float16,float16,0,0.11160479784011841
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,64,64,4,128,1,float16,fp8,0,0.10631999969482422
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,64,64,4,128,1,fp8,fp8,0,0.10627520084381104
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,64,64,8,128,1,float16,float16,0,0.1139456033706665
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,64,64,8,128,1,float16,fp8,0,0.10679199695587158
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,64,64,64,128,1,float16,float16,0,0.07955200076103211
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,64,64,64,128,1,float16,fp8,0,0.07314079999923706
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,64,64,8,128,1,fp8,fp8,0,0.10611519813537598
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,64,64,64,128,1,fp8,fp8,0,0.07283679842948913
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,64,64,1,128,1,float16,fp8,0,0.0573743999004364
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,64,64,1,128,1,fp8,fp8,0,0.056955200433731076
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,64,64,2,128,1,float16,float16,0,0.06000639796257019
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,64,64,2,128,1,float16,fp8,0,0.056846398115158084
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,64,64,2,128,1,fp8,fp8,0,0.05712159872055054
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,64,64,4,128,1,float16,float16,0,0.0602512001991272
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,64,64,4,128,1,float16,fp8,0,0.05730720162391663
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,64,64,4,128,1,fp8,fp8,0,0.05636320114135742
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,64,64,8,128,1,float16,float16,0,0.06219040155410767
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,64,64,8,128,1,float16,fp8,0,0.056612801551818845
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,64,64,8,128,1,fp8,fp8,0,0.05692800283432007
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,64,64,64,128,1,float16,fp8,0,0.03913759887218475
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,64,64,64,128,1,float16,float16,0,0.043198400735855104
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,64,64,64,128,1,fp8,fp8,0,0.039190399646759036
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,64,64,1,128,1,float16,float16,0,0.03384160101413727
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,64,64,1,128,1,float16,fp8,0,0.032313600182533264
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,64,64,1,128,1,fp8,fp8,0,0.032662400603294374
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,64,64,2,128,1,float16,float16,0,0.03391520082950592
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,64,64,8,128,1,fp8,fp8,0,0.3933151960372925
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,64,64,4,128,1,float16,float16,0,0.034534400701522826
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,64,64,1,128,1,float16,float16,0,0.2063391923904419
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,64,64,2,128,1,float16,fp8,0,0.03238719999790192
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,64,64,4,128,1,float16,fp8,0,0.03237600028514862
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,64,64,4,128,1,fp8,fp8,0,0.031553599238395694
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,64,64,8,128,1,float16,float16,0,0.03494080007076263
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,64,64,8,128,1,float16,fp8,0,0.03208479881286621
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,64,64,8,128,1,fp8,fp8,0,0.031780800223350524
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,64,64,64,128,1,float16,fp8,0,0.02476000040769577
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,64,64,64,128,1,fp8,fp8,0,0.024796800315380098
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,64,64,1,128,1,float16,float16,0,0.02215359956026077
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,64,64,1,128,1,float16,fp8,0,0.02075359970331192
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,64,64,1,128,1,fp8,fp8,0,0.020734399557113647
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,64,64,2,128,1,float16,float16,0,0.020815999805927278
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,64,64,2,128,1,float16,fp8,0,0.020729599893093108
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,64,64,2,128,1,fp8,fp8,0,0.02080159932374954
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,64,64,4,128,1,float16,float16,0,0.022649599611759184
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,64,64,8,128,1,fp8,fp8,0,0.20263199806213378
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,64,64,4,128,1,float16,fp8,0,0.020695999264717102
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,64,64,8,128,1,float16,float16,0,0.022353599965572356
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,64,64,8,128,1,float16,fp8,0,0.02072799950838089
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,64,64,1,128,1,float16,float16,0,0.11039199829101562
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,64,64,8,128,1,fp8,fp8,0,0.02059520035982132
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,64,64,64,128,1,float16,fp8,0,0.01656160056591034
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,64,64,1,128,1,float16,float16,0,0.01449120044708252
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,64,64,1,128,1,float16,fp8,0,0.014468799531459808
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,64,64,64,128,1,float16,float16,0,0.016628800332546233
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,64,64,1,128,1,fp8,fp8,0,0.014502400159835815
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,64,64,2,128,1,float16,float16,0,0.014548799395561219
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,64,64,2,128,1,fp8,fp8,0,0.014611199498176575
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,64,64,4,128,1,float16,float16,0,0.014555199444293976
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,64,64,4,128,1,float16,fp8,0,0.01454080045223236
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,64,64,4,128,1,fp8,fp8,0,0.014561599493026734
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,64,64,8,128,1,float16,float16,0,0.014552000164985656
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,64,64,1,128,1,float16,float16,0,0.06015679836273193
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,64,64,8,128,1,float16,fp8,0,0.014542399346828461
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,64,64,8,128,1,fp8,fp8,0,0.014505599439144135
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,64,64,64,128,1,float16,float16,0,0.012515200674533844
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,64,64,64,128,1,float16,fp8,0,0.012452799826860428
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,64,64,64,128,1,fp8,fp8,0,0.012518399953842163
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,64,64,1,128,1,float16,float16,0,0.010527999699115753
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,64,64,1,128,1,float16,fp8,0,0.010491199791431427
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,64,64,1,128,1,fp8,fp8,0,0.010462400317192078
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,64,64,2,128,1,float16,float16,0,0.010487999767065048
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,64,64,2,128,1,float16,fp8,0,0.010417599976062775
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,64,64,2,128,1,fp8,fp8,0,0.01053759977221489
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,64,64,4,128,1,float16,float16,0,0.01045600026845932
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,64,64,4,128,1,float16,fp8,0,0.010480000078678131
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,64,64,4,128,1,fp8,fp8,0,0.010420800000429154
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,64,64,8,128,1,float16,float16,0,0.010447999835014344
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,64,64,8,128,1,float16,fp8,0,0.010385599732398988
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,64,64,8,128,1,fp8,fp8,0,0.010436800122261048
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,64,64,64,128,1,float16,float16,0,0.011451199650764465
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,64,64,64,128,1,fp8,fp8,0,0.010395199805498124
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,64,64,1,128,1,float16,float16,0,0.010395199805498124
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,64,64,1,128,1,float16,fp8,0,0.010406400263309478
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,64,64,2,128,1,fp8,fp8,0,0.03286559879779816
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,64,64,2,128,1,float16,float16,0,0.010409600287675857
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,64,64,2,128,1,float16,fp8,0,0.010440000146627427
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,64,64,2,128,1,fp8,fp8,0,0.010382399708032609
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,64,64,64,128,1,float16,fp8,0,0.010462400317192078
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,64,64,4,128,1,float16,float16,0,0.010751999914646149
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,64,64,4,128,1,float16,fp8,0,0.010390400141477584
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,64,64,64,128,1,float16,float16,0,0.024817599356174468
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,64,64,4,128,1,fp8,fp8,0,0.010467199981212616
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,64,64,8,128,1,float16,float16,0,0.010499200224876404
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,64,64,8,128,1,float16,fp8,0,0.010424000024795533
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,64,64,8,128,1,fp8,fp8,0,0.010471999645233154
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,32,64,1,128,1,float16,float16,0,0.3563791990280151
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,32,64,1,128,1,fp8,fp8,0,0.3488640069961548
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,32,64,1,128,1,float16,fp8,0,0.3494688034057617
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,64,64,4,128,1,fp8,fp8,0,0.020710399746894835
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,32,64,2,128,1,float16,float16,0,0.3578320026397705
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,64,64,64,128,1,fp8,fp8,0,0.016510400176048278
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,32,64,2,128,1,fp8,fp8,0,0.34979679584503176
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,32,64,4,128,1,float16,fp8,0,0.3488287925720215
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,32,64,4,128,1,fp8,fp8,0,0.3498879909515381
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,64,64,2,128,1,float16,fp8,0,0.014502400159835815
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,32,64,8,128,1,float16,float16,0,0.3632159948348999
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,32,64,8,128,1,float16,fp8,0,0.3491904020309448
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,32,64,64,128,1,float16,float16,0,0.2214128017425537
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,32,64,8,128,1,fp8,fp8,0,0.34929280281066893
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,32,64,64,128,1,fp8,fp8,0,0.21087679862976075
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,32,64,1,128,1,float16,fp8,0,0.18042559623718263
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,32,64,1,128,1,fp8,fp8,0,0.17957760095596315
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,32,64,2,128,1,float16,float16,0,0.18567680120468139
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,32,64,2,128,1,float16,fp8,0,0.17978880405426026
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,32,64,2,128,1,fp8,fp8,0,0.17962080240249634
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,32,64,4,128,1,float16,float16,0,0.18684959411621094
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,32,64,4,128,1,float16,fp8,0,0.18017280101776123
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,32,64,4,128,1,fp8,fp8,0,0.18000320196151734
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,32,64,8,128,1,float16,float16,0,0.1889039993286133
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,32,64,8,128,1,float16,fp8,0,0.18039040565490722
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,32,64,8,128,1,fp8,fp8,0,0.17977119684219361
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,32,64,64,128,1,float16,float16,0,0.11718560457229614
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,32,64,64,128,1,float16,fp8,0,0.11094080209732056
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,32,64,64,128,1,fp8,fp8,0,0.11128959655761719
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,32,64,1,128,1,float16,float16,0,0.0982159972190857
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,32,64,1,128,1,float16,fp8,0,0.09466400146484374
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,32,64,1,128,1,fp8,fp8,0,0.09498879909515381
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,32,64,2,128,1,float16,float16,0,0.09917439818382263
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,32,64,2,128,1,float16,fp8,0,0.09476159811019898
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,64,64,1,128,1,fp8,fp8,0,0.010465600341558457
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,32,64,2,128,1,fp8,fp8,0,0.09489279985427856
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,32,64,4,128,1,float16,float16,0,0.10001280307769775
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,32,64,4,128,1,fp8,fp8,0,0.09456160068511962
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,32,64,8,128,1,float16,float16,0,0.1014240026473999
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,32,64,8,128,1,float16,fp8,0,0.09443839788436889
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,32,64,64,128,1,float16,float16,0,0.06384639739990235
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,32,64,64,128,1,float16,fp8,0,0.058659201860427855
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,32,64,64,128,1,fp8,fp8,0,0.05834720134735107
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,32,64,1,128,1,float16,float16,0,0.05438719987869263
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,32,64,1,128,1,float16,fp8,0,0.05145440101623535
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,32,64,2,128,1,float16,fp8,0,0.34959840774536133
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,32,64,1,128,1,fp8,fp8,0,0.05165759921073913
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,32,64,4,128,1,float16,float16,0,0.3586335897445679
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,32,64,2,128,1,float16,float16,0,0.05460000038146973
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,32,64,2,128,1,float16,fp8,0,0.051648002862930295
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,32,64,2,128,1,fp8,fp8,0,0.051523202657699586
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,32,64,4,128,1,float16,float16,0,0.05447040200233459
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,32,64,4,128,1,float16,fp8,0,0.05148320198059082
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,32,64,4,128,1,fp8,fp8,0,0.05153120160102844
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,32,64,8,128,1,float16,float16,0,0.05540639758110046
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,32,64,8,128,1,float16,fp8,0,0.05133119821548462
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,32,64,64,128,1,float16,fp8,0,0.033004799485206605
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,32,64,64,128,1,fp8,fp8,0,0.033020800352096556
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,32,64,64,128,1,float16,fp8,0,0.20953121185302734
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,32,64,1,128,1,float16,float16,0,0.03154560029506683
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,32,64,1,128,1,float16,fp8,0,0.028951999545097352
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,32,64,1,128,1,fp8,fp8,0,0.029156801104545594
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,32,64,2,128,1,float16,float16,0,0.0311055988073349
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,32,64,2,128,1,float16,fp8,0,0.02902719974517822
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,32,64,2,128,1,fp8,fp8,0,0.028921601176261903
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,32,64,4,128,1,float16,float16,0,0.031358399987220766
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,32,64,4,128,1,float16,fp8,0,0.028963199257850646
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,32,64,4,128,1,fp8,fp8,0,0.02942720055580139
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,32,64,8,128,1,float16,float16,0,0.031788799166679385
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,32,64,8,128,1,float16,fp8,0,0.029153600335121155
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,32,64,8,128,1,fp8,fp8,0,0.028910401463508605
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,32,64,64,128,1,float16,fp8,0,0.02067359983921051
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,32,64,64,128,1,float16,float16,0,0.022686399519443512
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,32,64,64,128,1,fp8,fp8,0,0.021062399446964263
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,32,64,1,128,1,float16,float16,0,0.02062560021877289
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,32,64,1,128,1,float16,fp8,0,0.019431999325752257
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,32,64,2,128,1,float16,float16,0,0.020691199600696562
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,32,64,2,128,1,float16,fp8,0,0.019814400374889372
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,32,64,2,128,1,fp8,fp8,0,0.01993280053138733
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,32,64,4,128,1,float16,float16,0,0.0206496000289917
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,32,64,4,128,1,float16,fp8,0,0.01926559954881668
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,32,64,4,128,1,fp8,fp8,0,0.019827200472354888
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,32,64,4,128,1,float16,fp8,0,0.09480159878730773
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,32,64,8,128,1,float16,float16,0,0.0206496000289917
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,32,64,8,128,1,float16,fp8,0,0.018667200207710268
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,32,64,8,128,1,fp8,fp8,0,0.018719999492168425
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,32,64,64,128,1,float16,float16,0,0.016227200627326965
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,32,64,64,128,1,float16,fp8,0,0.014484800398349762
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,32,64,8,128,1,fp8,fp8,0,0.09468160271644592
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,32,64,64,128,1,fp8,fp8,0,0.014524799585342408
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,32,64,1,128,1,float16,float16,0,0.013752000033855438
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,32,64,1,128,1,float16,fp8,0,0.013860799372196198
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,32,64,1,128,1,fp8,fp8,0,0.01420000046491623
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,32,64,2,128,1,float16,float16,0,0.013659200072288514
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,32,64,2,128,1,float16,fp8,0,0.014228799939155578
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,32,64,2,128,1,fp8,fp8,0,0.014419199526309967
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,32,64,4,128,1,float16,fp8,0,0.01404159963130951
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,32,64,4,128,1,fp8,fp8,0,0.014132800698280334
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,32,64,8,128,1,float16,float16,0,0.014571200311183929
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,32,64,8,128,1,float16,fp8,0,0.014336000382900237
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,32,64,8,128,1,fp8,fp8,0,0.014398400485515595
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,32,64,64,128,1,float16,float16,0,0.012772800028324127
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,32,64,64,128,1,float16,fp8,0,0.010502400249242783
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,32,64,64,128,1,fp8,fp8,0,0.010454399883747101
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,32,64,1,128,1,float16,float16,0,0.01048320010304451
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,32,64,1,128,1,fp8,fp8,0,0.01048479974269867
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,32,64,1,128,1,float16,fp8,0,0.010500799864530563
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,32,64,2,128,1,float16,float16,0,0.0104032002389431
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,32,64,2,128,1,float16,fp8,0,0.010414399951696397
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,32,64,2,128,1,fp8,fp8,0,0.01048159971833229
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,32,64,4,128,1,float16,float16,0,0.010427200049161912
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,32,64,8,128,1,fp8,fp8,0,0.05151680111885071
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,32,64,4,128,1,fp8,fp8,0,0.010470400005578995
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,32,64,1,128,1,float16,float16,0,0.18581119775772095
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,32,64,8,128,1,float16,float16,0,0.010926400125026704
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,32,64,8,128,1,float16,fp8,0,0.010358399897813796
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,32,64,8,128,1,fp8,fp8,0,0.01043199971318245
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,32,64,64,128,1,float16,float16,0,0.011806400120258331
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,32,64,64,128,1,float16,fp8,0,0.010385599732398988
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,32,64,64,128,1,fp8,fp8,0,0.010391999781131745
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,32,64,1,128,1,float16,float16,0,0.010460799932479859
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,32,64,1,128,1,float16,fp8,0,0.009607999771833419
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,32,64,1,128,1,fp8,fp8,0,0.010409600287675857
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,32,64,2,128,1,float16,float16,0,0.010435199737548828
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,32,64,2,128,1,fp8,fp8,0,0.010375999659299851
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,32,64,4,128,1,float16,float16,0,0.010470400005578995
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,32,64,4,128,1,float16,fp8,0,0.010396800190210342
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,32,64,4,128,1,fp8,fp8,0,0.010505600273609162
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,32,64,8,128,1,float16,float16,0,0.010396800190210342
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,32,64,8,128,1,fp8,fp8,0,0.010412800312042236
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,32,64,1,128,1,fp8,fp8,0,0.019150400161743165
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,16,64,1,128,1,float16,float16,0,0.3492944002151489
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,16,64,1,128,1,float16,fp8,0,0.33226718902587893
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,16,64,1,128,1,fp8,fp8,0,0.3332367897033691
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,16,64,2,128,1,float16,float16,0,0.34873759746551514
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,16,64,2,128,1,float16,fp8,0,0.33186399936676025
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,16,64,2,128,1,fp8,fp8,0,0.3325711965560913
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,32,64,4,128,1,float16,float16,0,0.01345600038766861
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,16,64,4,128,1,float16,fp8,0,0.3322751998901367
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,16,64,4,128,1,fp8,fp8,0,0.33179359436035155
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,16,64,8,128,1,float16,float16,0,0.3520416021347046
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,16,64,8,128,1,float16,fp8,0,0.33252639770507814
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,16,64,8,128,1,fp8,fp8,0,0.3324143886566162
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,32,64,64,128,1,float16,float16,0,0.034827199578285215
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,16,64,64,128,1,float16,float16,0,0.19756319522857665
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,16,64,64,128,1,float16,fp8,0,0.18729759454727174
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,16,64,64,128,1,fp8,fp8,0,0.18713120222091675
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,16,64,1,128,1,float16,float16,0,0.18020800352096558
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,32,64,2,128,1,float16,fp8,0,0.009723199903964997
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,16,64,1,128,1,fp8,fp8,0,0.17138400077819824
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,16,64,2,128,1,float16,float16,0,0.18023359775543213
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,32,64,8,128,1,float16,fp8,0,0.010407999902963639
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,16,64,2,128,1,fp8,fp8,0,0.17154719829559326
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,16,64,4,128,1,float16,float16,0,0.18144160509109497
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,16,64,4,128,1,fp8,fp8,0,0.17166719436645508
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,16,64,4,128,1,float16,fp8,0,0.1707087993621826
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,16,64,8,128,1,float16,fp8,0,0.1713871955871582
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,16,64,8,128,1,fp8,fp8,0,0.1712015986442566
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,16,64,64,128,1,float16,fp8,0,0.09827200174331666
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,16,64,64,128,1,float16,float16,0,0.10582720041275025
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,16,64,64,128,1,fp8,fp8,0,0.09822880029678345
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,16,64,1,128,1,float16,float16,0,0.09673759937286378
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,16,64,1,128,1,float16,fp8,0,0.09110239744186402
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,16,64,1,128,1,fp8,fp8,0,0.09149119853973389
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,16,64,2,128,1,float16,float16,0,0.096697598695755
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,16,64,2,128,1,float16,fp8,0,0.09120479822158814
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,16,64,2,128,1,fp8,fp8,0,0.09070559740066528
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,16,64,4,128,1,float16,float16,0,0.347870397567749
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,16,64,4,128,1,float16,float16,0,0.09636160135269164
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,16,64,4,128,1,float16,fp8,0,0.09080160260200501
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,16,64,4,128,1,fp8,fp8,0,0.0908959984779358
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,16,64,8,128,1,float16,float16,0,0.09710720181465149
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,16,64,8,128,1,float16,fp8,0,0.09089440107345581
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,16,64,8,128,1,fp8,fp8,0,0.09047840237617492
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,16,64,64,128,1,float16,float16,0,0.056595200300216676
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,16,64,64,128,1,float16,fp8,0,0.05345919728279114
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,16,64,64,128,1,fp8,fp8,0,0.05401920080184937
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,16,64,1,128,1,float16,float16,0,0.05407040119171143
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,16,64,1,128,1,float16,fp8,0,0.049753600358963014
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,16,64,1,128,1,fp8,fp8,0,0.05013599991798401
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,16,64,2,128,1,float16,float16,0,0.05415999889373779
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,16,64,2,128,1,float16,fp8,0,0.050097602605819705
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,16,64,2,128,1,fp8,fp8,0,0.04997600018978119
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,16,64,4,128,1,float16,float16,0,0.05442879796028137
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,16,64,4,128,1,fp8,fp8,0,0.05005120038986206
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,16,64,4,128,1,float16,fp8,0,0.049876800179481505
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,32,64,4,128,1,float16,fp8,0,0.010419200360774993
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,16,64,8,128,1,float16,float16,0,0.05452160239219665
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,16,64,8,128,1,float16,fp8,0,0.050382399559021
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,16,64,8,128,1,fp8,fp8,0,0.05002560019493103
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,16,64,64,128,1,float16,fp8,0,0.030929601192474364
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,16,64,64,128,1,fp8,fp8,0,0.030961599946022034
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,16,64,1,128,1,float16,float16,0,0.03121120035648346
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,16,64,1,128,1,float16,fp8,0,0.028907200694084166
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,16,64,1,128,1,fp8,fp8,0,0.02892799973487854
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,16,64,2,128,1,float16,float16,0,0.030987200140953065
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,16,64,2,128,1,float16,fp8,0,0.028865599632263185
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,16,64,2,128,1,fp8,fp8,0,0.028883200883865357
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,16,64,4,128,1,float16,float16,0,0.030985599756240843
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,16,64,4,128,1,float16,fp8,0,0.028860801458358766
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,16,64,4,128,1,fp8,fp8,0,0.02887359857559204
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,16,64,8,128,1,float16,float16,0,0.031142398715019226
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,16,64,1,128,1,float16,fp8,0,0.1708016037940979
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,16,64,8,128,1,fp8,fp8,0,0.0289247989654541
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,16,64,64,128,1,float16,float16,0,0.020798400044441223
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,16,64,64,128,1,float16,fp8,0,0.02069920003414154
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,16,64,64,128,1,fp8,fp8,0,0.020712000131607056
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,16,64,8,128,1,float16,fp8,0,0.029023998975753786
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,16,64,1,128,1,float16,fp8,0,0.018649600446224213
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,16,64,1,128,1,fp8,fp8,0,0.018694399297237395
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,16,64,2,128,1,float16,float16,0,0.020295999944210052
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,16,64,2,128,1,float16,fp8,0,0.01887039989233017
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,16,64,4,128,1,float16,float16,0,0.02011200040578842
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,16,64,2,128,1,float16,fp8,0,0.1724015951156616
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,16,64,4,128,1,float16,fp8,0,0.01876640021800995
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,16,64,4,128,1,fp8,fp8,0,0.0186831995844841
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,16,64,8,128,1,float16,fp8,0,0.01887200027704239
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16,64,64,128,1,float16,float16,0,0.015188799798488617
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,16,64,8,128,1,fp8,fp8,0,0.018799999356269838
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16,64,64,128,1,float16,fp8,0,0.014548799395561219
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,16,64,8,128,1,float16,float16,0,0.18235839605331422
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16,64,64,128,1,fp8,fp8,0,0.01377120018005371
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16,64,1,128,1,float16,float16,0,0.014483200013637542
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16,64,1,128,1,float16,fp8,0,0.013358399271965027
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16,64,2,128,1,float16,float16,0,0.012828800082206725
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16,64,2,128,1,float16,fp8,0,0.013692800700664521
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16,64,2,128,1,fp8,fp8,0,0.013846400380134582
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16,64,4,128,1,float16,fp8,0,0.0135343998670578
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16,64,4,128,1,fp8,fp8,0,0.01324480026960373
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16,64,8,128,1,float16,float16,0,0.014575999975204468
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16,64,8,128,1,float16,fp8,0,0.01318880021572113
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16,64,8,128,1,fp8,fp8,0,0.013371199369430542
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16,64,64,128,1,float16,float16,0,0.012488000094890594
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16,64,64,128,1,float16,fp8,0,0.010489600151777268
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16,64,64,128,1,fp8,fp8,0,0.010463999956846238
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16,64,1,128,1,float16,float16,0,0.010468800365924836
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16,64,1,128,1,float16,fp8,0,0.010443200170993806
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16,64,1,128,1,fp8,fp8,0,0.010476800054311753
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16,64,2,128,1,float16,float16,0,0.010489600151777268
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16,64,2,128,1,float16,fp8,0,0.010500799864530563
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16,64,2,128,1,fp8,fp8,0,0.010496000200510025
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16,64,4,128,1,float16,float16,0,0.010470400005578995
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16,64,4,128,1,float16,fp8,0,0.010449600219726563
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16,64,4,128,1,fp8,fp8,0,0.010438399761915207
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16,64,8,128,1,float16,float16,0,0.010443200170993806
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16,64,8,128,1,float16,fp8,0,0.010470400005578995
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16,64,8,128,1,fp8,fp8,0,0.010427200049161912
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,16,64,64,128,1,float16,float16,0,0.032979199290275575
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16,64,64,128,1,float16,fp8,0,0.01043199971318245
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16,64,64,128,1,fp8,fp8,0,0.010471999645233154
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16,64,1,128,1,float16,float16,0,0.01045600026845932
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16,64,1,128,1,float16,fp8,0,0.01048479974269867
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,16,64,1,128,1,float16,float16,0,0.020454399287700653
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16,64,1,128,1,fp8,fp8,0,0.010443200170993806
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16,64,2,128,1,float16,float16,0,0.010102400183677673
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,16,64,2,128,1,fp8,fp8,0,0.01863359957933426
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16,64,2,128,1,fp8,fp8,0,0.009486400336027146
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,16,64,8,128,1,float16,float16,0,0.020185600221157073
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16,64,4,128,1,float16,fp8,0,0.010391999781131745
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16,64,4,128,1,fp8,fp8,0,0.010367999970912933
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16,64,8,128,1,float16,float16,0,0.010411199927330018
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16,64,8,128,1,float16,fp8,0,0.010390400141477584
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16,64,8,128,1,fp8,fp8,0,0.010396800190210342
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16,64,1,128,1,fp8,fp8,0,0.012535999715328216
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16,64,4,128,1,float16,float16,0,0.013236799836158752
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,1,64,1,128,1,float16,float16,0,0.34357120990753176
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,1,64,1,128,1,float16,fp8,0,0.3237296104431152
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,1,64,2,128,1,float16,float16,0,0.34496159553527833
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,1,64,2,128,1,float16,fp8,0,0.3236304044723511
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,1,64,2,128,1,fp8,fp8,0,0.32236480712890625
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,1,64,4,128,1,float16,float16,0,0.34664320945739746
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,1,64,4,128,1,float16,fp8,0,0.32326080799102785
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,1,64,4,128,1,fp8,fp8,0,0.3235743999481201
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,1,64,8,128,1,float16,float16,0,0.34486401081085205
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,1,64,8,128,1,float16,fp8,0,0.3239871978759766
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,1,64,64,128,1,float16,float16,0,0.17960480451583863
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,1,64,8,128,1,fp8,fp8,0,0.3231775999069214
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,1,64,64,128,1,float16,fp8,0,0.16801760196685792
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,1,64,64,128,1,fp8,fp8,0,0.1690000057220459
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,1,64,1,128,1,float16,fp8,0,0.16744799613952638
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,1,64,1,128,1,fp8,fp8,0,0.16690239906311036
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,1,64,2,128,1,float16,float16,0,0.1796720027923584
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,1,64,2,128,1,float16,fp8,0,0.16748640537261963
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,1,64,2,128,1,fp8,fp8,0,0.16692479848861694
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,1,64,4,128,1,float16,float16,0,0.1795199990272522
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,1,64,4,128,1,float16,fp8,0,0.1672320008277893
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16,64,64,128,1,float16,float16,0,0.010436800122261048
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,1,64,4,128,1,fp8,fp8,0,0.16707999706268312
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,1,64,8,128,1,float16,float16,0,0.17961759567260743
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,1,64,8,128,1,float16,fp8,0,0.16727999448776246
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1,64,64,128,1,float16,float16,0,0.09791200160980225
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1,64,64,128,1,float16,fp8,0,0.08995680212974548
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1,64,64,128,1,fp8,fp8,0,0.08969119787216187
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,1,64,8,128,1,fp8,fp8,0,0.16768319606781007
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1,64,1,128,1,float16,float16,0,0.0961023986339569
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1,64,1,128,1,float16,fp8,0,0.089137601852417
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1,64,2,128,1,float16,float16,0,0.09674400091171265
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1,64,2,128,1,float16,fp8,0,0.08905439972877502
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1,64,1,128,1,fp8,fp8,0,0.08934400081634522
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16,64,2,128,1,float16,fp8,0,0.010416000336408614
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1,64,4,128,1,float16,float16,0,0.09690080285072326
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16,64,4,128,1,float16,float16,0,0.010447999835014344
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1,64,4,128,1,fp8,fp8,0,0.08921440243721009
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1,64,8,128,1,float16,fp8,0,0.08921120166778565
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1,64,8,128,1,fp8,fp8,0,0.08861759901046753
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1,64,64,128,1,float16,float16,0,0.05461919903755188
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1,64,64,128,1,float16,fp8,0,0.049395200610160825
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1,64,64,128,1,fp8,fp8,0,0.04948799908161163
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1,64,1,128,1,float16,float16,0,0.053611201047897336
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1,64,1,128,1,float16,fp8,0,0.04935680031776428
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1,64,1,128,1,fp8,fp8,0,0.049332800507545474
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1,64,2,128,1,float16,float16,0,0.05345919728279114
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1,64,2,128,1,float16,fp8,0,0.04939039945602417
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1,64,4,128,1,float16,float16,0,0.05382080078125
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1,64,4,128,1,float16,fp8,0,0.04932959973812103
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,1,64,1,128,1,fp8,fp8,0,0.322492790222168
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1,64,4,128,1,fp8,fp8,0,0.049414399266242984
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1,64,8,128,1,float16,float16,0,0.05397599935531616
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1,64,8,128,1,float16,fp8,0,0.04891999959945679
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1,64,64,128,1,float16,float16,0,0.032046398520469664
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1,64,64,128,1,float16,fp8,0,0.02884480059146881
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1,64,1,128,1,float16,float16,0,0.030980798602104186
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1,64,64,128,1,fp8,fp8,0,0.02887359857559204
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1,64,1,128,1,float16,fp8,0,0.02884800136089325
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1,64,1,128,1,fp8,fp8,0,0.0288783997297287
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1,64,2,128,1,float16,float16,0,0.03094080090522766
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1,64,2,128,1,float16,fp8,0,0.028865599632263185
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1,64,2,128,1,fp8,fp8,0,0.02881920039653778
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1,64,4,128,1,float16,float16,0,0.030934399366378783
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1,64,4,128,1,float16,fp8,0,0.02882719933986664
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1,64,4,128,1,fp8,fp8,0,0.02892960011959076
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1,64,8,128,1,float16,float16,0,0.030849599838256837
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1,64,8,128,1,float16,fp8,0,0.02887519896030426
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,1,64,1,128,1,float16,float16,0,0.1785632014274597
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1,64,64,128,1,float16,float16,0,0.020742399990558623
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1,64,64,128,1,float16,fp8,0,0.01863519996404648
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1,64,64,128,1,fp8,fp8,0,0.018643200397491455
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1,64,1,128,1,float16,float16,0,0.019891199469566346
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1,64,1,128,1,float16,fp8,0,0.018676799535751343
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1,64,1,128,1,fp8,fp8,0,0.018561600148677825
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1,64,2,128,1,float16,float16,0,0.01868959963321686
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1,64,2,128,1,float16,fp8,0,0.018595199286937713
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1,64,2,128,1,fp8,fp8,0,0.01860000044107437
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1,64,4,128,1,float16,float16,0,0.019592000544071196
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1,64,4,128,1,float16,fp8,0,0.018585599958896637
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1,64,4,128,1,fp8,fp8,0,0.01860480010509491
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1,64,2,128,1,fp8,fp8,0,0.08944320082664489
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1,64,8,128,1,float16,float16,0,0.019398400187492372
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1,64,4,128,1,float16,fp8,0,0.08891199827194214
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1,64,8,128,1,fp8,fp8,0,0.018588800728321076
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1,64,8,128,1,float16,float16,0,0.09549919962882995
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1,64,64,128,1,float16,float16,0,0.01459999978542328
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1,64,64,128,1,fp8,fp8,0,0.012814399600028992
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1,64,1,128,1,float16,fp8,0,0.012603199481964112
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1,64,1,128,1,float16,float16,0,0.013142399489879608
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1,64,1,128,1,fp8,fp8,0,0.012507200241088867
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1,64,2,128,1,float16,float16,0,0.014084799587726593
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1,64,2,128,1,float16,fp8,0,0.012492799758911132
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1,64,2,128,1,fp8,fp8,0,0.01250080019235611
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1,64,4,128,1,float16,float16,0,0.013145600259304047
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1,64,4,128,1,float16,fp8,0,0.012531200051307678
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1,64,4,128,1,fp8,fp8,0,0.012564800679683685
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1,64,8,128,1,float16,float16,0,0.013153600692749023
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1,64,2,128,1,fp8,fp8,0,0.04934720098972321
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1,64,8,128,1,float16,fp8,0,0.012544000148773193
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1,64,8,128,1,fp8,fp8,0,0.012535999715328216
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1,64,64,128,1,float16,float16,0,0.012516799569129943
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1,64,64,128,1,float16,fp8,0,0.0105103999376297
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1,64,64,128,1,fp8,fp8,0,0.01051200032234192
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1,64,1,128,1,float16,float16,0,0.010427200049161912
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1,64,8,128,1,fp8,fp8,0,0.04939039945602417
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1,64,1,128,1,float16,fp8,0,0.010487999767065048
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1,64,2,128,1,float16,float16,0,0.010446400195360184
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1,64,2,128,1,float16,fp8,0,0.010451199859380722
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1,64,2,128,1,fp8,fp8,0,0.010478399693965912
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1,64,4,128,1,float16,fp8,0,0.010462400317192078
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1,64,4,128,1,fp8,fp8,0,0.010497599840164185
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1,64,8,128,1,float16,float16,0,0.010486400127410889
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1,64,8,128,1,fp8,fp8,0,0.010499200224876404
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1,64,8,128,1,float16,fp8,0,0.010473600029945374
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1,64,64,128,1,float16,float16,0,0.010492800176143647
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1,64,64,128,1,float16,fp8,0,0.010550399869680404
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1,64,64,128,1,fp8,fp8,0,0.009755200147628785
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1,64,1,128,1,float16,fp8,0,0.008975999802350998
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1,64,1,128,1,fp8,fp8,0,0.009487999975681305
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1,64,8,128,1,fp8,fp8,0,0.02889760136604309
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1,64,2,128,1,float16,fp8,0,0.009643200039863586
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1,64,2,128,1,float16,float16,0,0.009667199850082398
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1,64,2,128,1,fp8,fp8,0,0.010054399818181991
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1,64,4,128,1,float16,fp8,0,0.010391999781131745
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1,64,4,128,1,fp8,fp8,0,0.01043199971318245
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1,64,8,128,1,float16,float16,0,0.01048159971833229
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1,64,8,128,1,float16,fp8,0,0.008816000074148178
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1,64,8,128,1,fp8,fp8,0,0.009335999935865402
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1,64,8,128,1,float16,fp8,0,0.018590399622917177
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1,64,64,128,1,float16,fp8,0,0.012483199685811996
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1,64,1,128,1,fp8,fp8,0,0.01048479974269867
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1,64,4,128,1,float16,float16,0,0.010520000010728836
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1,64,1,128,1,float16,float16,0,0.010446400195360184
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1,64,4,128,1,float16,float16,0,0.010449600219726563
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,16384,48,1,128,1,float16,fp8,0,17.252011108398438
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,16384,48,1,128,1,fp8,fp8,0,17.195045471191406
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,16384,48,2,128,1,float16,fp8,0,17.15076446533203
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,16384,48,2,128,1,fp8,fp8,0,17.31827392578125
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,16384,48,4,128,1,float16,fp8,0,17.46399841308594
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,16384,48,1,128,1,float16,float16,0,21.8139404296875
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,16384,48,2,128,1,float16,float16,0,21.964292907714842
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,16384,48,4,128,1,float16,float16,0,22.375379943847655
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16384,48,48,128,1,float16,fp8,0,8.938774108886719
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16384,48,48,128,1,fp8,fp8,0,8.914105224609376
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,16384,48,4,128,1,fp8,fp8,0,17.628623962402344
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16384,48,1,128,1,float16,float16,0,11.167508697509765
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,16384,48,8,128,1,float16,fp8,0,17.57978973388672
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,16384,48,8,128,1,fp8,fp8,0,17.6482177734375
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,16384,48,8,128,1,float16,float16,0,22.36741485595703
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16384,48,1,128,1,float16,fp8,0,8.687388610839843
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16384,48,1,128,1,fp8,fp8,0,8.693163299560547
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16384,48,2,128,1,float16,fp8,0,8.828604888916015
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16384,48,2,128,1,fp8,fp8,0,8.806609344482421
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16384,48,2,128,1,float16,float16,0,11.260797119140625
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16384,48,4,128,1,float16,float16,0,11.353713226318359
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16384,48,4,128,1,float16,fp8,0,8.652040100097656
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16384,48,4,128,1,fp8,fp8,0,8.76409912109375
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16384,48,48,128,1,float16,fp8,0,4.614742279052734
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16384,48,8,128,1,float16,fp8,0,8.908830261230468
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16384,48,48,128,1,fp8,fp8,0,4.536044692993164
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16384,48,8,128,1,fp8,fp8,0,8.84820785522461
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16384,48,8,128,1,float16,float16,0,11.34222869873047
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16384,48,1,128,1,float16,float16,0,5.558153533935547
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16384,48,1,128,1,float16,fp8,0,4.346795272827149
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16384,48,1,128,1,fp8,fp8,0,4.479032135009765
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16384,48,2,128,1,float16,fp8,0,4.421124649047852
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16384,48,2,128,1,float16,float16,0,5.6231952667236325
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16384,48,2,128,1,fp8,fp8,0,4.583260726928711
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16384,48,4,128,1,float16,float16,0,5.635857772827149
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16384,48,4,128,1,float16,fp8,0,4.386308670043945
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16384,48,4,128,1,fp8,fp8,0,4.501036834716797
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16384,48,8,128,1,float16,fp8,0,4.37438735961914
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16384,48,8,128,1,float16,float16,0,5.597192001342774
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16384,48,48,128,1,float16,fp8,0,2.333734321594238
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16384,48,8,128,1,fp8,fp8,0,4.523735809326172
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16384,48,48,128,1,fp8,fp8,0,2.3160415649414063
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16384,48,1,128,1,float16,fp8,0,2.144623947143555
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16384,48,1,128,1,fp8,fp8,0,2.23024959564209
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16384,48,1,128,1,float16,float16,0,2.759921646118164
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16384,48,2,128,1,float16,fp8,0,2.224046325683594
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16384,48,2,128,1,float16,float16,0,2.679969596862793
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16384,48,2,128,1,fp8,fp8,0,2.1560367584228515
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16384,48,4,128,1,float16,float16,0,2.71636962890625
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16384,48,4,128,1,float16,fp8,0,2.425017547607422
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16384,48,4,128,1,fp8,fp8,0,2.228521537780762
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16384,48,8,128,1,float16,fp8,0,2.1627840042114257
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16384,48,8,128,1,float16,float16,0,2.802569580078125
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16384,48,8,128,1,fp8,fp8,0,2.5003936767578123
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,12288,48,1,128,1,float16,fp8,0,10.155178833007813
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,12288,48,1,128,1,fp8,fp8,0,9.980101013183594
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,12288,48,1,128,1,float16,float16,0,12.747058868408203
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,12288,48,2,128,1,float16,fp8,0,10.286268615722657
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,12288,48,2,128,1,float16,float16,0,12.894552612304688
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,12288,48,2,128,1,fp8,fp8,0,10.176441955566407
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,12288,48,4,128,1,float16,fp8,0,10.085964965820313
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,12288,48,4,128,1,fp8,fp8,0,10.350118255615234
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,12288,48,4,128,1,float16,float16,0,13.015780639648437
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,12288,48,8,128,1,float16,float16,0,12.942781066894531
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,12288,48,48,128,1,float16,fp8,0,5.403763198852539
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,12288,48,8,128,1,float16,fp8,0,10.14581756591797
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,12288,48,8,128,1,fp8,fp8,0,10.150004577636718
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,12288,48,48,128,1,fp8,fp8,0,5.433084869384766
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,12288,48,1,128,1,float16,float16,0,6.3716896057128904
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,12288,48,1,128,1,float16,fp8,0,5.085505676269531
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,12288,48,1,128,1,fp8,fp8,0,5.056763076782227
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,12288,48,2,128,1,float16,float16,0,6.431585693359375
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,12288,48,2,128,1,float16,fp8,0,5.098998260498047
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,12288,48,2,128,1,fp8,fp8,0,5.075096130371094
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,12288,48,4,128,1,float16,float16,0,6.363119888305664
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,12288,48,4,128,1,float16,fp8,0,5.133607864379883
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,12288,48,4,128,1,fp8,fp8,0,5.047998428344727
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,12288,48,8,128,1,float16,float16,0,6.541140747070313
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,12288,48,8,128,1,float16,fp8,0,5.366782379150391
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,12288,48,8,128,1,fp8,fp8,0,5.052137756347657
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,12288,48,48,128,1,float16,fp8,0,2.951190376281738
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,12288,48,48,128,1,fp8,fp8,0,2.647964859008789
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,12288,48,1,128,1,float16,float16,0,3.06918888092041
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,12288,48,1,128,1,float16,fp8,0,2.531363105773926
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,12288,48,1,128,1,fp8,fp8,0,2.508203125
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,12288,48,2,128,1,float16,float16,0,3.115675163269043
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,12288,48,2,128,1,float16,fp8,0,2.4593280792236327
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,12288,48,2,128,1,fp8,fp8,0,2.47882080078125
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,12288,48,4,128,1,float16,fp8,0,2.578878402709961
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,12288,48,4,128,1,float16,float16,0,3.213673782348633
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,12288,48,4,128,1,fp8,fp8,0,2.5032480239868162
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,12288,48,8,128,1,float16,float16,0,3.136387252807617
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,12288,48,8,128,1,float16,fp8,0,2.7631568908691406
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,12288,48,8,128,1,fp8,fp8,0,2.5210256576538086
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,12288,48,48,128,1,float16,fp8,0,1.4006367683410645
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,12288,48,48,128,1,fp8,fp8,0,1.426574420928955
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,12288,48,1,128,1,float16,float16,0,1.4901151657104492
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,12288,48,1,128,1,float16,fp8,0,1.3040176391601563
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,12288,48,1,128,1,fp8,fp8,0,1.5574735641479491
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,12288,48,2,128,1,float16,float16,0,1.4608367919921874
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,12288,48,2,128,1,float16,fp8,0,1.3338784217834472
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,12288,48,2,128,1,fp8,fp8,0,1.45797758102417
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,12288,48,4,128,1,float16,float16,0,1.4925375938415528
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,12288,48,4,128,1,float16,fp8,0,1.3634127616882323
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,12288,48,4,128,1,fp8,fp8,0,1.4359951972961427
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,12288,48,8,128,1,float16,float16,0,1.490999984741211
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,12288,48,8,128,1,float16,fp8,0,1.3083744049072266
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,12288,48,8,128,1,fp8,fp8,0,1.5775216102600098
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,10240,48,1,128,1,float16,fp8,0,7.094169616699219
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,10240,48,1,128,1,float16,float16,0,8.984905242919922
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,10240,48,1,128,1,fp8,fp8,0,7.1399375915527346
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,10240,48,2,128,1,float16,float16,0,9.093430328369141
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,10240,48,2,128,1,float16,fp8,0,7.209164428710937
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,10240,48,2,128,1,fp8,fp8,0,7.21103515625
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,10240,48,4,128,1,float16,float16,0,9.107672119140625
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,10240,48,4,128,1,float16,fp8,0,7.185566711425781
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,10240,48,4,128,1,fp8,fp8,0,7.146875
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,10240,48,8,128,1,float16,float16,0,9.378097534179688
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,10240,48,8,128,1,float16,fp8,0,7.12018051147461
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,10240,48,8,128,1,fp8,fp8,0,7.226296234130859
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16384,48,48,128,1,float16,float16,0,2.6360895156860353
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,10240,48,48,128,1,float16,fp8,0,3.7868576049804688
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,12288,48,48,128,1,float16,float16,0,1.5526559829711915
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,12288,48,48,128,1,float16,float16,0,3.2151023864746096
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,10240,48,48,128,1,fp8,fp8,0,3.841787338256836
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,12288,48,48,128,1,float16,float16,0,6.531942749023438
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,10240,48,1,128,1,float16,float16,0,4.389860916137695
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,10240,48,48,128,1,float16,float16,0,4.664483261108399
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,10240,48,1,128,1,float16,fp8,0,3.6256431579589843
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,10240,48,1,128,1,fp8,fp8,0,3.6993568420410154
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16384,48,48,128,1,float16,float16,0,11.531565093994141
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,10240,48,2,128,1,float16,float16,0,4.408062362670899
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,10240,48,2,128,1,float16,fp8,0,3.696628952026367
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,10240,48,2,128,1,fp8,fp8,0,3.7385345458984376
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,10240,48,4,128,1,float16,fp8,0,3.4748401641845703
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16384,48,48,128,1,float16,float16,0,5.6840766906738285
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,10240,48,4,128,1,float16,float16,0,4.388782501220703
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,10240,48,4,128,1,fp8,fp8,0,3.439825439453125
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,10240,48,48,128,1,float16,float16,0,2.2502559661865233
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,10240,48,48,128,1,float16,fp8,0,2.1205423355102537
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,10240,48,48,128,1,fp8,fp8,0,2.1168848037719727
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,10240,48,8,128,1,float16,fp8,0,3.523931121826172
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,10240,48,1,128,1,float16,float16,0,2.0332592010498045
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,10240,48,8,128,1,fp8,fp8,0,3.573545455932617
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,10240,48,8,128,1,float16,float16,0,4.530944061279297
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,10240,48,1,128,1,float16,fp8,0,2.1437088012695313
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,10240,48,1,128,1,fp8,fp8,0,1.7672624588012695
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,10240,48,2,128,1,float16,fp8,0,1.822702407836914
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,10240,48,2,128,1,float16,float16,0,2.0856559753417967
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,10240,48,2,128,1,fp8,fp8,0,2.043075180053711
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,10240,48,4,128,1,float16,float16,0,2.19836483001709
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,10240,48,4,128,1,float16,fp8,0,2.3940656661987303
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,10240,48,4,128,1,fp8,fp8,0,1.7681711196899415
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,10240,48,48,128,1,float16,float16,0,1.1317376136779784
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,10240,48,48,128,1,float16,fp8,0,0.9693008422851562
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,10240,48,8,128,1,float16,fp8,0,1.8098783493041992
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,10240,48,48,128,1,fp8,fp8,0,1.2505359649658203
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,10240,48,8,128,1,fp8,fp8,0,2.119500732421875
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,10240,48,1,128,1,float16,fp8,0,0.9279007911682129
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,10240,48,1,128,1,float16,float16,0,1.047220802307129
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,10240,48,1,128,1,fp8,fp8,0,1.014748764038086
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,10240,48,2,128,1,float16,float16,0,1.0319999694824218
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,10240,48,2,128,1,float16,fp8,0,0.9492624282836915
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,10240,48,2,128,1,fp8,fp8,0,1.0010047912597657
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,10240,48,4,128,1,float16,fp8,0,0.9114192008972168
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,10240,48,4,128,1,fp8,fp8,0,1.0001680374145507
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,10240,48,8,128,1,float16,float16,0,1.0764528274536134
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,10240,48,8,128,1,fp8,fp8,0,0.9283535957336426
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,10240,48,8,128,1,float16,fp8,0,1.019384002685547
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,10240,48,8,128,1,float16,float16,0,2.080684852600098
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,10240,48,4,128,1,float16,float16,0,1.0624688148498536
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,8192,48,1,128,1,float16,fp8,0,9.342249298095703
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,8192,48,1,128,1,fp8,fp8,0,9.314730834960937
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,8192,48,2,128,1,fp8,fp8,0,9.158920288085938
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,8192,48,2,128,1,float16,fp8,0,9.446182250976562
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,8192,48,1,128,1,float16,float16,0,11.961521911621094
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,8192,48,2,128,1,float16,float16,0,11.884998321533203
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,8192,48,4,128,1,float16,fp8,0,9.17959213256836
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,8192,48,4,128,1,float16,float16,0,11.586894226074218
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,8192,48,48,128,1,float16,fp8,0,5.147108840942383
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,8192,48,48,128,1,float16,float16,0,6.528663635253906
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,8192,48,48,128,1,fp8,fp8,0,4.9252479553222654
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,8192,48,4,128,1,fp8,fp8,0,9.45697250366211
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,8192,48,1,128,1,float16,float16,0,5.8762367248535154
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,8192,48,8,128,1,float16,fp8,0,9.588507080078125
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,8192,48,8,128,1,fp8,fp8,0,9.605353546142577
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,8192,48,8,128,1,float16,float16,0,12.287798309326172
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,8192,48,1,128,1,float16,fp8,0,4.547875213623047
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,8192,48,1,128,1,fp8,fp8,0,4.745460891723633
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,8192,48,2,128,1,float16,fp8,0,4.696214294433593
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,8192,48,2,128,1,float16,float16,0,6.009860610961914
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,8192,48,4,128,1,float16,fp8,0,4.715784072875977
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,8192,48,2,128,1,fp8,fp8,0,4.868334579467773
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,8192,48,4,128,1,float16,float16,0,6.06872787475586
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,8192,48,4,128,1,fp8,fp8,0,4.8980976104736325
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,8192,48,48,128,1,float16,fp8,0,2.598227119445801
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,8192,48,48,128,1,fp8,fp8,0,2.4821887969970704
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,8192,48,48,128,1,float16,float16,0,3.2869422912597654
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,8192,48,8,128,1,float16,fp8,0,4.706475067138672
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,8192,48,8,128,1,float16,float16,0,5.882004928588867
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,8192,48,8,128,1,fp8,fp8,0,4.715683364868164
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,8192,48,1,128,1,float16,float16,0,2.8878576278686525
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,8192,48,1,128,1,float16,fp8,0,2.3356943130493164
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,8192,48,1,128,1,fp8,fp8,0,2.3573984146118163
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,8192,48,2,128,1,float16,float16,0,2.7616928100585936
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,8192,48,2,128,1,float16,fp8,0,2.6666431427001953
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,8192,48,4,128,1,float16,fp8,0,2.385024070739746
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,8192,48,4,128,1,fp8,fp8,0,2.6167648315429686
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,8192,48,4,128,1,float16,float16,0,3.0057167053222655
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,8192,48,8,128,1,float16,fp8,0,2.3161376953125
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,8192,48,8,128,1,float16,float16,0,2.8353296279907227
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,8192,48,48,128,1,float16,float16,0,1.4787232398986816
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,8192,48,48,128,1,float16,fp8,0,1.3121135711669922
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,8192,48,48,128,1,fp8,fp8,0,1.29246244430542
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,8192,48,8,128,1,fp8,fp8,0,2.5670799255371093
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,8192,48,1,128,1,float16,float16,0,1.6033552169799805
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,8192,48,1,128,1,float16,fp8,0,1.2360416412353517
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,8192,48,2,128,1,float16,fp8,0,1.2045087814331055
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,8192,48,2,128,1,float16,float16,0,1.4747872352600098
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,8192,48,1,128,1,fp8,fp8,0,1.5276623725891114
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,8192,48,2,128,1,fp8,fp8,0,1.1814831733703612
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,8192,48,4,128,1,float16,float16,0,1.3583151817321777
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,8192,48,2,128,1,fp8,fp8,0,2.385603141784668
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,8192,48,4,128,1,fp8,fp8,0,1.2117008209228515
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,8192,48,48,128,1,float16,float16,0,0.7559135913848877
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,8192,48,48,128,1,fp8,fp8,0,0.6611040115356446
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,8192,48,48,128,1,float16,fp8,0,0.7705535888671875
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,8192,48,8,128,1,float16,float16,0,1.3908160209655762
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,8192,48,8,128,1,float16,fp8,0,1.3285823822021485
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,8192,48,8,128,1,fp8,fp8,0,1.2773728370666504
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,8192,48,1,128,1,float16,float16,0,0.7022736072540283
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,8192,48,1,128,1,float16,fp8,0,0.6460864067077636
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,8192,48,1,128,1,fp8,fp8,0,0.6151840209960937
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,8192,48,2,128,1,float16,float16,0,0.7245168209075927
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,8192,48,2,128,1,fp8,fp8,0,0.6559792041778565
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,8192,48,4,128,1,float16,float16,0,0.698683214187622
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,8192,48,4,128,1,float16,fp8,0,0.6435184001922607
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,8192,48,4,128,1,fp8,fp8,0,0.615174388885498
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,8192,48,8,128,1,float16,fp8,0,0.633076810836792
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,8192,48,8,128,1,float16,float16,0,0.732804822921753
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,8192,48,8,128,1,fp8,fp8,0,0.6339056015014648
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,8192,48,4,128,1,float16,fp8,0,1.4168736457824707
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,8192,48,2,128,1,float16,fp8,0,0.7590752124786377
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,6144,48,1,128,1,float16,fp8,0,5.361627197265625
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,6144,48,1,128,1,fp8,fp8,0,5.367561721801758
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,6144,48,2,128,1,float16,fp8,0,5.519499206542969
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,6144,48,1,128,1,float16,float16,0,6.952505493164063
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,6144,48,2,128,1,float16,float16,0,6.902164459228516
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,6144,48,2,128,1,fp8,fp8,0,5.344235229492187
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,6144,48,4,128,1,float16,fp8,0,5.353792190551758
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,6144,48,4,128,1,float16,float16,0,6.617710113525391
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,6144,48,48,128,1,float16,fp8,0,2.961016082763672
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,6144,48,4,128,1,fp8,fp8,0,5.454827117919922
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,6144,48,48,128,1,float16,float16,0,3.8260223388671877
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,6144,48,8,128,1,float16,fp8,0,5.522792053222656
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,6144,48,48,128,1,fp8,fp8,0,2.9643856048583985
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,6144,48,8,128,1,float16,float16,0,6.97364501953125
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,6144,48,8,128,1,fp8,fp8,0,5.645280075073242
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,6144,48,1,128,1,float16,float16,0,3.320700836181641
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,6144,48,1,128,1,float16,fp8,0,2.6620672225952147
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,6144,48,1,128,1,fp8,fp8,0,2.7636112213134765
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,6144,48,2,128,1,float16,fp8,0,2.7636240005493162
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,6144,48,2,128,1,float16,float16,0,3.408639907836914
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,6144,48,2,128,1,fp8,fp8,0,2.711742401123047
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,6144,48,4,128,1,float16,fp8,0,2.779524803161621
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,6144,48,4,128,1,fp8,fp8,0,2.969758415222168
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,6144,48,4,128,1,float16,float16,0,3.2319358825683593
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,6144,48,48,128,1,float16,fp8,0,1.4889936447143555
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,6144,48,48,128,1,float16,float16,0,1.892430305480957
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,6144,48,8,128,1,float16,float16,0,3.3687759399414063
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,6144,48,48,128,1,fp8,fp8,0,1.5244223594665527
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,6144,48,8,128,1,fp8,fp8,0,2.8034591674804688
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,6144,48,1,128,1,float16,float16,0,1.5338255882263183
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,6144,48,1,128,1,float16,fp8,0,1.6301904678344727
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,6144,48,1,128,1,fp8,fp8,0,1.3573264122009276
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,6144,48,2,128,1,float16,float16,0,1.7060192108154297
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,6144,48,2,128,1,fp8,fp8,0,1.3610527992248536
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,6144,48,2,128,1,float16,fp8,0,1.787932777404785
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,6144,48,4,128,1,float16,float16,0,1.5447440147399902
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,6144,48,4,128,1,float16,fp8,0,1.6401567459106445
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,6144,48,4,128,1,fp8,fp8,0,1.4130144119262695
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,6144,48,48,128,1,float16,float16,0,0.9446623802185059
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,6144,48,8,128,1,float16,float16,0,1.6142543792724608
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,6144,48,8,128,1,fp8,fp8,0,1.3781583786010743
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,6144,48,48,128,1,float16,fp8,0,0.9173151969909668
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,6144,48,8,128,1,float16,fp8,0,1.6745616912841796
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,6144,48,8,128,1,float16,fp8,0,2.771406364440918
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,6144,48,1,128,1,float16,float16,0,0.7860432147979737
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,6144,48,48,128,1,fp8,fp8,0,0.8309167861938477
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,6144,48,1,128,1,float16,fp8,0,0.8476495742797852
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,6144,48,1,128,1,fp8,fp8,0,0.9180368423461914
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,6144,48,2,128,1,float16,float16,0,0.7960159778594971
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,6144,48,2,128,1,float16,fp8,0,0.7889776229858398
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,6144,48,2,128,1,fp8,fp8,0,0.7389008045196533
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,6144,48,4,128,1,float16,fp8,0,0.7278672218322754
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,6144,48,4,128,1,fp8,fp8,0,0.7187856197357178
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,6144,48,48,128,1,float16,float16,0,0.4698031902313232
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,6144,48,8,128,1,float16,float16,0,0.8095279693603515
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,6144,48,8,128,1,fp8,fp8,0,0.7402416229248047
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,6144,48,8,128,1,float16,fp8,0,0.9175727844238282
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,6144,48,1,128,1,float16,float16,0,0.41301760673522947
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,6144,48,48,128,1,fp8,fp8,0,0.42902398109436035
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,6144,48,1,128,1,float16,fp8,0,0.42415361404418944
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,6144,48,1,128,1,fp8,fp8,0,0.41521439552307127
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,6144,48,2,128,1,float16,float16,0,0.425110387802124
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,6144,48,2,128,1,fp8,fp8,0,0.3854095935821533
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,6144,48,4,128,1,float16,float16,0,0.4180784225463867
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,6144,48,4,128,1,float16,fp8,0,0.4266975879669189
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,6144,48,4,128,1,fp8,fp8,0,0.39691040515899656
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,6144,48,8,128,1,float16,float16,0,0.42876157760620115
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,6144,48,8,128,1,float16,fp8,0,0.3813999891281128
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,6144,48,8,128,1,fp8,fp8,0,0.3820768117904663
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,6144,48,4,128,1,float16,float16,0,0.7840720176696777
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,6144,48,48,128,1,float16,fp8,0,0.4318336009979248
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,6144,48,2,128,1,float16,fp8,0,0.5004816055297852
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,4096,48,1,128,1,float16,fp8,0,5.18326416015625
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,4096,48,1,128,1,fp8,fp8,0,5.259468841552734
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,4096,48,2,128,1,float16,fp8,0,5.186062240600586
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,4096,48,1,128,1,float16,float16,0,6.552097320556641
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,4096,48,2,128,1,float16,float16,0,6.400862121582032
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,4096,48,2,128,1,fp8,fp8,0,5.1219535827636715
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,4096,48,4,128,1,float16,fp8,0,5.134830474853516
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,4096,48,4,128,1,float16,float16,0,6.366110229492188
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,4096,48,48,128,1,float16,float16,0,3.6946990966796873
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,4096,48,4,128,1,fp8,fp8,0,5.146470260620117
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,4096,48,48,128,1,float16,fp8,0,2.922612762451172
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,4096,48,8,128,1,float16,fp8,0,5.224982452392578
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,4096,48,48,128,1,fp8,fp8,0,2.924491119384766
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,4096,48,8,128,1,float16,float16,0,6.711195373535157
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,4096,48,8,128,1,fp8,fp8,0,5.342569732666016
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,4096,48,1,128,1,fp8,fp8,0,2.5888431549072264
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,4096,48,1,128,1,float16,fp8,0,2.9011920928955077
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,4096,48,2,128,1,float16,fp8,0,2.6259103775024415
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,4096,48,2,128,1,float16,float16,0,3.048391914367676
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,4096,48,2,128,1,fp8,fp8,0,2.5655424118041994
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,4096,48,4,128,1,float16,fp8,0,2.5993423461914062
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,4096,48,4,128,1,float16,float16,0,3.197604751586914
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,4096,48,1,128,1,float16,float16,0,3.1172304153442383
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,4096,48,4,128,1,fp8,fp8,0,2.585811233520508
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,4096,48,8,128,1,float16,fp8,0,2.782619285583496
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,4096,48,48,128,1,float16,float16,0,1.787068748474121
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,4096,48,8,128,1,float16,float16,0,3.2137664794921874
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,4096,48,8,128,1,fp8,fp8,0,2.5583391189575195
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,4096,48,48,128,1,fp8,fp8,0,1.4638511657714843
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,4096,48,48,128,1,float16,fp8,0,1.9384143829345704
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,4096,48,1,128,1,float16,float16,0,1.440833568572998
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,4096,48,1,128,1,float16,fp8,0,1.3312944412231444
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,4096,48,1,128,1,fp8,fp8,0,1.4837519645690918
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,4096,48,2,128,1,float16,float16,0,1.5826432228088378
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,4096,48,2,128,1,float16,fp8,0,1.3322719573974608
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,4096,48,2,128,1,fp8,fp8,0,1.2848784446716308
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,4096,48,4,128,1,float16,fp8,0,1.5046095848083496
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,4096,48,4,128,1,float16,float16,0,1.5799103736877442
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,4096,48,4,128,1,fp8,fp8,0,1.3257231712341309
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,4096,48,48,128,1,float16,float16,0,0.8634799957275391
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,4096,48,8,128,1,fp8,fp8,0,1.315014362335205
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,4096,48,48,128,1,float16,fp8,0,0.8003199577331543
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,4096,48,8,128,1,float16,float16,0,1.4771648406982423
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,4096,48,48,128,1,fp8,fp8,0,0.8854991912841796
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,4096,48,1,128,1,float16,float16,0,0.7280655860900879
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,4096,48,1,128,1,float16,fp8,0,0.6999311923980713
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,4096,48,1,128,1,fp8,fp8,0,0.7212495803833008
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,4096,48,2,128,1,float16,fp8,0,0.6849152088165283
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,4096,48,2,128,1,fp8,fp8,0,0.7835360050201416
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,4096,48,4,128,1,float16,float16,0,0.7597184181213379
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,4096,48,4,128,1,fp8,fp8,0,0.6766128063201904
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,4096,48,8,128,1,float16,float16,0,0.758678388595581
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,4096,48,8,128,1,float16,fp8,0,0.6703216075897217
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,4096,48,48,128,1,float16,fp8,0,0.40758719444274905
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,4096,48,8,128,1,fp8,fp8,0,0.7592688083648682
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,4096,48,48,128,1,fp8,fp8,0,0.4304800033569336
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,4096,48,1,128,1,float16,float16,0,0.3796272039413452
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,4096,48,1,128,1,float16,fp8,0,0.35897440910339357
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,4096,48,1,128,1,fp8,fp8,0,0.3659856081008911
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,4096,48,2,128,1,float16,float16,0,0.3932816028594971
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,4096,48,2,128,1,float16,fp8,0,0.3566688060760498
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,4096,48,2,128,1,fp8,fp8,0,0.35776960849761963
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,4096,48,4,128,1,float16,float16,0,0.386244797706604
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,4096,48,4,128,1,float16,fp8,0,0.3758975982666016
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,4096,48,4,128,1,fp8,fp8,0,0.3556895971298218
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,4096,48,8,128,1,float16,float16,0,0.3864383935928345
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,4096,48,8,128,1,float16,fp8,0,0.3576848030090332
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,4096,48,48,128,1,float16,float16,0,0.24543681144714355
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,4096,48,48,128,1,float16,fp8,0,0.22449440956115724
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,4096,48,48,128,1,fp8,fp8,0,0.22363839149475098
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,4096,48,1,128,1,float16,float16,0,0.21483199596405028
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,4096,48,8,128,1,float16,fp8,0,1.496628761291504
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,4096,48,1,128,1,float16,fp8,0,0.19857439994812012
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,4096,48,1,128,1,fp8,fp8,0,0.2077183961868286
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,4096,48,2,128,1,fp8,fp8,0,0.19903199672698973
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,4096,48,4,128,1,float16,float16,0,0.2096735954284668
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,4096,48,4,128,1,float16,fp8,0,0.19860960245132447
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,4096,48,2,128,1,float16,float16,0,0.730072021484375
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,4096,48,4,128,1,fp8,fp8,0,0.1985327959060669
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,4096,48,4,128,1,float16,fp8,0,0.6658383846282959
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,4096,48,8,128,1,float16,float16,0,0.21169118881225585
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,4096,48,8,128,1,float16,fp8,0,0.19617439508438111
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,4096,48,8,128,1,fp8,fp8,0,0.1975664019584656
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,4096,48,48,128,1,float16,float16,0,0.46501760482788085
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,4096,48,8,128,1,fp8,fp8,0,0.36731839179992676
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,4096,48,2,128,1,float16,float16,0,0.20867679119110108
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,4096,48,2,128,1,float16,fp8,0,0.19680800437927246
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,3072,48,1,128,1,float16,fp8,0,3.0553728103637696
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,3072,48,1,128,1,fp8,fp8,0,3.0578351974487306
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,3072,48,1,128,1,float16,float16,0,3.6211551666259765
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,3072,48,2,128,1,float16,fp8,0,3.0519392013549806
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,3072,48,2,128,1,float16,float16,0,3.6959087371826174
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,3072,48,2,128,1,fp8,fp8,0,3.0658895492553713
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,3072,48,4,128,1,float16,fp8,0,3.0686607360839844
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,3072,48,4,128,1,fp8,fp8,0,3.1224271774291994
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,3072,48,4,128,1,float16,float16,0,3.7003631591796875
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,3072,48,8,128,1,fp8,fp8,0,3.1101856231689453
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,3072,48,8,128,1,float16,float16,0,3.7720497131347654
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,3072,48,8,128,1,float16,fp8,0,3.2523040771484375
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,3072,48,48,128,1,float16,fp8,0,1.8243440628051757
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,3072,48,1,128,1,float16,float16,0,1.764406394958496
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,3072,48,1,128,1,fp8,fp8,0,1.5979647636413574
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,3072,48,1,128,1,float16,fp8,0,1.857748794555664
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,3072,48,2,128,1,float16,fp8,0,1.5687376022338868
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,3072,48,48,128,1,fp8,fp8,0,1.824073600769043
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,3072,48,2,128,1,fp8,fp8,0,1.5444831848144531
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,3072,48,48,128,1,float16,float16,0,2.3162927627563477
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,3072,48,4,128,1,float16,fp8,0,1.5598159790039063
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,3072,48,4,128,1,float16,float16,0,1.8874303817749023
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,3072,48,4,128,1,fp8,fp8,0,1.5817808151245116
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,3072,48,8,128,1,float16,fp8,0,1.5596320152282714
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,3072,48,8,128,1,float16,float16,0,1.7511791229248046
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,3072,48,8,128,1,fp8,fp8,0,1.5463168144226074
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,3072,48,48,128,1,float16,float16,0,1.0601008415222168
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,3072,48,48,128,1,float16,fp8,0,0.9256352424621582
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,3072,48,1,128,1,float16,float16,0,0.8632335662841797
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,3072,48,1,128,1,float16,fp8,0,0.7999055862426758
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,3072,48,48,128,1,fp8,fp8,0,1.1336992263793946
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,3072,48,2,128,1,float16,float16,0,0.8672672271728515
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,3072,48,1,128,1,fp8,fp8,0,0.9867487907409668
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,3072,48,2,128,1,float16,fp8,0,0.7885824203491211
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,3072,48,4,128,1,float16,float16,0,0.8898880004882812
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,3072,48,4,128,1,float16,fp8,0,0.8309455871582031
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,3072,48,4,128,1,fp8,fp8,0,0.8404512405395508
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,3072,48,8,128,1,float16,float16,0,0.9001744270324707
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,3072,48,2,128,1,float16,float16,0,1.9246959686279297
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,3072,48,8,128,1,float16,fp8,0,0.8102047920227051
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,3072,48,48,128,1,float16,float16,0,0.5391136169433594
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,3072,48,48,128,1,float16,fp8,0,0.5047743797302247
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,3072,48,1,128,1,float16,float16,0,0.43671679496765137
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,3072,48,8,128,1,fp8,fp8,0,0.8654512405395508
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,3072,48,48,128,1,fp8,fp8,0,0.5513967990875244
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,3072,48,1,128,1,float16,fp8,0,0.41463680267333985
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,3072,48,2,128,1,float16,float16,0,0.44339518547058104
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,3072,48,1,128,1,fp8,fp8,0,0.45290398597717285
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,3072,48,2,128,1,float16,fp8,0,0.4120272159576416
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,3072,48,2,128,1,fp8,fp8,0,0.44116640090942383
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,3072,48,4,128,1,float16,float16,0,0.47721118927001954
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,3072,48,4,128,1,float16,fp8,0,0.41453118324279786
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,3072,48,4,128,1,fp8,fp8,0,0.4165311813354492
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,3072,48,48,128,1,float16,float16,0,0.2918783903121948
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,3072,48,8,128,1,float16,float16,0,0.47094078063964845
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,3072,48,8,128,1,float16,fp8,0,0.4117248058319092
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,3072,48,8,128,1,fp8,fp8,0,0.44199681282043457
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,3072,48,48,128,1,float16,fp8,0,0.2592704057693481
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,3072,48,48,128,1,fp8,fp8,0,0.2604640007019043
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,3072,48,1,128,1,float16,float16,0,0.24552481174468993
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,3072,48,1,128,1,float16,fp8,0,0.23306241035461425
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,3072,48,2,128,1,float16,float16,0,0.23739519119262695
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,3072,48,2,128,1,float16,fp8,0,0.2245471954345703
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,3072,48,2,128,1,fp8,fp8,0,0.22553279399871826
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,3072,48,4,128,1,float16,float16,0,0.246726393699646
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,3072,48,4,128,1,float16,fp8,0,0.22788798809051514
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,3072,48,4,128,1,fp8,fp8,0,0.2260063886642456
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,3072,48,8,128,1,float16,float16,0,0.23849599361419677
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,3072,48,8,128,1,float16,fp8,0,0.22595999240875245
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,3072,48,8,128,1,fp8,fp8,0,0.23149440288543702
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,3072,48,48,128,1,float16,fp8,0,0.14769920110702514
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,3072,48,48,128,1,fp8,fp8,0,0.14708479642868041
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,3072,48,1,128,1,float16,float16,0,0.13301600217819215
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,3072,48,1,128,1,float16,fp8,0,0.13248319625854493
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,3072,48,1,128,1,fp8,fp8,0,0.12947360277175904
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,3072,48,2,128,1,float16,float16,0,0.13311840295791627
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,3072,48,2,128,1,float16,fp8,0,0.12812000513076782
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,3072,48,2,128,1,fp8,fp8,0,0.13044639825820922
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,3072,48,4,128,1,float16,fp8,0,0.12832800149917603
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,3072,48,2,128,1,fp8,fp8,0,0.78787841796875
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,3072,48,4,128,1,fp8,fp8,0,0.12780159711837769
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,3072,48,8,128,1,float16,float16,0,0.13875360488891603
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,3072,48,8,128,1,float16,fp8,0,0.12839200496673583
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,3072,48,8,128,1,fp8,fp8,0,0.1272752046585083
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,3072,48,1,128,1,fp8,fp8,0,0.22334558963775636
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,2048,48,1,128,1,float16,fp8,0,3.10446720123291
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,2048,48,1,128,1,fp8,fp8,0,3.1030271530151365
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,3072,48,48,128,1,float16,float16,0,0.15879199504852295
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,2048,48,1,128,1,float16,float16,0,3.5706192016601563
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,2048,48,2,128,1,float16,float16,0,3.6180801391601562
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,2048,48,2,128,1,float16,fp8,0,3.0992656707763673
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,2048,48,2,128,1,fp8,fp8,0,3.1005008697509764
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,2048,48,4,128,1,float16,fp8,0,3.1093135833740235
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,2048,48,4,128,1,fp8,fp8,0,3.103041648864746
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,2048,48,4,128,1,float16,float16,0,3.6305423736572267
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,3072,48,4,128,1,float16,float16,0,0.13526879549026488
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,2048,48,8,128,1,float16,fp8,0,3.357156753540039
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,2048,48,8,128,1,float16,float16,0,3.9169921875
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,2048,48,48,128,1,float16,float16,0,2.2349712371826174
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,2048,48,48,128,1,float16,fp8,0,1.9296016693115234
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,2048,48,8,128,1,fp8,fp8,0,3.1212928771972654
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,2048,48,1,128,1,float16,fp8,0,1.562452793121338
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,2048,48,48,128,1,fp8,fp8,0,1.9208751678466798
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,2048,48,1,128,1,float16,float16,0,1.8581951141357422
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,2048,48,1,128,1,fp8,fp8,0,1.572439956665039
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,2048,48,2,128,1,float16,float16,0,1.9196304321289062
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,2048,48,2,128,1,float16,fp8,0,1.765772819519043
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,2048,48,2,128,1,fp8,fp8,0,1.5653471946716309
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,2048,48,4,128,1,float16,fp8,0,1.5613743782043457
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,2048,48,4,128,1,fp8,fp8,0,1.5599823951721192
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,2048,48,4,128,1,float16,float16,0,1.705843162536621
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,2048,48,8,128,1,float16,fp8,0,1.573408031463623
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,2048,48,8,128,1,float16,float16,0,1.9158527374267578
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,2048,48,48,128,1,float16,float16,0,1.101251220703125
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,2048,48,48,128,1,float16,fp8,0,0.9726079940795899
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,2048,48,8,128,1,fp8,fp8,0,1.8429744720458985
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,2048,48,1,128,1,float16,float16,0,0.8481072425842285
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,2048,48,1,128,1,fp8,fp8,0,0.7982768058776856
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,2048,48,1,128,1,float16,fp8,0,0.9029760360717773
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,2048,48,48,128,1,fp8,fp8,0,0.9706784248352051
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,2048,48,2,128,1,float16,float16,0,0.9343407630920411
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,2048,48,2,128,1,float16,fp8,0,0.8000032424926757
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,2048,48,2,128,1,fp8,fp8,0,0.7943999767303467
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,2048,48,4,128,1,fp8,fp8,0,0.7970831871032715
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,2048,48,4,128,1,float16,float16,0,0.8610495567321778
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,2048,48,8,128,1,float16,fp8,0,0.7945663928985596
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,2048,48,8,128,1,float16,float16,0,0.8911375999450684
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,2048,48,48,128,1,float16,float16,0,0.5754672050476074
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,2048,48,8,128,1,fp8,fp8,0,0.9576383590698242
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,2048,48,1,128,1,float16,float16,0,0.4301919937133789
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,2048,48,48,128,1,float16,fp8,0,0.5
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,2048,48,1,128,1,float16,fp8,0,0.41010560989379885
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,2048,48,48,128,1,fp8,fp8,0,0.5006976127624512
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,2048,48,1,128,1,fp8,fp8,0,0.4607423782348633
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,2048,48,2,128,1,float16,float16,0,0.43629279136657717
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,2048,48,2,128,1,float16,fp8,0,0.4524208068847656
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,2048,48,2,128,1,fp8,fp8,0,0.4105696201324463
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,2048,48,4,128,1,float16,fp8,0,0.4094687938690186
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,2048,48,4,128,1,fp8,fp8,0,0.45131359100341795
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,2048,48,8,128,1,float16,float16,0,0.45827679634094237
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,2048,48,48,128,1,float16,float16,0,0.28849918842315675
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,2048,48,8,128,1,float16,fp8,0,0.4299920082092285
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,2048,48,48,128,1,float16,fp8,0,0.27732799053192136
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,2048,48,8,128,1,fp8,fp8,0,0.41124801635742186
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,2048,48,48,128,1,fp8,fp8,0,0.284225606918335
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,2048,48,1,128,1,float16,fp8,0,0.2184432029724121
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,2048,48,1,128,1,fp8,fp8,0,0.222654390335083
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,2048,48,2,128,1,float16,float16,0,0.22559840679168702
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,2048,48,2,128,1,float16,fp8,0,0.23116641044616698
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,2048,48,2,128,1,fp8,fp8,0,0.21908481121063234
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,2048,48,4,128,1,float16,float16,0,0.229803204536438
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,2048,48,4,128,1,fp8,fp8,0,0.23016800880432128
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,2048,48,8,128,1,float16,fp8,0,0.2244112014770508
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,2048,48,8,128,1,fp8,fp8,0,0.22340641021728516
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,2048,48,48,128,1,float16,float16,0,0.15914080142974854
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,2048,48,48,128,1,float16,fp8,0,0.1484943985939026
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,2048,48,1,128,1,float16,float16,0,0.12779519557952881
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,2048,48,1,128,1,float16,fp8,0,0.12444640398025512
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,2048,48,4,128,1,float16,fp8,0,0.7927663803100586
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,2048,48,1,128,1,fp8,fp8,0,0.12329440116882324
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,2048,48,2,128,1,float16,float16,0,0.1286911964416504
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,2048,48,2,128,1,float16,fp8,0,0.12228959798812866
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,2048,48,2,128,1,fp8,fp8,0,0.12150399684906006
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,2048,48,4,128,1,float16,float16,0,0.12759679555892944
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,2048,48,4,128,1,float16,fp8,0,0.12174559831619262
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,2048,48,4,128,1,fp8,fp8,0,0.12173600196838379
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,2048,48,8,128,1,float16,float16,0,0.13182560205459595
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,2048,48,8,128,1,float16,fp8,0,0.12202080488204955
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,2048,48,48,128,1,float16,float16,0,0.09375360012054443
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,2048,48,8,128,1,fp8,fp8,0,0.12242239713668823
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,2048,48,48,128,1,float16,fp8,0,0.08502240180969238
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,2048,48,48,128,1,fp8,fp8,0,0.0851631999015808
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,2048,48,1,128,1,float16,float16,0,0.07667040228843688
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,2048,48,1,128,1,float16,fp8,0,0.0738655984401703
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,2048,48,1,128,1,fp8,fp8,0,0.07417119741439819
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,2048,48,2,128,1,float16,float16,0,0.07648959755897522
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,2048,48,2,128,1,float16,fp8,0,0.07412639856338502
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,2048,48,2,128,1,fp8,fp8,0,0.07363680005073547
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,2048,48,4,128,1,float16,float16,0,0.07683680057525635
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,2048,48,4,128,1,float16,fp8,0,0.07319679856300354
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,2048,48,4,128,1,fp8,fp8,0,0.07364159822463989
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,2048,48,8,128,1,float16,float16,0,0.07826399803161621
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,2048,48,1,128,1,float16,float16,0,0.23350241184234619
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,2048,48,8,128,1,float16,fp8,0,0.07429440021514892
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,2048,48,8,128,1,fp8,fp8,0,0.07375360131263733
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,2048,48,4,128,1,float16,fp8,0,0.22056479454040528
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,2048,48,8,128,1,float16,float16,0,0.23363358974456788
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,2048,48,48,128,1,fp8,fp8,0,0.14776959419250488
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1536,48,1,128,1,float16,float16,0,2.067540740966797
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1536,48,1,128,1,float16,fp8,0,1.9646944046020507
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1536,48,1,128,1,fp8,fp8,0,1.9699199676513672
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1536,48,2,128,1,float16,float16,0,2.1110607147216798
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1536,48,2,128,1,float16,fp8,0,1.9699663162231444
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,2048,48,4,128,1,float16,float16,0,0.43773441314697265
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1536,48,2,128,1,fp8,fp8,0,1.9663936614990234
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1536,48,4,128,1,float16,float16,0,2.146779251098633
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1536,48,48,128,1,float16,fp8,0,1.257372760772705
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1536,48,48,128,1,float16,float16,0,1.4289728164672852
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1536,48,4,128,1,fp8,fp8,0,1.963430404663086
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1536,48,4,128,1,float16,fp8,0,2.1149103164672853
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1536,48,8,128,1,fp8,fp8,0,1.9700431823730469
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1536,48,8,128,1,float16,float16,0,2.2340576171875
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1536,48,8,128,1,float16,fp8,0,2.1482431411743166
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1536,48,48,128,1,fp8,fp8,0,1.382636833190918
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1536,48,1,128,1,float16,float16,0,1.0522624015808106
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1536,48,1,128,1,fp8,fp8,0,0.9922351837158203
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1536,48,1,128,1,float16,fp8,0,1.0662704467773438
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1536,48,2,128,1,float16,float16,0,1.0435215950012207
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1536,48,2,128,1,float16,fp8,0,0.9949088096618652
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1536,48,2,128,1,fp8,fp8,0,1.1999088287353517
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1536,48,4,128,1,float16,fp8,0,1.0198016166687012
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1536,48,4,128,1,fp8,fp8,0,1.1136896133422851
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1536,48,48,128,1,float16,float16,0,0.7179776191711426
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1536,48,8,128,1,float16,float16,0,1.0861264228820802
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1536,48,8,128,1,float16,fp8,0,1.0178784370422362
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1536,48,48,128,1,float16,fp8,0,0.677729606628418
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1536,48,8,128,1,fp8,fp8,0,1.0958463668823242
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1536,48,48,128,1,fp8,fp8,0,0.6485616207122803
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1536,48,1,128,1,float16,float16,0,0.5402639865875244
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1536,48,1,128,1,float16,fp8,0,0.5085519790649414
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1536,48,2,128,1,float16,fp8,0,0.5217936038970947
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1536,48,2,128,1,float16,float16,0,0.5265664100646973
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1536,48,2,128,1,fp8,fp8,0,0.5203504085540771
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1536,48,4,128,1,float16,float16,0,0.5346144199371338
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1536,48,4,128,1,float16,fp8,0,0.5226223945617676
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1536,48,4,128,1,fp8,fp8,0,0.5089312076568604
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1536,48,8,128,1,float16,float16,0,0.5508175849914551
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1536,48,8,128,1,float16,fp8,0,0.5139920234680175
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1536,48,8,128,1,fp8,fp8,0,0.5219808101654053
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1536,48,48,128,1,float16,fp8,0,0.3402911901473999
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1536,48,48,128,1,fp8,fp8,0,0.3330336093902588
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1536,48,1,128,1,float16,float16,0,0.2703488111495972
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1536,48,1,128,1,float16,fp8,0,0.26877760887145996
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1536,48,1,128,1,fp8,fp8,0,0.27295520305633547
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1536,48,2,128,1,float16,float16,0,0.2693327903747559
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1536,48,2,128,1,float16,fp8,0,0.2740832090377808
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1536,48,2,128,1,fp8,fp8,0,0.26869280338287355
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1536,48,4,128,1,float16,float16,0,0.27645919322967527
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1536,48,4,128,1,float16,fp8,0,0.26704959869384765
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1536,48,4,128,1,fp8,fp8,0,0.2713759899139404
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1536,48,8,128,1,float16,float16,0,0.2833087921142578
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1536,48,8,128,1,float16,fp8,0,0.26728799343109133
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1536,48,48,128,1,float16,fp8,0,0.18117120265960693
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1536,48,48,128,1,fp8,fp8,0,0.1792304039001465
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1536,48,8,128,1,fp8,fp8,0,0.266264009475708
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1536,48,4,128,1,float16,float16,0,1.0535648345947266
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1536,48,1,128,1,float16,float16,0,0.1480687975883484
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1536,48,1,128,1,float16,fp8,0,0.14437439441680908
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1536,48,1,128,1,fp8,fp8,0,0.1449728012084961
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1536,48,2,128,1,float16,float16,0,0.15056639909744263
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1536,48,2,128,1,float16,fp8,0,0.14457440376281738
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1536,48,2,128,1,fp8,fp8,0,0.14488639831542968
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1536,48,4,128,1,float16,float16,0,0.1484928011894226
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1536,48,4,128,1,float16,fp8,0,0.1445312023162842
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1536,48,4,128,1,fp8,fp8,0,0.1451248049736023
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1536,48,8,128,1,float16,float16,0,0.15196160078048707
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1536,48,8,128,1,float16,fp8,0,0.1455296039581299
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1536,48,8,128,1,fp8,fp8,0,0.14498080015182496
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1536,48,48,128,1,float16,float16,0,0.10718879699707032
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1536,48,48,128,1,float16,fp8,0,0.10059520006179809
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1536,48,48,128,1,fp8,fp8,0,0.10119040012359619
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1536,48,1,128,1,float16,float16,0,0.08500319719314575
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1536,48,1,128,1,fp8,fp8,0,0.0824783980846405
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1536,48,2,128,1,float16,float16,0,0.08500800132751465
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1536,48,2,128,1,float16,fp8,0,0.08284159898757934
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1536,48,2,128,1,fp8,fp8,0,0.08227360248565674
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1536,48,4,128,1,float16,float16,0,0.08604159951210022
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1536,48,4,128,1,float16,fp8,0,0.08223040103912353
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1536,48,4,128,1,fp8,fp8,0,0.08262400031089782
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1536,48,1,128,1,fp8,fp8,0,0.5872176170349122
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1536,48,8,128,1,float16,fp8,0,0.08270559906959533
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1536,48,8,128,1,fp8,fp8,0,0.08275039792060852
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1536,48,48,128,1,float16,float16,0,0.06410719752311707
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1536,48,48,128,1,fp8,fp8,0,0.05976799726486206
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1536,48,1,128,1,float16,float16,0,0.05366399884223938
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1536,48,1,128,1,float16,fp8,0,0.05172320008277893
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1536,48,1,128,1,fp8,fp8,0,0.052132797241210935
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1536,48,2,128,1,float16,float16,0,0.0537168025970459
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1536,48,2,128,1,float16,fp8,0,0.05194240212440491
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1536,48,2,128,1,fp8,fp8,0,0.05179200172424316
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1536,48,4,128,1,float16,float16,0,0.05420960187911987
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1536,48,4,128,1,float16,fp8,0,0.05195680260658264
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1536,48,4,128,1,fp8,fp8,0,0.05216479897499084
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1536,48,8,128,1,float16,float16,0,0.055452799797058104
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1536,48,8,128,1,float16,fp8,0,0.05224480032920838
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1536,48,8,128,1,fp8,fp8,0,0.051744002103805545
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1536,48,48,128,1,float16,float16,0,0.1936192035675049
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1536,48,1,128,1,float16,fp8,0,0.08233439922332764
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,1024,48,1,128,1,float16,fp8,0,2.1336896896362303
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1536,48,8,128,1,float16,float16,0,0.08858399987220764
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1536,48,48,128,1,float16,fp8,0,0.05930719971656799
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1536,48,48,128,1,float16,float16,0,0.3649535894393921
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,1024,48,1,128,1,fp8,fp8,0,2.1344512939453124
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,1024,48,2,128,1,float16,float16,0,2.2352975845336913
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,1024,48,2,128,1,float16,fp8,0,2.131823921203613
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,1024,48,2,128,1,fp8,fp8,0,2.1343807220458983
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,1024,48,4,128,1,float16,float16,0,2.2665103912353515
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,1024,48,4,128,1,float16,fp8,0,2.131745529174805
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,1024,48,4,128,1,fp8,fp8,0,2.133135986328125
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,1024,48,1,128,1,float16,float16,0,2.1948448181152345
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1024,48,48,128,1,float16,float16,0,1.6064319610595703
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,1024,48,8,128,1,float16,float16,0,2.3180416107177733
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1024,48,1,128,1,float16,float16,0,1.0777152061462403
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,1024,48,8,128,1,fp8,fp8,0,2.1313552856445312
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,1024,48,8,128,1,float16,fp8,0,2.27053279876709
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1024,48,1,128,1,float16,fp8,0,1.0737152099609375
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1024,48,48,128,1,fp8,fp8,0,1.423960018157959
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1024,48,1,128,1,fp8,fp8,0,1.1647680282592774
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1024,48,2,128,1,float16,float16,0,1.2058927536010742
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1024,48,2,128,1,float16,fp8,0,1.1056672096252442
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1024,48,2,128,1,fp8,fp8,0,1.074841594696045
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1024,48,4,128,1,float16,fp8,0,1.073201560974121
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1024,48,4,128,1,float16,float16,0,1.1071311950683593
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1024,48,4,128,1,fp8,fp8,0,1.0747743606567384
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1024,48,48,128,1,float16,float16,0,0.800391960144043
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1024,48,48,128,1,float16,fp8,0,0.7215295791625976
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1024,48,1,128,1,float16,float16,0,0.5425183773040771
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1024,48,8,128,1,float16,float16,0,1.147753620147705
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1024,48,48,128,1,fp8,fp8,0,0.8149344444274902
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1024,48,8,128,1,float16,fp8,0,1.1734047889709474
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1024,48,1,128,1,float16,fp8,0,0.5819968223571778
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1024,48,1,128,1,fp8,fp8,0,0.5458655834197998
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1024,48,2,128,1,float16,float16,0,0.5453695774078369
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1024,48,2,128,1,float16,fp8,0,0.5761903762817383
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1024,48,2,128,1,fp8,fp8,0,0.5827583789825439
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1024,48,4,128,1,float16,float16,0,0.5663072109222412
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1024,48,4,128,1,fp8,fp8,0,0.5456031799316406
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1024,48,4,128,1,float16,fp8,0,0.5757952213287354
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1024,48,8,128,1,float16,float16,0,0.5789440155029297
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1024,48,48,128,1,float16,float16,0,0.41246399879455564
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1024,48,8,128,1,float16,fp8,0,0.5737279891967774
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1024,48,8,128,1,fp8,fp8,0,0.5631567955017089
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1024,48,48,128,1,float16,fp8,0,1.5844479560852052
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1024,48,1,128,1,float16,float16,0,0.2994447946548462
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1024,48,48,128,1,fp8,fp8,0,0.3777328014373779
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1024,48,1,128,1,float16,fp8,0,0.2966367959976196
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1024,48,1,128,1,fp8,fp8,0,0.28638079166412356
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1024,48,2,128,1,float16,float16,0,0.28194079399108884
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1024,48,2,128,1,float16,fp8,0,0.3042896032333374
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1024,48,2,128,1,fp8,fp8,0,0.28577280044555664
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1024,48,4,128,1,float16,float16,0,0.2886064052581787
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1024,48,4,128,1,fp8,fp8,0,0.28293280601501464
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1024,48,8,128,1,float16,float16,0,0.3045248031616211
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1024,48,8,128,1,fp8,fp8,0,0.2871216058731079
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1024,48,48,128,1,float16,float16,0,0.21076159477233886
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1024,48,48,128,1,float16,fp8,0,0.20326240062713624
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1024,48,1,128,1,float16,float16,0,0.1523311972618103
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1024,48,48,128,1,fp8,fp8,0,0.19729599952697754
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1024,48,1,128,1,float16,fp8,0,0.15425280332565308
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1024,48,1,128,1,fp8,fp8,0,0.15140960216522217
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1024,48,2,128,1,float16,float16,0,0.15145119428634643
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1024,48,2,128,1,float16,fp8,0,0.15411360263824464
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1024,48,2,128,1,fp8,fp8,0,0.1514415979385376
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1024,48,4,128,1,float16,float16,0,0.15338560342788696
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1024,48,4,128,1,float16,fp8,0,0.1527135968208313
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1024,48,8,128,1,fp8,fp8,0,1.0934783935546875
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1024,48,4,128,1,fp8,fp8,0,0.15131200551986695
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1024,48,8,128,1,float16,float16,0,0.1582703948020935
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1024,48,8,128,1,float16,fp8,0,0.15297759771347047
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1024,48,8,128,1,fp8,fp8,0,0.15091999769210815
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1024,48,48,128,1,float16,float16,0,0.11548800468444824
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1024,48,48,128,1,fp8,fp8,0,0.10905439853668213
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1024,48,1,128,1,float16,float16,0,0.08529919981956482
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1024,48,1,128,1,float16,fp8,0,0.08429920077323913
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1024,48,1,128,1,fp8,fp8,0,0.0852400004863739
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1024,48,2,128,1,float16,float16,0,0.08542400002479553
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1024,48,2,128,1,float16,fp8,0,0.08394560217857361
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1024,48,2,128,1,fp8,fp8,0,0.0846239984035492
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1024,48,4,128,1,float16,float16,0,0.08760160207748413
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1024,48,4,128,1,float16,fp8,0,0.08418400287628174
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1024,48,4,128,1,fp8,fp8,0,0.0850816011428833
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1024,48,8,128,1,float16,float16,0,0.08979679942131043
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1024,48,8,128,1,float16,fp8,0,0.08395360112190246
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1024,48,8,128,1,fp8,fp8,0,0.08493760228157043
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1024,48,48,128,1,float16,float16,0,0.06844639778137207
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1024,48,48,128,1,float16,fp8,0,0.06155520081520081
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1024,48,48,128,1,fp8,fp8,0,0.06258879899978638
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1024,48,1,128,1,float16,float16,0,0.051660799980163576
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1024,48,1,128,1,float16,fp8,0,0.05051680207252503
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1024,48,1,128,1,fp8,fp8,0,0.0513808012008667
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1024,48,2,128,1,float16,float16,0,0.05167199969291687
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1024,48,2,128,1,float16,fp8,0,0.050539201498031615
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1024,48,2,128,1,fp8,fp8,0,0.050547200441360476
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1024,48,4,128,1,float16,float16,0,0.0520687997341156
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1024,48,4,128,1,float16,fp8,0,0.05102239847183228
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1024,48,4,128,1,fp8,fp8,0,0.05081279873847962
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1024,48,48,128,1,float16,fp8,0,0.37059199810028076
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1024,48,8,128,1,float16,float16,0,0.05366399884223938
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1024,48,8,128,1,fp8,fp8,0,0.05137119889259338
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1024,48,48,128,1,float16,float16,0,0.04086720049381256
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1024,48,1,128,1,float16,float16,0,0.03690559864044189
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1024,48,48,128,1,float16,fp8,0,0.04110400080680847
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1024,48,1,128,1,float16,fp8,0,0.03506560027599335
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1024,48,1,128,1,fp8,fp8,0,0.03506399989128113
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1024,48,2,128,1,float16,float16,0,0.03711360096931458
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1024,48,2,128,1,fp8,fp8,0,0.035094401240348815
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1024,48,2,128,1,float16,fp8,0,0.03506079912185669
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1024,48,4,128,1,float16,float16,0,0.03712159991264343
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1024,48,4,128,1,float16,fp8,0,0.28576319217681884
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1024,48,4,128,1,float16,fp8,0,0.03506560027599335
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1024,48,8,128,1,float16,float16,0,0.0371071994304657
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1024,48,4,128,1,fp8,fp8,0,0.035099199414253233
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1024,48,8,128,1,float16,fp8,0,0.03505600094795227
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1024,48,8,128,1,fp8,fp8,0,0.03507519960403442
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1024,48,8,128,1,float16,fp8,0,0.2855823993682861
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1024,48,48,128,1,float16,fp8,0,0.10775200128555298
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,512,48,1,128,1,float16,float16,0,1.5607343673706056
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,512,48,2,128,1,float16,float16,0,1.589230442047119
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,512,48,1,128,1,fp8,fp8,0,1.6455936431884766
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,512,48,1,128,1,float16,fp8,0,1.6450607299804687
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,512,48,2,128,1,float16,fp8,0,1.6422544479370118
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1024,48,8,128,1,float16,fp8,0,0.050444799661636355
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1024,48,48,128,1,fp8,fp8,0,0.04117439985275269
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,512,48,2,128,1,fp8,fp8,0,1.6438127517700196
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,512,48,4,128,1,float16,float16,0,1.6744640350341797
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,512,48,4,128,1,fp8,fp8,0,1.641169548034668
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,512,48,4,128,1,float16,fp8,0,1.7249616622924804
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,512,48,8,128,1,float16,float16,0,1.7001808166503907
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,512,48,8,128,1,float16,fp8,0,1.640225601196289
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,512,48,1,128,1,float16,float16,0,0.7889455795288086
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,512,48,48,128,1,float16,float16,0,1.2939552307128905
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,512,48,1,128,1,float16,fp8,0,0.8378479957580567
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,512,48,8,128,1,fp8,fp8,0,1.6394224166870117
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,512,48,2,128,1,float16,float16,0,0.7916416168212891
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,512,48,1,128,1,fp8,fp8,0,0.8953184127807617
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,512,48,48,128,1,float16,fp8,0,1.1728896141052245
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,512,48,48,128,1,fp8,fp8,0,1.2496784210205079
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,512,48,2,128,1,float16,fp8,0,0.8458800315856934
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,512,48,4,128,1,float16,float16,0,0.811302375793457
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,512,48,2,128,1,fp8,fp8,0,0.8376336097717285
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,512,48,4,128,1,float16,fp8,0,0.8277088165283203
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,512,48,4,128,1,fp8,fp8,0,0.887718391418457
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,512,48,8,128,1,fp8,fp8,0,0.837822437286377
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,512,48,1,128,1,float16,float16,0,0.4158624172210693
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,512,48,48,128,1,float16,fp8,0,0.59410080909729
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,512,48,1,128,1,float16,fp8,0,0.4244719982147217
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,512,48,48,128,1,fp8,fp8,0,0.5921279907226562
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,512,48,2,128,1,float16,float16,0,0.40424160957336425
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,512,48,2,128,1,float16,fp8,0,0.4237567901611328
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,512,48,2,128,1,fp8,fp8,0,0.4218560218811035
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,512,48,4,128,1,float16,float16,0,0.41379518508911134
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,512,48,4,128,1,float16,fp8,0,0.4207759857177734
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,512,48,4,128,1,fp8,fp8,0,0.4218111991882324
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,512,48,8,128,1,float16,float16,0,0.4341536045074463
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,512,48,8,128,1,float16,fp8,0,0.42066240310668945
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,512,48,8,128,1,fp8,fp8,0,0.42131838798522947
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,512,48,48,128,1,float16,float16,0,0.3323632001876831
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,512,48,48,128,1,float16,fp8,0,0.30373759269714357
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,512,48,48,128,1,fp8,fp8,0,0.30399041175842284
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,512,48,1,128,1,float16,float16,0,0.2104559898376465
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,512,48,1,128,1,float16,fp8,0,0.2180255889892578
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,512,48,1,128,1,fp8,fp8,0,0.21876320838928223
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,512,48,2,128,1,float16,float16,0,0.2112895965576172
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,512,48,2,128,1,float16,fp8,0,0.21815199851989747
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,512,48,2,128,1,fp8,fp8,0,0.21916799545288085
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,512,48,4,128,1,float16,float16,0,0.2160207986831665
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,512,48,4,128,1,float16,fp8,0,0.21910719871520995
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,512,48,8,128,1,float16,float16,0,0.8691184043884277
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,512,48,4,128,1,fp8,fp8,0,0.2185983896255493
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,512,48,8,128,1,float16,float16,0,0.22492640018463134
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,512,48,8,128,1,float16,fp8,0,0.21906719207763672
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,512,48,8,128,1,fp8,fp8,0,0.21824319362640382
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,512,48,48,128,1,float16,float16,0,0.17433120012283326
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,512,48,48,128,1,float16,fp8,0,0.16022720336914062
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,512,48,48,128,1,float16,float16,0,0.654260778427124
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,512,48,48,128,1,fp8,fp8,0,0.16062239408493043
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,512,48,1,128,1,float16,float16,0,0.11524640321731568
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,512,48,1,128,1,float16,fp8,0,0.1177664041519165
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,512,48,1,128,1,fp8,fp8,0,0.1174847960472107
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,512,48,2,128,1,float16,fp8,0,0.11755039691925048
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,512,48,2,128,1,float16,float16,0,0.1156000018119812
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,512,48,2,128,1,fp8,fp8,0,0.11715840101242066
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,512,48,4,128,1,float16,float16,0,0.11796000003814697
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,512,48,4,128,1,float16,fp8,0,0.11737439632415772
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,512,48,4,128,1,fp8,fp8,0,0.11763999462127686
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,512,48,8,128,1,float16,float16,0,0.12217919826507569
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,512,48,1,128,1,fp8,fp8,0,0.4265535831451416
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,512,48,48,128,1,float16,float16,0,0.09480800032615662
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,512,48,8,128,1,float16,fp8,0,0.11780320405960083
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,512,48,8,128,1,fp8,fp8,0,0.11777440309524537
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,512,48,48,128,1,fp8,fp8,0,0.0889952003955841
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,512,48,1,128,1,float16,float16,0,0.06541280150413513
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,512,48,1,128,1,float16,fp8,0,0.06567680239677429
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,512,48,2,128,1,float16,float16,0,0.06598399877548218
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,512,48,1,128,1,fp8,fp8,0,0.06582239866256714
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,512,48,2,128,1,fp8,fp8,0,0.06562880277633668
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,512,48,4,128,1,float16,float16,0,0.06740800142288209
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,512,48,4,128,1,float16,fp8,0,0.06569759845733643
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,512,48,4,128,1,fp8,fp8,0,0.06577280163764954
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,512,48,8,128,1,float16,float16,0,0.07020480036735535
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,512,48,8,128,1,float16,fp8,0,0.06579040288925171
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,512,48,8,128,1,fp8,fp8,0,0.06599199771881104
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,512,48,48,128,1,float16,fp8,0,0.04938240051269531
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,512,48,48,128,1,float16,float16,0,0.055587202310562134
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,512,48,48,128,1,fp8,fp8,0,0.05022720098495483
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,512,48,2,128,1,float16,fp8,0,0.06576480269432068
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,512,48,1,128,1,float16,float16,0,0.03904640078544617
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,512,48,1,128,1,float16,fp8,0,0.03918400108814239
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,512,48,2,128,1,float16,float16,0,0.03929600119590759
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,512,48,2,128,1,float16,fp8,0,0.039124798774719236
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,512,48,2,128,1,fp8,fp8,0,0.03911199867725372
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,512,48,4,128,1,float16,float16,0,0.03957920074462891
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,512,48,4,128,1,float16,fp8,0,0.03907999992370605
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,512,48,4,128,1,fp8,fp8,0,0.039155200123786926
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,512,48,8,128,1,float16,float16,0,0.04108000099658966
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,512,48,8,128,1,float16,fp8,0,0.03913759887218475
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,512,48,8,128,1,fp8,fp8,0,0.03907679915428162
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,512,48,48,128,1,float16,float16,0,0.03303360044956207
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,512,48,48,128,1,float16,fp8,0,0.03291999995708465
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,512,48,48,128,1,fp8,fp8,0,0.03286080062389374
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,512,48,1,128,1,float16,float16,0,0.02694559991359711
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,512,48,1,128,1,float16,fp8,0,0.02689119875431061
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,512,48,1,128,1,fp8,fp8,0,0.026788800954818726
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,512,48,2,128,1,float16,float16,0,0.02678399980068207
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,512,48,2,128,1,float16,fp8,0,0.026868799328804018
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,512,48,2,128,1,fp8,fp8,0,0.026862400770187377
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,512,48,4,128,1,float16,fp8,0,0.026785600185394286
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,512,48,4,128,1,fp8,fp8,0,0.026800000667572023
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,512,48,8,128,1,float16,float16,0,0.02892000079154968
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,512,48,8,128,1,float16,fp8,0,0.0268095999956131
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,512,48,8,128,1,fp8,fp8,0,0.026867198944091796
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,512,48,48,128,1,float16,float16,0,0.02073120027780533
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,512,48,48,128,1,float16,fp8,0,0.02074880003929138
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,512,48,48,128,1,fp8,fp8,0,0.020715199410915375
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,512,48,1,128,1,float16,float16,0,0.01868479996919632
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,512,48,1,128,1,float16,fp8,0,0.018617600202560425
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,512,48,1,128,1,fp8,fp8,0,0.018607999384403228
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,512,48,2,128,1,float16,float16,0,0.01866080015897751
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,512,48,2,128,1,float16,fp8,0,0.018588800728321076
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,512,48,2,128,1,fp8,fp8,0,0.01857919991016388
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,512,48,4,128,1,float16,float16,0,0.01865919977426529
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,512,48,4,128,1,float16,fp8,0,0.018648000061511995
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,512,48,4,128,1,fp8,fp8,0,0.01865919977426529
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,512,48,8,128,1,float16,float16,0,0.018688000738620758
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,512,48,8,128,1,float16,fp8,0,0.018675200641155243
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,512,48,8,128,1,fp8,fp8,0,0.018607999384403228
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,512,48,8,128,1,float16,fp8,0,0.8279760360717774
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,256,48,1,128,1,float16,fp8,0,0.6972208023071289
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,256,48,1,128,1,float16,float16,0,0.6471856117248536
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,256,48,1,128,1,fp8,fp8,0,0.6978928089141846
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,256,48,2,128,1,float16,float16,0,0.6489727973937989
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,512,48,48,128,1,float16,fp8,0,0.08867359757423401
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,256,48,2,128,1,float16,fp8,0,0.6965839862823486
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,256,48,2,128,1,fp8,fp8,0,0.6964335918426514
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,256,48,4,128,1,float16,float16,0,0.6701519966125489
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,512,48,1,128,1,fp8,fp8,0,0.03907679915428162
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,256,48,4,128,1,fp8,fp8,0,0.694649600982666
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,256,48,4,128,1,float16,fp8,0,0.6945824146270752
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,256,48,8,128,1,float16,float16,0,0.7088479995727539
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,256,48,8,128,1,float16,fp8,0,0.6949327945709228
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,512,48,4,128,1,float16,float16,0,0.02725600004196167
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,256,48,48,128,1,float16,float16,0,0.5708831787109375
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,256,48,48,128,1,float16,fp8,0,0.5222064018249511
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,256,48,1,128,1,float16,float16,0,0.3308527946472168
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,256,48,48,128,1,fp8,fp8,0,0.522654390335083
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,256,48,1,128,1,fp8,fp8,0,0.35370559692382814
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,256,48,2,128,1,float16,fp8,0,0.35324161052703856
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,256,48,2,128,1,fp8,fp8,0,0.3535007953643799
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,256,48,2,128,1,float16,float16,0,0.33164799213409424
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,256,48,4,128,1,float16,float16,0,0.34085760116577146
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,256,48,4,128,1,float16,fp8,0,0.3538624048233032
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,256,48,4,128,1,fp8,fp8,0,0.3531536102294922
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,256,48,8,128,1,float16,float16,0,0.3610480070114136
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,256,48,8,128,1,float16,fp8,0,0.354420804977417
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,256,48,8,128,1,fp8,fp8,0,0.3535680055618286
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,256,48,48,128,1,float16,float16,0,0.2894239902496338
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,256,48,48,128,1,float16,fp8,0,0.2666975975036621
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,256,48,1,128,1,float16,float16,0,0.17220799922943114
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,256,48,48,128,1,fp8,fp8,0,0.26753759384155273
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,256,48,1,128,1,float16,fp8,0,0.18143839836120607
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,256,48,1,128,1,fp8,fp8,0,0.18227360248565674
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,256,48,2,128,1,float16,float16,0,0.17231839895248413
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,256,48,2,128,1,float16,fp8,0,0.18214399814605714
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,256,48,2,128,1,fp8,fp8,0,0.1827679991722107
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,256,48,4,128,1,float16,float16,0,0.17760640382766724
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,256,48,4,128,1,float16,fp8,0,0.18212159872055053
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,256,48,4,128,1,fp8,fp8,0,0.18205440044403076
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,256,48,8,128,1,float16,float16,0,0.1864832043647766
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,256,48,8,128,1,float16,fp8,0,0.18305439949035646
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,256,48,48,128,1,float16,float16,0,0.15268800258636475
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,256,48,48,128,1,float16,fp8,0,0.1417248010635376
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,256,48,1,128,1,float16,float16,0,0.09560319781303406
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,256,48,48,128,1,fp8,fp8,0,0.14237600564956665
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,256,48,1,128,1,fp8,fp8,0,0.09917600154876709
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,256,48,2,128,1,float16,float16,0,0.09578080177307129
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,256,48,2,128,1,fp8,fp8,0,0.09925600290298461
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,256,48,4,128,1,float16,float16,0,0.09760479927062989
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,256,48,4,128,1,float16,fp8,0,0.09915040135383606
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,256,48,4,128,1,fp8,fp8,0,0.09924319982528687
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,256,48,8,128,1,float16,float16,0,0.10209439992904663
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,256,48,8,128,1,float16,fp8,0,0.09987040162086487
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,256,48,8,128,1,fp8,fp8,0,0.09955520033836365
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,256,48,48,128,1,float16,float16,0,0.0858735978603363
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,256,48,48,128,1,float16,fp8,0,0.07954400181770324
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,256,48,48,128,1,fp8,fp8,0,0.07988319993019104
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,256,48,1,128,1,float16,float16,0,0.05551519989967346
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,256,48,1,128,1,float16,fp8,0,0.05594080090522766
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,256,48,1,128,1,fp8,fp8,0,0.05559520125389099
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,256,48,2,128,1,float16,float16,0,0.05562880039215088
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,256,48,2,128,1,float16,fp8,0,0.055636799335479735
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,256,48,2,128,1,fp8,fp8,0,0.055508798360824584
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,256,48,4,128,1,float16,float16,0,0.056852799654006955
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,256,48,4,128,1,float16,fp8,0,0.05572320222854614
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,256,48,4,128,1,fp8,fp8,0,0.055534398555755614
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,256,48,8,128,1,float16,float16,0,0.05949599742889404
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,256,48,8,128,1,float16,fp8,0,0.056036800146102905
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,256,48,8,128,1,fp8,fp8,0,0.05595679879188538
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,256,48,48,128,1,float16,fp8,0,0.04320000112056732
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,256,48,1,128,1,float16,fp8,0,0.35318241119384763
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,256,48,1,128,1,float16,float16,0,0.03148640096187592
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,256,48,1,128,1,float16,fp8,0,0.032041600346565245
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,256,48,1,128,1,fp8,fp8,0,0.03284960091114044
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,256,48,2,128,1,float16,float16,0,0.03104960024356842
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,256,48,2,128,1,float16,fp8,0,0.03281280100345611
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,256,48,2,128,1,fp8,fp8,0,0.032278400659561154
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,256,48,4,128,1,float16,float16,0,0.03290719985961914
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,256,48,4,128,1,float16,fp8,0,0.03233759999275208
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,256,48,4,128,1,fp8,fp8,0,0.031595200300216675
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,256,48,8,128,1,float16,float16,0,0.03299199938774109
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,256,48,8,128,1,float16,fp8,0,0.03210079967975617
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,256,48,8,128,1,fp8,fp8,0,0.032595199346542356
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,256,48,48,128,1,float16,float16,0,0.026862400770187377
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,256,48,48,128,1,float16,fp8,0,0.028934401273727418
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,256,48,48,128,1,fp8,fp8,0,0.028809601068496705
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,256,48,1,128,1,float16,float16,0,0.02284960001707077
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,256,48,1,128,1,float16,fp8,0,0.022720000147819518
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,256,48,1,128,1,fp8,fp8,0,0.022993600368499754
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,256,48,2,128,1,float16,float16,0,0.022699199616909027
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,256,48,2,128,1,float16,fp8,0,0.023158399760723113
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,256,48,2,128,1,fp8,fp8,0,0.022756800055503845
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,256,48,4,128,1,float16,float16,0,0.022793599963188173
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,256,48,4,128,1,float16,fp8,0,0.022780799865722658
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,256,48,4,128,1,fp8,fp8,0,0.022753599286079406
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,256,48,8,128,1,float16,float16,0,0.02452320009469986
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,256,48,8,128,1,float16,fp8,0,0.023132799565792082
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,256,48,8,128,1,fp8,fp8,0,0.023078399896621703
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,256,48,8,128,1,fp8,fp8,0,0.18282400369644164
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,256,48,48,128,1,float16,float16,0,0.018646399676799773
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,256,48,48,128,1,float16,fp8,0,0.018671999871730804
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,256,48,48,128,1,fp8,fp8,0,0.01870879977941513
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,256,48,1,128,1,float16,fp8,0,0.09862080216407776
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,256,48,1,128,1,float16,float16,0,0.015479999780654907
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,256,48,1,128,1,float16,fp8,0,0.0162992000579834
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,256,48,1,128,1,fp8,fp8,0,0.01562879979610443
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,256,48,2,128,1,float16,float16,0,0.015907199680805208
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,256,48,2,128,1,float16,fp8,0,0.016345599293708803
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,256,48,2,128,1,float16,fp8,0,0.09862880110740661
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,256,48,2,128,1,fp8,fp8,0,0.01658399999141693
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,256,48,4,128,1,float16,fp8,0,0.016380800306797026
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,256,48,4,128,1,float16,float16,0,0.016289600729942323
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,256,48,4,128,1,fp8,fp8,0,0.014580799639225006
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,256,48,8,128,1,float16,float16,0,0.01653600037097931
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,256,48,8,128,1,float16,fp8,0,0.016569599509239197
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,256,48,8,128,1,fp8,fp8,0,0.014603200554847717
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,256,48,48,128,1,float16,fp8,0,0.016607999801635742
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,256,48,48,128,1,fp8,fp8,0,0.016596800088882445
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,256,48,1,128,1,float16,float16,0,0.014508800208568573
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,256,48,1,128,1,float16,fp8,0,0.014830400049686433
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,256,48,1,128,1,fp8,fp8,0,0.014587199687957764
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,256,48,2,128,1,float16,float16,0,0.014537599682807923
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,256,48,2,128,1,float16,fp8,0,0.014563199877738953
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,256,48,2,128,1,fp8,fp8,0,0.014550399780273438
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,256,48,4,128,1,float16,float16,0,0.01451359987258911
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,256,48,4,128,1,float16,fp8,0,0.014603200554847717
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,256,48,4,128,1,fp8,fp8,0,0.014577600359916686
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,256,48,8,128,1,float16,fp8,0,0.014574399590492249
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,256,48,8,128,1,fp8,fp8,0,0.014542399346828461
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,256,48,48,128,1,float16,float16,0,0.048214399814605714
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,256,48,8,128,1,fp8,fp8,0,0.6961599826812744
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,128,48,1,128,1,float16,float16,0,0.400270414352417
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,128,48,1,128,1,float16,fp8,0,0.42231040000915526
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,256,48,48,128,1,fp8,fp8,0,0.04323360025882721
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,128,48,1,128,1,fp8,fp8,0,0.42144160270690917
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,128,48,2,128,1,float16,float16,0,0.4011663913726807
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,128,48,2,128,1,float16,fp8,0,0.42223200798034666
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,128,48,2,128,1,fp8,fp8,0,0.42183837890625
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,128,48,4,128,1,float16,float16,0,0.4084784030914307
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,128,48,4,128,1,float16,fp8,0,0.4211103916168213
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,128,48,4,128,1,fp8,fp8,0,0.422603178024292
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,128,48,8,128,1,float16,float16,0,0.42743678092956544
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,128,48,8,128,1,float16,fp8,0,0.4207920074462891
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,128,48,8,128,1,fp8,fp8,0,0.42120962142944335
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,128,48,48,128,1,float16,float16,0,0.3206624031066895
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,128,48,1,128,1,float16,float16,0,0.2070319890975952
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,128,48,48,128,1,float16,fp8,0,0.3016688108444214
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,128,48,48,128,1,fp8,fp8,0,0.3005615949630737
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,128,48,1,128,1,fp8,fp8,0,0.21627678871154785
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,128,48,2,128,1,float16,float16,0,0.207476806640625
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,128,48,2,128,1,float16,fp8,0,0.21641600131988525
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,128,48,4,128,1,float16,float16,0,0.21136319637298584
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,128,48,4,128,1,float16,fp8,0,0.21628639698028565
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,128,48,4,128,1,fp8,fp8,0,0.21667520999908446
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,128,48,8,128,1,float16,float16,0,0.220414400100708
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,128,48,8,128,1,float16,fp8,0,0.21592960357666016
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,128,48,8,128,1,fp8,fp8,0,0.21647040843963622
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,128,48,48,128,1,float16,float16,0,0.16483839750289916
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,256,48,48,128,1,float16,float16,0,0.01650879979133606
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,128,48,48,128,1,float16,fp8,0,0.15572160482406616
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,128,48,1,128,1,float16,float16,0,0.11002720594406128
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,128,48,48,128,1,fp8,fp8,0,0.1559167981147766
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,128,48,1,128,1,float16,fp8,0,0.11304639577865601
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,128,48,2,128,1,float16,float16,0,0.11030399799346924
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,128,48,2,128,1,float16,fp8,0,0.11329280138015747
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,256,48,8,128,1,float16,float16,0,0.014564800262451171
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,128,48,4,128,1,float16,float16,0,0.11171200275421142
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,128,48,4,128,1,float16,fp8,0,0.11356799602508545
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,128,48,4,128,1,fp8,fp8,0,0.11331039667129517
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,128,48,8,128,1,float16,float16,0,0.11610560417175293
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,128,48,8,128,1,float16,fp8,0,0.11359039545059205
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,128,48,48,128,1,float16,float16,0,0.08859840035438538
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,128,48,8,128,1,fp8,fp8,0,0.11352640390396118
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,128,48,48,128,1,float16,fp8,0,0.08429279923439026
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,128,48,1,128,1,float16,float16,0,0.061673599481582644
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,128,48,48,128,1,fp8,fp8,0,0.08419359922409057
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,128,48,1,128,1,fp8,fp8,0,0.06165279746055603
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,128,48,2,128,1,float16,float16,0,0.061660802364349364
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,128,48,2,128,1,float16,fp8,0,0.0616815984249115
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,128,48,2,128,1,fp8,fp8,0,0.0617904007434845
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,128,48,4,128,1,float16,float16,0,0.06248160004615784
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,128,48,4,128,1,float16,fp8,0,0.061680001020431516
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,128,48,4,128,1,fp8,fp8,0,0.061947202682495116
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,128,48,8,128,1,float16,float16,0,0.06485599875450135
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,128,48,8,128,1,float16,fp8,0,0.06189759969711304
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,128,48,8,128,1,fp8,fp8,0,0.06287680268287658
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,128,48,48,128,1,float16,float16,0,0.05140640139579773
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,128,48,48,128,1,float16,fp8,0,0.04734239876270294
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,128,48,48,128,1,fp8,fp8,0,0.04755359888076782
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,128,48,1,128,1,float16,fp8,0,0.037041598558425905
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,128,48,1,128,1,fp8,fp8,0,0.03704800009727478
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,128,48,2,128,1,float16,float16,0,0.03545759916305542
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,128,48,2,128,1,float16,fp8,0,0.03706879913806915
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,128,48,2,128,1,fp8,fp8,0,0.03697920143604279
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,128,48,4,128,1,float16,float16,0,0.03617919981479645
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,128,48,4,128,1,float16,fp8,0,0.03702239990234375
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,128,48,1,128,1,float16,fp8,0,0.21619200706481934
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,128,48,4,128,1,fp8,fp8,0,0.03713920116424561
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,128,48,8,128,1,float16,float16,0,0.03706879913806915
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,128,48,8,128,1,float16,fp8,0,0.03700959980487824
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,128,48,8,128,1,fp8,fp8,0,0.037099200487136844
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,128,48,2,128,1,fp8,fp8,0,0.2160128116607666
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,128,48,48,128,1,float16,fp8,0,0.026950401067733765
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,128,48,48,128,1,fp8,fp8,0,0.026913601160049438
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,128,48,1,128,1,float16,float16,0,0.022147199511528014
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,128,48,1,128,1,float16,fp8,0,0.02267040014266968
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,128,48,1,128,1,fp8,fp8,0,0.022672000527381896
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,128,48,2,128,1,float16,float16,0,0.022443200647830962
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,128,48,2,128,1,float16,fp8,0,0.022777600586414336
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,128,48,2,128,1,fp8,fp8,0,0.022657600045204163
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,128,48,4,128,1,float16,float16,0,0.022735999524593355
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,128,48,4,128,1,float16,fp8,0,0.02248000055551529
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,128,48,4,128,1,fp8,fp8,0,0.02268960028886795
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,128,48,8,128,1,float16,float16,0,0.022676800191402436
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,128,48,8,128,1,float16,fp8,0,0.022385600209236144
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,128,48,8,128,1,fp8,fp8,0,0.022569599747657775
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,128,48,48,128,1,float16,float16,0,0.01858240067958832
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,128,48,48,128,1,float16,fp8,0,0.01868640035390854
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,128,48,1,128,1,fp8,fp8,0,0.11285920143127441
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,128,48,48,128,1,fp8,fp8,0,0.018668800592422485
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,128,48,1,128,1,float16,float16,0,0.01652639955282211
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,128,48,1,128,1,float16,fp8,0,0.016606399416923524
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,128,48,2,128,1,float16,float16,0,0.016579200327396394
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,128,48,1,128,1,fp8,fp8,0,0.016620799899101257
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,128,48,2,128,1,fp8,fp8,0,0.11334079504013062
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,128,48,2,128,1,fp8,fp8,0,0.016531200706958772
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,128,48,4,128,1,float16,fp8,0,0.016543999314308167
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,128,48,4,128,1,fp8,fp8,0,0.01653600037097931
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,128,48,8,128,1,float16,float16,0,0.016540800034999848
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,128,48,8,128,1,float16,fp8,0,0.016545599699020384
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,128,48,8,128,1,fp8,fp8,0,0.016630400717258454
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,128,48,48,128,1,float16,float16,0,0.013967999815940857
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,128,48,48,128,1,float16,fp8,0,0.012548799812793731
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,128,48,48,128,1,fp8,fp8,0,0.013067199289798737
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,128,48,1,128,1,float16,float16,0,0.011641599982976914
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,128,48,1,128,1,float16,fp8,0,0.012444800138473511
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,128,48,1,128,1,float16,fp8,0,0.061668801307678225
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,128,48,1,128,1,fp8,fp8,0,0.012745599448680877
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,128,48,2,128,1,float16,float16,0,0.012457600235939026
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,128,48,2,128,1,float16,fp8,0,0.012495999783277511
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,128,48,4,128,1,float16,float16,0,0.01241919994354248
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,128,48,2,128,1,fp8,fp8,0,0.012451200187206269
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,128,48,4,128,1,fp8,fp8,0,0.01241919994354248
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,128,48,8,128,1,float16,float16,0,0.012425599992275238
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,128,48,8,128,1,float16,fp8,0,0.012508800625801087
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,128,48,8,128,1,fp8,fp8,0,0.012443199753761292
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,128,48,48,128,1,float16,float16,0,0.012432000041007996
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,128,48,48,128,1,float16,fp8,0,0.012451200187206269
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,128,48,48,128,1,fp8,fp8,0,0.012414400279521943
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,128,48,1,128,1,float16,float16,0,0.03560320138931274
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,128,48,1,128,1,float16,float16,0,0.010929600149393082
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,128,48,1,128,1,float16,fp8,0,0.011791999638080596
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,128,48,1,128,1,fp8,fp8,0,0.010543999820947647
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,128,48,2,128,1,float16,float16,0,0.010828799754381179
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,128,48,2,128,1,float16,fp8,0,0.010595200210809707
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,128,48,2,128,1,fp8,fp8,0,0.012139199674129486
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,128,48,4,128,1,float16,float16,0,0.01050880029797554
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,128,48,4,128,1,float16,fp8,0,0.010486400127410889
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,128,48,4,128,1,fp8,fp8,0,0.010521599650382995
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,128,48,8,128,1,float16,float16,0,0.010478399693965912
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,128,48,8,128,1,fp8,fp8,0,0.011086399853229522
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,64,48,1,128,1,float16,float16,0,0.3140160083770752
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,64,48,1,128,1,float16,fp8,0,0.3218544006347656
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,128,48,48,128,1,float16,float16,0,0.026251199841499328
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,64,48,2,128,1,float16,float16,0,0.31517601013183594
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,64,48,2,128,1,float16,fp8,0,0.3220560073852539
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,64,48,2,128,1,fp8,fp8,0,0.32200160026550295
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,64,48,4,128,1,float16,float16,0,0.31969599723815917
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,64,48,4,128,1,float16,fp8,0,0.321561598777771
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,64,48,4,128,1,fp8,fp8,0,0.3223952054977417
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,64,48,8,128,1,float16,float16,0,0.3282783985137939
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,64,48,8,128,1,float16,fp8,0,0.32182879447937013
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,128,48,2,128,1,float16,fp8,0,0.016624000668525696
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,64,48,48,128,1,float16,float16,0,0.21930398941040039
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,128,48,4,128,1,float16,float16,0,0.01659200042486191
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,64,48,48,128,1,float16,fp8,0,0.20819199085235596
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,64,48,48,128,1,fp8,fp8,0,0.20839200019836426
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,64,48,1,128,1,float16,float16,0,0.1647663950920105
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,64,48,1,128,1,float16,fp8,0,0.1666159987449646
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,64,48,2,128,1,float16,float16,0,0.16485120058059693
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,64,48,2,128,1,float16,fp8,0,0.1666159987449646
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,64,48,2,128,1,fp8,fp8,0,0.16658879518508912
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,64,48,4,128,1,float16,float16,0,0.16710400581359863
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,128,48,4,128,1,float16,fp8,0,0.01242400035262108
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,64,48,4,128,1,float16,fp8,0,0.16672320365905763
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,64,48,4,128,1,fp8,fp8,0,0.16671359539031982
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,64,48,8,128,1,float16,float16,0,0.17184799909591675
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,64,48,8,128,1,float16,fp8,0,0.16607999801635742
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,64,48,48,128,1,float16,float16,0,0.11420639753341674
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,64,48,48,128,1,float16,fp8,0,0.1099552035331726
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,64,48,1,128,1,float16,float16,0,0.0886240005493164
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,64,48,48,128,1,fp8,fp8,0,0.10919359922409058
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,64,48,1,128,1,float16,fp8,0,0.08798720240592957
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,128,48,8,128,1,float16,fp8,0,0.010820800065994262
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,64,48,1,128,1,fp8,fp8,0,0.08674079775810242
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,64,48,2,128,1,float16,float16,0,0.08879039883613586
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,64,48,2,128,1,float16,fp8,0,0.08723679780960084
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,64,48,2,128,1,fp8,fp8,0,0.08726720213890075
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,64,48,4,128,1,float16,float16,0,0.09022240042686462
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,64,48,4,128,1,float16,fp8,0,0.08758080005645752
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,64,48,4,128,1,fp8,fp8,0,0.08727999925613403
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,64,48,8,128,1,float16,float16,0,0.09252640008926391
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,64,48,8,128,1,float16,fp8,0,0.0882319986820221
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,64,48,8,128,1,fp8,fp8,0,0.08758879899978637
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,64,48,48,128,1,float16,float16,0,0.0643504023551941
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,64,48,48,128,1,float16,fp8,0,0.05960320234298706
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,64,48,48,128,1,fp8,fp8,0,0.05975840091705322
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,64,48,1,128,1,float16,float16,0,0.048979198932647704
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,64,48,1,128,1,float16,fp8,0,0.04920159876346588
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,64,48,1,128,1,fp8,fp8,0,0.04909600019454956
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,64,48,2,128,1,float16,float16,0,0.049086400866508485
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,64,48,2,128,1,float16,fp8,0,0.04941920042037964
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,64,48,2,128,1,fp8,fp8,0,0.049404799938201904
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,64,48,4,128,1,float16,fp8,0,0.04920159876346588
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,64,48,8,128,1,float16,float16,0,0.05031359791755676
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,64,48,8,128,1,float16,fp8,0,0.048897600173950194
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,64,48,8,128,1,fp8,fp8,0,0.04899680018424988
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,64,48,48,128,1,float16,float16,0,0.033108800649642944
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,64,48,48,128,1,float16,fp8,0,0.03495199978351593
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,64,48,48,128,1,fp8,fp8,0,0.03495360016822815
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,64,48,1,128,1,float16,float16,0,0.02943359911441803
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,64,48,8,128,1,fp8,fp8,0,0.3222383975982666
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,64,48,1,128,1,float16,fp8,0,0.02893120050430298
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,64,48,1,128,1,fp8,fp8,0,0.02890239953994751
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,64,48,2,128,1,float16,float16,0,0.02953599989414215
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,64,48,2,128,1,float16,fp8,0,0.028966400027275085
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,64,48,2,128,1,fp8,fp8,0,0.02893120050430298
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,64,48,1,128,1,fp8,fp8,0,0.16666719913482667
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,64,48,4,128,1,float16,fp8,0,0.028996801376342772
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,64,48,4,128,1,fp8,fp8,0,0.028859201073646545
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,64,48,8,128,1,float16,float16,0,0.030774399638175964
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,64,48,8,128,1,float16,fp8,0,0.028961598873138428
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,64,48,8,128,1,fp8,fp8,0,0.028907200694084166
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,64,48,48,128,1,float16,float16,0,0.020644800364971162
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,64,48,48,128,1,float16,fp8,0,0.020641599595546723
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,64,48,48,128,1,fp8,fp8,0,0.020852799713611602
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,64,48,1,128,1,float16,float16,0,0.018588800728321076
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,64,48,1,128,1,float16,fp8,0,0.018563200533390046
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,64,48,1,128,1,fp8,fp8,0,0.01858240067958832
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,64,48,2,128,1,float16,float16,0,0.0186256006360054
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,64,48,2,128,1,float16,fp8,0,0.018670399487018586
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,64,48,8,128,1,fp8,fp8,0,0.16659679412841796
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,64,48,2,128,1,fp8,fp8,0,0.018615999817848207
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,64,48,4,128,1,float16,float16,0,0.018593600392341612
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,64,48,4,128,1,float16,fp8,0,0.018585599958896637
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,64,48,4,128,1,fp8,fp8,0,0.018643200397491455
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,64,48,8,128,1,float16,float16,0,0.018673600256443025
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,64,48,8,128,1,float16,fp8,0,0.018568000197410582
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,64,48,8,128,1,fp8,fp8,0,0.018606400489807128
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,64,48,48,128,1,float16,fp8,0,0.015409600734710694
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,64,48,48,128,1,fp8,fp8,0,0.016572800278663636
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,64,48,1,128,1,float16,float16,0,0.01449279934167862
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,64,48,1,128,1,float16,fp8,0,0.014611199498176575
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,64,48,1,128,1,fp8,fp8,0,0.01449279934167862
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,64,48,2,128,1,float16,float16,0,0.014526399970054626
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,64,48,2,128,1,float16,fp8,0,0.014577600359916686
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,64,48,1,128,1,fp8,fp8,0,0.32140159606933594
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,64,48,4,128,1,float16,float16,0,0.014534400403499603
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,64,48,2,128,1,fp8,fp8,0,0.014507199823856353
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,64,48,4,128,1,float16,fp8,0,0.014584000408649444
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,64,48,4,128,1,fp8,fp8,0,0.014484800398349762
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,64,48,8,128,1,float16,float16,0,0.01448799967765808
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,64,48,8,128,1,float16,fp8,0,0.014603200554847717
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,64,48,8,128,1,fp8,fp8,0,0.014486399292945863
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,64,48,48,128,1,float16,float16,0,0.012443199753761292
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,64,48,48,128,1,float16,fp8,0,0.010447999835014344
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,64,48,1,128,1,float16,float16,0,0.010516799986362457
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,64,48,48,128,1,fp8,fp8,0,0.011488000303506852
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,64,48,1,128,1,float16,fp8,0,0.010480000078678131
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,64,48,2,128,1,float16,float16,0,0.010385599732398988
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,64,48,1,128,1,fp8,fp8,0,0.010491199791431427
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,64,48,2,128,1,float16,fp8,0,0.010473600029945374
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,64,48,2,128,1,fp8,fp8,0,0.010494399815797806
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,64,48,4,128,1,float16,float16,0,0.010463999956846238
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,64,48,4,128,1,float16,fp8,0,0.010452800244092942
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,64,48,4,128,1,fp8,fp8,0,0.010409600287675857
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,64,48,8,128,1,float16,float16,0,0.010411199927330018
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,64,48,8,128,1,fp8,fp8,0,0.010556799918413162
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,64,48,8,128,1,float16,fp8,0,0.010470400005578995
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,64,48,48,128,1,float16,float16,0,0.01050880029797554
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,64,48,4,128,1,fp8,fp8,0,0.04936639964580536
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,64,48,48,128,1,float16,fp8,0,0.010452800244092942
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,64,48,48,128,1,fp8,fp8,0,0.010452800244092942
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,64,48,1,128,1,float16,float16,0,0.010467199981212616
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,64,48,1,128,1,float16,fp8,0,0.010444799810647965
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,64,48,1,128,1,fp8,fp8,0,0.010497599840164185
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,64,48,2,128,1,float16,float16,0,0.010398399829864503
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,64,48,2,128,1,float16,fp8,0,0.010478399693965912
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,64,48,2,128,1,fp8,fp8,0,0.010478399693965912
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,64,48,8,128,1,float16,float16,0,0.010460799932479859
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,64,48,4,128,1,fp8,fp8,0,0.010411199927330018
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,64,48,4,128,1,float16,float16,0,0.01037919968366623
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,64,48,8,128,1,float16,fp8,0,0.010476800054311753
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,64,48,8,128,1,fp8,fp8,0,0.010468800365924836
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,32,48,1,128,1,float16,float16,0,0.2731503963470459
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,32,48,1,128,1,float16,fp8,0,0.27476160526275634
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,32,48,2,128,1,float16,float16,0,0.27440481185913085
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,64,48,4,128,1,float16,float16,0,0.02940160036087036
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,32,48,2,128,1,float16,fp8,0,0.2739487886428833
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,32,48,2,128,1,fp8,fp8,0,0.27456159591674806
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,32,48,4,128,1,float16,float16,0,0.27576160430908203
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,32,48,4,128,1,fp8,fp8,0,0.2749840021133423
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,32,48,8,128,1,float16,float16,0,0.28066720962524416
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,64,48,48,128,1,float16,float16,0,0.016543999314308167
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,32,48,8,128,1,float16,fp8,0,0.2745039939880371
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,32,48,8,128,1,fp8,fp8,0,0.2749696016311646
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,32,48,48,128,1,float16,float16,0,0.16968480348587037
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,32,48,48,128,1,float16,fp8,0,0.16422560214996337
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,32,48,48,128,1,fp8,fp8,0,0.16417280435562134
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,32,48,1,128,1,float16,float16,0,0.14448319673538207
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,32,48,1,128,1,float16,fp8,0,0.14245599508285522
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,32,48,1,128,1,fp8,fp8,0,0.14238879680633545
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,32,48,2,128,1,float16,float16,0,0.14383840560913086
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,32,48,2,128,1,float16,fp8,0,0.14192960262298585
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,32,48,2,128,1,fp8,fp8,0,0.14239039421081542
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,32,48,4,128,1,float16,float16,0,0.14479520320892333
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,32,48,4,128,1,float16,fp8,0,0.1418287992477417
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,32,48,4,128,1,fp8,fp8,0,0.1426144003868103
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,64,48,4,128,1,float16,float16,0,0.049384000897407535
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,32,48,8,128,1,float16,float16,0,0.14747040271759032
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,32,48,8,128,1,float16,fp8,0,0.14262239933013915
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,32,48,8,128,1,fp8,fp8,0,0.14329919815063477
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,32,48,48,128,1,float16,float16,0,0.09171199798583984
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,32,48,48,128,1,float16,fp8,0,0.08655999898910523
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,32,48,48,128,1,fp8,fp8,0,0.0867904007434845
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,32,48,1,128,1,float16,float16,0,0.07762079834938049
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,32,48,1,128,1,float16,fp8,0,0.07529760003089905
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,32,48,1,128,1,fp8,fp8,0,0.07511199712753296
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,32,48,2,128,1,float16,float16,0,0.07705600261688232
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,32,48,2,128,1,fp8,fp8,0,0.07548959851264954
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,32,48,4,128,1,float16,float16,0,0.07754080295562744
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,32,48,4,128,1,fp8,fp8,0,0.07571520209312439
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,32,48,8,128,1,float16,float16,0,0.07890560030937195
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,32,48,8,128,1,float16,fp8,0,0.07553600072860718
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,32,48,8,128,1,fp8,fp8,0,0.07541919946670532
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,32,48,1,128,1,fp8,fp8,0,0.2734031915664673
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,32,48,48,128,1,float16,float16,0,0.04813759922981262
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,32,48,48,128,1,float16,fp8,0,0.047391998767852786
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,32,48,48,128,1,fp8,fp8,0,0.04734399914741516
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,32,48,1,128,1,float16,float16,0,0.044119998812675476
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,32,48,1,128,1,float16,fp8,0,0.042936000227928164
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,32,48,1,128,1,fp8,fp8,0,0.04236479997634888
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,32,48,2,128,1,float16,fp8,0,0.04283359944820404
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,32,48,2,128,1,fp8,fp8,0,0.0424591988325119
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,32,48,2,128,1,float16,float16,0,0.044228801131248476
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,32,48,4,128,1,float16,float16,0,0.043859198689460754
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,32,48,4,128,1,float16,fp8,0,0.04274719953536987
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,32,48,4,128,1,fp8,fp8,0,0.04243519902229309
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,32,48,8,128,1,float16,float16,0,0.04483680129051208
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,32,48,8,128,1,float16,fp8,0,0.04256480038166046
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,32,48,48,128,1,float16,float16,0,0.0288239985704422
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,32,48,48,128,1,float16,fp8,0,0.02892000079154968
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,32,48,48,128,1,fp8,fp8,0,0.028951999545097352
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,32,48,1,128,1,float16,float16,0,0.026785600185394286
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,32,48,1,128,1,float16,fp8,0,0.026767998933792114
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,32,48,4,128,1,float16,fp8,0,0.27391839027404785
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,32,48,1,128,1,fp8,fp8,0,0.02688319981098175
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,32,48,2,128,1,float16,float16,0,0.026767998933792114
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,32,48,2,128,1,float16,fp8,0,0.0267984002828598
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,32,48,2,128,1,fp8,fp8,0,0.026812800765037538
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,32,48,4,128,1,float16,float16,0,0.02688640058040619
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,32,48,4,128,1,float16,fp8,0,0.02672159969806671
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,32,48,4,128,1,fp8,fp8,0,0.026844799518585205
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,32,48,8,128,1,float16,float16,0,0.026958400011062623
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,32,48,8,128,1,fp8,fp8,0,0.02672800123691559
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,32,48,48,128,1,float16,float16,0,0.018587200343608855
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,32,48,48,128,1,float16,fp8,0,0.01868479996919632
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,32,48,48,128,1,fp8,fp8,0,0.01857919991016388
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,32,48,1,128,1,float16,fp8,0,0.01664000004529953
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,32,48,1,128,1,fp8,fp8,0,0.016551999747753142
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,32,48,2,128,1,float16,float16,0,0.016641600430011748
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,32,48,2,128,1,float16,fp8,0,0.016689600050449373
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,32,48,2,128,1,fp8,fp8,0,0.016648000478744505
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,32,48,4,128,1,float16,float16,0,0.01679999977350235
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,32,48,4,128,1,float16,fp8,0,0.016631999611854555
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,32,48,4,128,1,fp8,fp8,0,0.016599999368190767
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,32,48,8,128,1,float16,float16,0,0.01661760061979294
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,32,48,8,128,1,float16,fp8,0,0.01663520038127899
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,32,48,8,128,1,fp8,fp8,0,0.01659359931945801
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,32,48,48,128,1,float16,float16,0,0.014483200013637542
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,32,48,48,128,1,float16,fp8,0,0.014580799639225006
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,32,48,1,128,1,float16,float16,0,0.012857599556446076
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,32,48,1,128,1,float16,fp8,0,0.013467200100421906
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,32,48,1,128,1,fp8,fp8,0,0.013150399923324585
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,32,48,2,128,1,float16,float16,0,0.01282079964876175
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,32,48,2,128,1,float16,fp8,0,0.012729600071907043
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,32,48,2,128,1,fp8,fp8,0,0.01252799928188324
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,32,48,4,128,1,float16,float16,0,0.0125231996178627
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,32,48,4,128,1,float16,fp8,0,0.012652799487113953
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,32,48,4,128,1,fp8,fp8,0,0.01387999951839447
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,32,48,8,128,1,float16,float16,0,0.013331200182437896
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,32,48,8,128,1,float16,fp8,0,0.013841600716114044
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,64,48,4,128,1,float16,fp8,0,0.010462400317192078
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,32,48,8,128,1,fp8,fp8,0,0.01369439959526062
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,32,48,4,128,1,float16,fp8,0,0.07521119713783264
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,32,48,48,128,1,float16,fp8,0,0.010398399829864503
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,32,48,48,128,1,float16,float16,0,0.012511999905109405
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,32,48,48,128,1,fp8,fp8,0,0.010497599840164185
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,32,48,1,128,1,float16,float16,0,0.010486400127410889
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,32,48,1,128,1,float16,fp8,0,0.0104032002389431
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,32,48,2,128,1,float16,float16,0,0.01040479987859726
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,32,48,2,128,1,float16,fp8,0,0.010412800312042236
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,32,48,2,128,1,fp8,fp8,0,0.010467199981212616
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,32,48,4,128,1,float16,float16,0,0.010419200360774993
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,32,48,8,128,1,fp8,fp8,0,0.0429280012845993
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,32,48,4,128,1,float16,fp8,0,0.010414399951696397
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,32,48,4,128,1,fp8,fp8,0,0.010476800054311753
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,32,48,8,128,1,float16,float16,0,0.010391999781131745
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,32,48,8,128,1,float16,fp8,0,0.010475199669599533
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,32,48,8,128,1,fp8,fp8,0,0.010390400141477584
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,32,48,48,128,1,float16,float16,0,0.010489600151777268
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,32,48,48,128,1,float16,fp8,0,0.010358399897813796
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,32,48,48,128,1,fp8,fp8,0,0.01050880029797554
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,32,48,1,128,1,float16,float16,0,0.010420800000429154
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,32,48,8,128,1,float16,fp8,0,0.02675839960575104
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,32,48,1,128,1,float16,fp8,0,0.010502400249242783
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,32,48,1,128,1,fp8,fp8,0,0.010371199995279311
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,32,48,2,128,1,float16,float16,0,0.01032480001449585
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,32,48,1,128,1,float16,float16,0,0.016761599481105803
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,32,48,4,128,1,float16,float16,0,0.010441599786281586
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,32,48,4,128,1,float16,fp8,0,0.010407999902963639
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,32,48,4,128,1,fp8,fp8,0,0.010391999781131745
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,32,48,8,128,1,float16,float16,0,0.010393600165843963
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,32,48,8,128,1,float16,fp8,0,0.010367999970912933
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,32,48,8,128,1,fp8,fp8,0,0.010363200306892395
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,32,48,48,128,1,fp8,fp8,0,0.014486399292945863
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,16,48,1,128,1,float16,float16,0,0.26922879219055174
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,16,48,1,128,1,float16,fp8,0,0.25362560749053953
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,16,48,1,128,1,fp8,fp8,0,0.2533008098602295
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,16,48,2,128,1,float16,float16,0,0.26797120571136473
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,16,48,2,128,1,float16,fp8,0,0.2543535947799683
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,16,48,2,128,1,fp8,fp8,0,0.25330400466918945
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,16,48,4,128,1,float16,float16,0,0.26866879463195803
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,16,48,4,128,1,float16,fp8,0,0.253873610496521
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,16,48,4,128,1,fp8,fp8,0,0.25298080444335935
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,32,48,2,128,1,float16,fp8,0,0.07558720111846924
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,16,48,8,128,1,float16,float16,0,0.27013120651245115
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,16,48,8,128,1,float16,fp8,0,0.25464160442352296
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,16,48,8,128,1,fp8,fp8,0,0.25392000675201415
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,16,48,48,128,1,float16,float16,0,0.15263999700546266
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,16,48,48,128,1,float16,fp8,0,0.14366079568862916
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,16,48,48,128,1,fp8,fp8,0,0.14418400526046754
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,16,48,1,128,1,float16,float16,0,0.1382591962814331
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,16,48,1,128,1,float16,fp8,0,0.13265600204467773
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,32,48,1,128,1,fp8,fp8,0,0.010471999645233154
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,16,48,2,128,1,float16,float16,0,0.13926080465316773
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,16,48,2,128,1,float16,fp8,0,0.13209439516067506
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,16,48,2,128,1,fp8,fp8,0,0.13238400220870972
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,16,48,4,128,1,float16,float16,0,0.13874239921569825
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,16,48,4,128,1,float16,fp8,0,0.1325503945350647
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,16,48,4,128,1,fp8,fp8,0,0.1317744016647339
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,16,48,8,128,1,float16,float16,0,0.13995039463043213
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,16,48,8,128,1,float16,fp8,0,0.1319200038909912
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,16,48,8,128,1,fp8,fp8,0,0.13242559432983397
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,16,48,48,128,1,float16,float16,0,0.08135520219802857
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,16,48,48,128,1,float16,fp8,0,0.07631999850273133
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,16,48,48,128,1,fp8,fp8,0,0.07594559788703918
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,16,48,1,128,1,float16,float16,0,0.07647519707679748
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,16,48,1,128,1,fp8,fp8,0,0.07065119743347167
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,16,48,2,128,1,float16,float16,0,0.07537760138511658
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,16,48,2,128,1,float16,fp8,0,0.07139520049095154
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,16,48,2,128,1,fp8,fp8,0,0.07085440158843995
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,16,48,4,128,1,float16,float16,0,0.07581120133399963
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,16,48,4,128,1,float16,fp8,0,0.07125440239906311
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,16,48,4,128,1,fp8,fp8,0,0.07095040082931518
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,16,48,8,128,1,float16,float16,0,0.07597439885139465
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,32,48,2,128,1,fp8,fp8,0,0.010424000024795533
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,16,48,8,128,1,float16,fp8,0,0.07111999988555909
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,16,48,8,128,1,fp8,fp8,0,0.07077599763870239
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,16,48,48,128,1,float16,float16,0,0.04531359970569611
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,16,48,48,128,1,fp8,fp8,0,0.043326398730278014
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,16,48,1,128,1,float16,fp8,0,0.04036639928817749
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,16,48,1,128,1,fp8,fp8,0,0.040638399124145505
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,16,48,2,128,1,float16,fp8,0,0.040612798929214475
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,16,48,2,128,1,fp8,fp8,0,0.0403903990983963
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,16,48,4,128,1,float16,float16,0,0.04357599914073944
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,16,48,4,128,1,float16,fp8,0,0.04094560146331787
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,16,48,4,128,1,fp8,fp8,0,0.040775999426841736
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,16,48,8,128,1,float16,float16,0,0.0437855988740921
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,16,48,8,128,1,float16,fp8,0,0.040934398770332336
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,16,48,8,128,1,fp8,fp8,0,0.041089600324630736
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,16,48,48,128,1,float16,float16,0,0.02768639922142029
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,16,48,48,128,1,float16,fp8,0,0.02680160105228424
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,16,48,48,128,1,fp8,fp8,0,0.026862400770187377
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,16,48,1,128,1,float16,float16,0,0.026737600564956665
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,16,48,1,128,1,float16,fp8,0,0.024860799312591553
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,16,48,1,128,1,fp8,fp8,0,0.024750399589538574
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,16,48,2,128,1,float16,float16,0,0.026630398631095887
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,16,48,2,128,1,float16,fp8,0,0.024721600115299225
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,16,48,2,128,1,fp8,fp8,0,0.024792000651359558
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,16,48,4,128,1,float16,float16,0,0.026795199513435362
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,16,48,4,128,1,float16,fp8,0,0.024820800125598907
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,16,48,4,128,1,fp8,fp8,0,0.024830399453639983
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,16,48,8,128,1,float16,float16,0,0.02664639949798584
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,16,48,8,128,1,float16,fp8,0,0.024758400022983552
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,16,48,1,128,1,fp8,fp8,0,0.13208320140838622
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,16,48,8,128,1,fp8,fp8,0,0.024743999540805816
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,16,48,48,128,1,float16,float16,0,0.018680000305175783
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,16,48,48,128,1,float16,fp8,0,0.01664319932460785
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,16,48,48,128,1,fp8,fp8,0,0.016564799845218657
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,16,48,1,128,1,float16,float16,0,0.016631999611854555
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,16,48,1,128,1,float16,fp8,0,0.016598400473594666
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,16,48,1,128,1,fp8,fp8,0,0.01655679941177368
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,16,48,2,128,1,float16,float16,0,0.016568000614643096
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,16,48,2,128,1,fp8,fp8,0,0.016569599509239197
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,16,48,1,128,1,float16,fp8,0,0.070660799741745
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,16,48,4,128,1,float16,float16,0,0.016606399416923524
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,16,48,4,128,1,float16,fp8,0,0.016577599942684172
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,16,48,4,128,1,fp8,fp8,0,0.016575999557971954
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,16,48,8,128,1,float16,float16,0,0.0166143998503685
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,16,48,8,128,1,float16,fp8,0,0.016603200137615202
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,16,48,8,128,1,fp8,fp8,0,0.016616000235080718
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16,48,48,128,1,float16,float16,0,0.014537599682807923
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,32,48,2,128,1,float16,fp8,0,0.010452800244092942
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16,48,48,128,1,float16,fp8,0,0.012532800436019897
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16,48,48,128,1,fp8,fp8,0,0.012489599734544754
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16,48,1,128,1,float16,float16,0,0.012638400495052337
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,16,48,1,128,1,float16,float16,0,0.04353919923305512
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16,48,2,128,1,float16,float16,0,0.012828800082206725
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16,48,1,128,1,fp8,fp8,0,0.012483199685811996
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16,48,2,128,1,float16,fp8,0,0.012515200674533844
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,16,48,2,128,1,float16,float16,0,0.04350239932537079
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16,48,4,128,1,float16,float16,0,0.012508800625801087
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16,48,4,128,1,float16,fp8,0,0.012489599734544754
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16,48,4,128,1,fp8,fp8,0,0.012544000148773193
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16,48,8,128,1,float16,float16,0,0.012740799784660339
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16,48,8,128,1,fp8,fp8,0,0.012516799569129943
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16,48,48,128,1,float16,float16,0,0.011036799848079681
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16,48,48,128,1,float16,fp8,0,0.010499200224876404
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16,48,48,128,1,fp8,fp8,0,0.01045600026845932
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16,48,1,128,1,float16,float16,0,0.0104032002389431
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16,48,1,128,1,float16,fp8,0,0.010500799864530563
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16,48,2,128,1,float16,float16,0,0.01040479987859726
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16,48,1,128,1,fp8,fp8,0,0.010425599664449692
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16,48,2,128,1,float16,fp8,0,0.010518400371074677
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16,48,2,128,1,fp8,fp8,0,0.010371199995279311
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16,48,4,128,1,float16,float16,0,0.010460799932479859
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16,48,4,128,1,float16,fp8,0,0.010391999781131745
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16,48,8,128,1,float16,float16,0,0.010377600044012069
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16,48,8,128,1,float16,fp8,0,0.010520000010728836
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16,48,8,128,1,fp8,fp8,0,0.010465600341558457
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16,48,48,128,1,float16,float16,0,0.010467199981212616
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16,48,48,128,1,float16,fp8,0,0.010480000078678131
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16,48,48,128,1,fp8,fp8,0,0.010371199995279311
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16,48,1,128,1,float16,float16,0,0.010497599840164185
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16,48,1,128,1,float16,fp8,0,0.01045759990811348
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16,48,1,128,1,fp8,fp8,0,0.008542399853467941
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16,48,2,128,1,float16,fp8,0,0.008508799970149994
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16,48,2,128,1,fp8,fp8,0,0.008430399745702744
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16,48,4,128,1,float16,float16,0,0.009046400338411332
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16,48,4,128,1,float16,fp8,0,0.008865600079298019
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16,48,4,128,1,fp8,fp8,0,0.010313600301742554
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16,48,8,128,1,float16,float16,0,0.010468800365924836
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16,48,8,128,1,float16,fp8,0,0.010382399708032609
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16,48,8,128,1,fp8,fp8,0,0.010070399940013885
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,1,48,1,128,1,float16,float16,0,0.26300640106201173
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16,48,1,128,1,float16,fp8,0,0.012529599666595458
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,16,48,48,128,1,float16,fp8,0,0.04320000112056732
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,1,48,1,128,1,float16,fp8,0,0.24539520740509033
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16,48,2,128,1,fp8,fp8,0,0.012561599910259246
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,1,48,1,128,1,fp8,fp8,0,0.24498240947723388
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16,48,8,128,1,float16,fp8,0,0.012505599856376648
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,1,48,2,128,1,float16,float16,0,0.2603584051132202
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,1,48,2,128,1,fp8,fp8,0,0.24525439739227295
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,1,48,4,128,1,float16,float16,0,0.260369610786438
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,1,48,4,128,1,fp8,fp8,0,0.24561760425567628
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,1,48,8,128,1,float16,float16,0,0.2615904092788696
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,1,48,8,128,1,float16,fp8,0,0.245086407661438
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,1,48,48,128,1,float16,float16,0,0.13855040073394775
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,1,48,8,128,1,fp8,fp8,0,0.24525279998779298
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16,48,4,128,1,fp8,fp8,0,0.010425599664449692
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,1,48,48,128,1,float16,fp8,0,0.12920160293579103
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,1,48,48,128,1,fp8,fp8,0,0.12859519720077514
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,1,48,1,128,1,float16,float16,0,0.13712799549102783
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,1,48,1,128,1,fp8,fp8,0,0.12831200361251832
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,1,48,1,128,1,float16,fp8,0,0.1288175940513611
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,1,48,2,128,1,float16,float16,0,0.1374848008155823
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,1,48,2,128,1,float16,fp8,0,0.1282256007194519
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,1,48,2,128,1,fp8,fp8,0,0.12838239669799806
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,1,48,4,128,1,float16,float16,0,0.13885600566864015
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,1,48,4,128,1,float16,fp8,0,0.1282080054283142
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,1,48,4,128,1,fp8,fp8,0,0.12833280563354493
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,1,48,8,128,1,float16,float16,0,0.13694080114364623
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,1,48,8,128,1,float16,fp8,0,0.12843359708786012
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,1,48,8,128,1,fp8,fp8,0,0.1280832052230835
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1,48,48,128,1,float16,fp8,0,0.06915040016174316
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16,48,2,128,1,float16,float16,0,0.010440000146627427
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1,48,48,128,1,fp8,fp8,0,0.07017120122909545
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1,48,1,128,1,float16,float16,0,0.07480480074882508
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1,48,1,128,1,float16,fp8,0,0.06907520294189454
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1,48,1,128,1,fp8,fp8,0,0.06891040205955505
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1,48,2,128,1,float16,fp8,0,0.06978880167007447
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1,48,2,128,1,float16,float16,0,0.07547360062599182
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1,48,2,128,1,fp8,fp8,0,0.06926239728927612
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1,48,4,128,1,float16,float16,0,0.0745743989944458
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1,48,4,128,1,fp8,fp8,0,0.06895040273666382
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1,48,8,128,1,float16,float16,0,0.07470560073852539
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1,48,8,128,1,float16,fp8,0,0.06895359754562377
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1,48,8,128,1,fp8,fp8,0,0.06938080191612243
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1,48,48,128,1,float16,float16,0,0.0432559996843338
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1,48,48,128,1,fp8,fp8,0,0.039087998867034915
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1,48,48,128,1,float16,fp8,0,0.03932160139083862
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1,48,1,128,1,float16,float16,0,0.0416047990322113
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1,48,1,128,1,float16,fp8,0,0.03913759887218475
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1,48,1,128,1,fp8,fp8,0,0.039136001467704774
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1,48,2,128,1,float16,float16,0,0.0425247997045517
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1,48,2,128,1,float16,fp8,0,0.039155200123786926
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1,48,2,128,1,fp8,fp8,0,0.03916159868240356
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1,48,4,128,1,float16,float16,0,0.04241760075092316
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1,48,4,128,1,float16,fp8,0,0.03917439877986908
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1,48,4,128,1,fp8,fp8,0,0.03914720118045807
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1,48,8,128,1,float16,float16,0,0.04224959909915924
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1,48,8,128,1,float16,fp8,0,0.03915359973907471
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1,48,8,128,1,fp8,fp8,0,0.039136001467704774
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1,48,48,128,1,float16,float16,0,0.02677280008792877
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1,48,48,128,1,float16,fp8,0,0.02484000027179718
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1,48,48,128,1,fp8,fp8,0,0.024723200500011443
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1,48,1,128,1,float16,float16,0,0.02481600046157837
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1,48,1,128,1,float16,fp8,0,0.024766400456428528
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,1,48,2,128,1,float16,fp8,0,0.24527521133422853
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1,48,1,128,1,fp8,fp8,0,0.024726399779319765
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1,48,2,128,1,float16,float16,0,0.025012800097465517
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1,48,2,128,1,float16,fp8,0,0.024804799258708952
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1,48,2,128,1,fp8,fp8,0,0.02481279969215393
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1,48,4,128,1,float16,float16,0,0.025060799717903138
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1,48,4,128,1,float16,fp8,0,0.02481119930744171
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1,48,8,128,1,float16,float16,0,0.02494879961013794
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,1,48,4,128,1,float16,fp8,0,0.24510719776153564
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1,48,8,128,1,float16,fp8,0,0.024743999540805816
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1,48,8,128,1,fp8,fp8,0,0.024692800641059876
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1,48,48,128,1,float16,float16,0,0.018475200235843658
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1,48,48,128,1,float16,fp8,0,0.016582399606704712
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1,48,48,128,1,fp8,fp8,0,0.01652639955282211
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1,48,1,128,1,float16,float16,0,0.016604800522327424
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1,48,1,128,1,float16,fp8,0,0.016607999801635742
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1,48,1,128,1,fp8,fp8,0,0.016497600078582763
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1,48,2,128,1,float16,float16,0,0.016603200137615202
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1,48,2,128,1,float16,fp8,0,0.016577599942684172
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1,48,4,128,1,float16,float16,0,0.016572800278663636
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1,48,4,128,1,float16,fp8,0,0.016622400283813475
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1,48,4,128,1,fp8,fp8,0,0.016091200709342956
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1,48,8,128,1,float16,float16,0,0.016625599563121797
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1,48,8,128,1,float16,fp8,0,0.016627199947834015
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1,48,8,128,1,fp8,fp8,0,0.01663679927587509
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1,48,48,128,1,float16,float16,0,0.014830400049686433
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1,48,48,128,1,float16,fp8,0,0.012561599910259246
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1,48,48,128,1,fp8,fp8,0,0.012507200241088867
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1,48,1,128,1,float16,float16,0,0.012531200051307678
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1,48,1,128,1,float16,fp8,0,0.012521600723266602
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1,48,1,128,1,fp8,fp8,0,0.012531200051307678
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1,48,2,128,1,float16,float16,0,0.013435199856758118
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1,48,48,128,1,float16,float16,0,0.07602559924125671
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1,48,2,128,1,fp8,fp8,0,0.012569600343704223
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1,48,4,128,1,float16,float16,0,0.012868799269199371
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1,48,2,128,1,float16,fp8,0,0.012539200484752655
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1,48,4,128,1,fp8,fp8,0,0.012460800260305405
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1,48,8,128,1,float16,float16,0,0.012596799433231354
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1,48,8,128,1,float16,fp8,0,0.012491200119256973
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1,48,8,128,1,fp8,fp8,0,0.012537600100040435
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1,48,48,128,1,float16,float16,0,0.012470400333404541
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1,48,4,128,1,float16,fp8,0,0.06958400011062622
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1,48,48,128,1,fp8,fp8,0,0.0104592002928257
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1,48,1,128,1,float16,fp8,0,0.010420800000429154
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1,48,1,128,1,float16,float16,0,0.01045759990811348
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1,48,1,128,1,fp8,fp8,0,0.010417599976062775
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1,48,2,128,1,float16,float16,0,0.010463999956846238
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1,48,2,128,1,float16,fp8,0,0.010419200360774993
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,16,48,2,128,1,float16,fp8,0,0.016574400663375854
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1,48,2,128,1,fp8,fp8,0,0.009446399658918381
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1,48,4,128,1,float16,float16,0,0.010406400263309478
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1,48,4,128,1,float16,fp8,0,0.01040000021457672
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1,48,4,128,1,fp8,fp8,0,0.010435199737548828
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1,48,8,128,1,float16,fp8,0,0.010425599664449692
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1,48,8,128,1,float16,float16,0,0.010502400249242783
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1,48,8,128,1,fp8,fp8,0,0.010447999835014344
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1,48,48,128,1,float16,float16,0,0.010470400005578995
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1,48,48,128,1,fp8,fp8,0,0.00859839990735054
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1,48,1,128,1,float16,float16,0,0.01040000021457672
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1,48,1,128,1,float16,fp8,0,0.00840959995985031
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1,48,1,128,1,fp8,fp8,0,0.008472000062465668
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1,48,2,128,1,float16,float16,0,0.008505599945783615
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1,48,2,128,1,float16,fp8,0,0.008478400111198426
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1,48,2,128,1,fp8,fp8,0,0.008481600135564805
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1,48,4,128,1,float16,float16,0,0.009369599819183349
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1,48,4,128,1,float16,fp8,0,0.009436800330877303
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1,48,4,128,1,fp8,fp8,0,0.008479999750852585
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1,48,8,128,1,float16,float16,0,0.01043199971318245
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1,48,8,128,1,float16,fp8,0,0.008452799916267396
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1,48,8,128,1,fp8,fp8,0,0.009399999678134919
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1,48,4,128,1,fp8,fp8,0,0.024801599979400634
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1,48,2,128,1,fp8,fp8,0,0.016527999937534333
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1,48,4,128,1,float16,fp8,0,0.012476799637079239
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1,48,48,128,1,float16,fp8,0,0.010385599732398988
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1,48,48,128,1,float16,fp8,0,0.010401599854230881
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,16384,40,1,128,1,float16,fp8,0,14.265415954589844
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,16384,40,1,128,1,fp8,fp8,0,14.496995544433593
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,16384,40,2,128,1,fp8,fp8,0,14.319573974609375
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,16384,40,2,128,1,float16,fp8,0,14.46961669921875
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,16384,40,1,128,1,float16,float16,0,18.31432189941406
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,16384,40,2,128,1,float16,float16,0,18.313189697265624
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,16384,40,4,128,1,float16,float16,0,18.47380676269531
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,16384,40,4,128,1,float16,fp8,0,14.281111145019532
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,16384,40,4,128,1,fp8,fp8,0,14.792582702636718
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16384,40,40,128,1,float16,fp8,0,7.461473846435547
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16384,40,40,128,1,fp8,fp8,0,7.489769744873047
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,16384,40,8,128,1,float16,fp8,0,14.799493408203125
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,16384,40,8,128,1,fp8,fp8,0,14.94818572998047
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16384,40,1,128,1,float16,float16,0,9.537764739990234
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,16384,40,8,128,1,float16,float16,0,19.013626098632812
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16384,40,1,128,1,float16,fp8,0,7.3385871887207035
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16384,40,1,128,1,fp8,fp8,0,7.208290863037109
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16384,40,2,128,1,float16,fp8,0,7.315528106689453
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16384,40,2,128,1,fp8,fp8,0,7.3799278259277346
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16384,40,2,128,1,float16,float16,0,9.26328353881836
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16384,40,4,128,1,float16,float16,0,9.2858642578125
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16384,40,4,128,1,float16,fp8,0,7.398359680175782
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16384,40,4,128,1,fp8,fp8,0,7.435912322998047
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16384,40,40,128,1,fp8,fp8,0,3.8333984375
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16384,40,40,128,1,float16,fp8,0,3.9770881652832033
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16384,40,8,128,1,float16,float16,0,9.632881927490235
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16384,40,8,128,1,float16,fp8,0,7.360990142822265
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16384,40,8,128,1,fp8,fp8,0,7.4509330749511715
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16384,40,1,128,1,float16,float16,0,4.693836975097656
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16384,40,1,128,1,fp8,fp8,0,3.5709457397460938
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16384,40,1,128,1,float16,fp8,0,3.6130912780761717
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16384,40,2,128,1,float16,fp8,0,3.700297546386719
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16384,40,2,128,1,fp8,fp8,0,3.7520687103271486
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16384,40,2,128,1,float16,float16,0,4.652780914306641
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16384,40,4,128,1,float16,fp8,0,3.6597023010253906
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16384,40,4,128,1,fp8,fp8,0,3.785182571411133
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16384,40,4,128,1,float16,float16,0,4.682211303710938
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16384,40,8,128,1,float16,fp8,0,3.7284847259521485
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16384,40,8,128,1,float16,float16,0,4.709196853637695
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16384,40,8,128,1,fp8,fp8,0,3.875076675415039
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16384,40,40,128,1,float16,fp8,0,2.002620887756348
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16384,40,40,128,1,fp8,fp8,0,2.170342445373535
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16384,40,1,128,1,float16,float16,0,2.2793359756469727
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16384,40,1,128,1,fp8,fp8,0,1.8537120819091797
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16384,40,1,128,1,float16,fp8,0,2.298828887939453
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16384,40,2,128,1,float16,float16,0,2.2410863876342773
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16384,40,2,128,1,float16,fp8,0,1.876755142211914
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16384,40,2,128,1,fp8,fp8,0,2.17053279876709
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16384,40,4,128,1,float16,float16,0,2.123944091796875
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16384,40,4,128,1,float16,fp8,0,1.8549135208129883
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16384,40,4,128,1,fp8,fp8,0,2.273553657531738
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16384,40,8,128,1,float16,float16,0,2.2163087844848635
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16384,40,8,128,1,float16,fp8,0,1.8958400726318358
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16384,40,8,128,1,fp8,fp8,0,2.142532730102539
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,12288,40,1,128,1,float16,fp8,0,8.546318054199219
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,12288,40,1,128,1,fp8,fp8,0,8.419857788085938
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,12288,40,1,128,1,float16,float16,0,10.539389038085938
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,12288,40,2,128,1,float16,fp8,0,8.494652557373048
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,12288,40,2,128,1,float16,float16,0,10.792857360839843
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,12288,40,2,128,1,fp8,fp8,0,8.623337554931641
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,12288,40,4,128,1,float16,fp8,0,8.49056167602539
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,12288,40,4,128,1,float16,float16,0,10.941873931884766
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,12288,40,4,128,1,fp8,fp8,0,8.49651870727539
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,12288,40,8,128,1,float16,float16,0,10.903028869628907
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,12288,40,8,128,1,float16,fp8,0,8.683172607421875
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,12288,40,40,128,1,float16,fp8,0,4.298542404174805
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,12288,40,8,128,1,fp8,fp8,0,8.508273315429687
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,12288,40,40,128,1,fp8,fp8,0,4.5502479553222654
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,12288,40,1,128,1,float16,fp8,0,4.109243011474609
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,12288,40,1,128,1,fp8,fp8,0,4.260305786132813
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,12288,40,1,128,1,float16,float16,0,5.4977264404296875
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,12288,40,2,128,1,float16,float16,0,5.39703369140625
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,12288,40,2,128,1,float16,fp8,0,4.176574325561523
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,12288,40,2,128,1,fp8,fp8,0,4.251497650146485
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,12288,40,4,128,1,float16,float16,0,5.461740875244141
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,12288,40,4,128,1,float16,fp8,0,4.422638320922852
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,12288,40,4,128,1,fp8,fp8,0,4.314297485351562
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,12288,40,8,128,1,float16,float16,0,5.488391876220703
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,12288,40,8,128,1,float16,fp8,0,4.481955337524414
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,12288,40,8,128,1,fp8,fp8,0,4.292270278930664
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,12288,40,40,128,1,float16,fp8,0,2.4396736145019533
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,12288,40,40,128,1,fp8,fp8,0,2.274603271484375
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,12288,40,1,128,1,float16,float16,0,2.601153564453125
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,12288,40,1,128,1,float16,fp8,0,2.0975616455078123
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,12288,40,1,128,1,fp8,fp8,0,2.10907039642334
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,12288,40,2,128,1,float16,float16,0,2.721574401855469
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,12288,40,2,128,1,float16,fp8,0,2.1019968032836913
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,12288,40,2,128,1,fp8,fp8,0,2.123092842102051
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,12288,40,4,128,1,float16,float16,0,2.7324752807617188
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,12288,40,4,128,1,float16,fp8,0,2.1280431747436523
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,12288,40,4,128,1,fp8,fp8,0,2.1226272583007812
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,12288,40,8,128,1,float16,fp8,0,2.1235727310180663
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,12288,40,8,128,1,float16,float16,0,2.6714527130126955
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,12288,40,8,128,1,fp8,fp8,0,2.158355140686035
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,12288,40,40,128,1,float16,fp8,0,1.3686832427978515
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,12288,40,40,128,1,fp8,fp8,0,1.191055965423584
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,12288,40,1,128,1,float16,fp8,0,1.1308464050292968
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,12288,40,1,128,1,float16,float16,0,1.2523712158203124
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,12288,40,1,128,1,fp8,fp8,0,1.1126303672790527
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,12288,40,2,128,1,float16,float16,0,1.2695311546325683
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,12288,40,2,128,1,float16,fp8,0,1.3103520393371582
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,12288,40,2,128,1,fp8,fp8,0,1.16561279296875
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,12288,40,4,128,1,float16,float16,0,1.2607728004455567
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,12288,40,4,128,1,float16,fp8,0,1.077184009552002
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,12288,40,4,128,1,fp8,fp8,0,1.1875632286071778
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,12288,40,8,128,1,float16,float16,0,1.2477231979370118
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,12288,40,8,128,1,float16,fp8,0,1.1401968002319336
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,12288,40,8,128,1,fp8,fp8,0,1.1212448120117187
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,10240,40,1,128,1,float16,fp8,0,5.885704040527344
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,10240,40,1,128,1,float16,float16,0,7.653215789794922
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,10240,40,1,128,1,fp8,fp8,0,5.993345642089844
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,10240,40,2,128,1,float16,float16,0,7.716912078857422
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,10240,40,2,128,1,float16,fp8,0,6.18328628540039
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,10240,40,2,128,1,fp8,fp8,0,6.069398498535156
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,10240,40,4,128,1,float16,float16,0,7.8001762390136715
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,10240,40,4,128,1,float16,fp8,0,6.01318244934082
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,10240,40,4,128,1,fp8,fp8,0,6.190702438354492
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,10240,40,8,128,1,float16,float16,0,7.780689239501953
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,10240,40,8,128,1,float16,fp8,0,6.073124694824219
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,10240,40,8,128,1,fp8,fp8,0,6.152616119384765
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,10240,40,40,128,1,float16,fp8,0,3.236985778808594
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,10240,40,40,128,1,fp8,fp8,0,3.2169456481933594
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,10240,40,1,128,1,float16,float16,0,3.701059341430664
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,10240,40,1,128,1,float16,fp8,0,3.3138687133789064
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,10240,40,1,128,1,fp8,fp8,0,3.0268863677978515
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,10240,40,2,128,1,float16,float16,0,3.744044876098633
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,10240,40,2,128,1,float16,fp8,0,3.2484832763671876
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,12288,40,40,128,1,float16,float16,0,2.6143728256225587
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,12288,40,40,128,1,float16,float16,0,1.3084015846252441
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,10240,40,2,128,1,fp8,fp8,0,2.951215934753418
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,10240,40,40,128,1,float16,float16,0,3.871495819091797
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,12288,40,40,128,1,float16,float16,0,5.352862548828125
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16384,40,40,128,1,float16,float16,0,2.276246452331543
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16384,40,40,128,1,float16,float16,0,4.727072143554688
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,10240,40,4,128,1,float16,fp8,0,2.9921344757080077
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,10240,40,4,128,1,float16,float16,0,3.669553756713867
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,10240,40,4,128,1,fp8,fp8,0,3.07598876953125
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,10240,40,40,128,1,float16,fp8,0,1.5886832237243653
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,10240,40,8,128,1,float16,fp8,0,3.0971263885498046
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16384,40,40,128,1,float16,float16,0,9.542616271972657
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,10240,40,40,128,1,float16,float16,0,2.063929557800293
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,10240,40,8,128,1,fp8,fp8,0,3.1392351150512696
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,10240,40,40,128,1,fp8,fp8,0,1.805790328979492
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,10240,40,8,128,1,float16,float16,0,3.6826175689697265
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,10240,40,1,128,1,float16,float16,0,1.7717679977416991
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,10240,40,1,128,1,float16,fp8,0,1.4870240211486816
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,10240,40,1,128,1,fp8,fp8,0,1.5039183616638183
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,10240,40,2,128,1,fp8,fp8,0,1.5058367729187012
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,10240,40,2,128,1,float16,fp8,0,1.6491247177124024
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,10240,40,4,128,1,float16,float16,0,1.7554832458496095
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,10240,40,4,128,1,float16,fp8,0,1.7457872390747071
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,10240,40,2,128,1,float16,float16,0,1.829867172241211
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,10240,40,8,128,1,float16,fp8,0,1.4822815895080566
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,10240,40,40,128,1,float16,float16,0,0.9486448287963867
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,10240,40,8,128,1,float16,float16,0,1.7120479583740233
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,10240,40,4,128,1,fp8,fp8,0,1.8949024200439453
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,10240,40,40,128,1,float16,fp8,0,1.0175552368164062
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,10240,40,1,128,1,float16,float16,0,0.9664143562316895
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,10240,40,8,128,1,fp8,fp8,0,1.48852481842041
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,10240,40,1,128,1,float16,fp8,0,0.7825263977050781
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,10240,40,1,128,1,fp8,fp8,0,0.8479104042053223
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,10240,40,2,128,1,float16,float16,0,0.9728015899658203
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,10240,40,2,128,1,float16,fp8,0,0.9412032127380371
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,10240,40,2,128,1,fp8,fp8,0,0.9066160202026368
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,10240,40,4,128,1,float16,float16,0,0.9324095726013184
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,10240,40,4,128,1,float16,fp8,0,0.7806848049163818
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,10240,40,4,128,1,fp8,fp8,0,0.7799088001251221
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,10240,40,8,128,1,float16,float16,0,0.8905887603759766
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,10240,40,8,128,1,fp8,fp8,0,0.7973631858825684
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,10240,40,8,128,1,float16,fp8,0,0.9275600433349609
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,10240,40,40,128,1,fp8,fp8,0,0.8630784034729004
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,8192,40,1,128,1,fp8,fp8,0,7.718096160888672
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,8192,40,1,128,1,float16,fp8,0,7.827639770507813
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,8192,40,2,128,1,float16,fp8,0,7.841697692871094
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,8192,40,2,128,1,fp8,fp8,0,7.898948669433594
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,8192,40,1,128,1,float16,float16,0,9.811993408203126
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,8192,40,2,128,1,float16,float16,0,9.844510650634765
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,8192,40,4,128,1,float16,fp8,0,7.713152313232422
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,8192,40,4,128,1,float16,float16,0,10.034187316894531
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,8192,40,40,128,1,fp8,fp8,0,4.191315078735352
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,8192,40,40,128,1,float16,float16,0,5.341036987304688
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,8192,40,4,128,1,fp8,fp8,0,7.870555114746094
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,8192,40,1,128,1,float16,float16,0,4.8780464172363285
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,8192,40,8,128,1,float16,fp8,0,7.925367736816407
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,8192,40,8,128,1,fp8,fp8,0,8.115715026855469
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,8192,40,8,128,1,float16,float16,0,10.091970825195313
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,8192,40,40,128,1,float16,fp8,0,4.290254211425781
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,8192,40,1,128,1,float16,fp8,0,3.8685455322265625
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,8192,40,1,128,1,fp8,fp8,0,3.985006332397461
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,8192,40,2,128,1,float16,fp8,0,4.072520065307617
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,8192,40,2,128,1,float16,float16,0,4.9581855773925785
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,8192,40,2,128,1,fp8,fp8,0,3.9391536712646484
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,8192,40,4,128,1,float16,float16,0,5.063212966918945
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,8192,40,4,128,1,float16,fp8,0,4.053015899658203
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,8192,40,4,128,1,fp8,fp8,0,3.805753707885742
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,8192,40,40,128,1,float16,fp8,0,2.1106767654418945
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,8192,40,40,128,1,float16,float16,0,2.714004707336426
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,8192,40,8,128,1,float16,fp8,0,3.921121597290039
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,8192,40,8,128,1,float16,float16,0,5.0964607238769535
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,8192,40,8,128,1,fp8,fp8,0,3.886396789550781
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,8192,40,1,128,1,float16,fp8,0,1.9578592300415039
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,8192,40,40,128,1,fp8,fp8,0,2.5304431915283203
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,8192,40,1,128,1,float16,float16,0,2.264257621765137
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,8192,40,1,128,1,fp8,fp8,0,1.9585695266723633
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,8192,40,2,128,1,float16,float16,0,2.403940773010254
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,8192,40,2,128,1,fp8,fp8,0,1.9787727355957032
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,8192,40,2,128,1,float16,fp8,0,2.3205743789672852
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,8192,40,4,128,1,float16,fp8,0,2.007931137084961
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,8192,40,4,128,1,float16,float16,0,2.272939109802246
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,8192,40,4,128,1,fp8,fp8,0,2.37490234375
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,8192,40,8,128,1,float16,float16,0,2.3578975677490233
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,8192,40,40,128,1,float16,float16,0,1.2376832008361816
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,8192,40,40,128,1,float16,fp8,0,1.113920021057129
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,8192,40,8,128,1,fp8,fp8,0,1.9549728393554688
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,8192,40,40,128,1,fp8,fp8,0,1.300603199005127
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,8192,40,1,128,1,float16,float16,0,1.1282784461975097
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,8192,40,1,128,1,float16,fp8,0,1.3046223640441894
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,8192,40,1,128,1,fp8,fp8,0,1.2377504348754882
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,8192,40,2,128,1,float16,fp8,0,1.0258079528808595
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,8192,40,2,128,1,fp8,fp8,0,1.104372787475586
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,8192,40,4,128,1,float16,float16,0,1.1909119606018066
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,8192,40,4,128,1,float16,fp8,0,1.0760463714599608
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,8192,40,4,128,1,fp8,fp8,0,1.0818752288818358
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,8192,40,8,128,1,float16,fp8,0,0.9896224021911622
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,8192,40,8,128,1,float16,float16,0,1.1369872093200684
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,8192,40,8,128,1,fp8,fp8,0,1.0033807754516602
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,8192,40,40,128,1,float16,fp8,0,0.5795087814331055
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,8192,40,40,128,1,fp8,fp8,0,0.5818352222442627
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,8192,40,1,128,1,float16,float16,0,0.5847551822662354
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,8192,40,1,128,1,float16,fp8,0,0.5582767963409424
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,8192,40,1,128,1,fp8,fp8,0,0.5353343963623047
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,8192,40,2,128,1,float16,float16,0,0.5848207950592041
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,8192,40,2,128,1,float16,fp8,0,0.5408383846282959
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,8192,40,2,128,1,fp8,fp8,0,0.5553919792175293
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,8192,40,4,128,1,float16,fp8,0,0.5322080135345459
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,8192,40,4,128,1,float16,float16,0,0.6217599868774414
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,8192,40,8,128,1,float16,fp8,0,2.2081151962280274
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,8192,40,4,128,1,fp8,fp8,0,0.5355631828308105
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,8192,40,8,128,1,float16,float16,0,0.5992159843444824
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,8192,40,8,128,1,float16,fp8,0,0.5479423999786377
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,8192,40,8,128,1,fp8,fp8,0,0.5303055763244628
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,8192,40,2,128,1,float16,float16,0,1.1385071754455567
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,8192,40,40,128,1,float16,float16,0,0.6937056064605713
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,6144,40,1,128,1,float16,fp8,0,4.468457412719727
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,6144,40,1,128,1,fp8,fp8,0,4.53240966796875
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,6144,40,2,128,1,float16,fp8,0,4.530508804321289
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,6144,40,1,128,1,float16,float16,0,5.78087043762207
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,6144,40,2,128,1,float16,float16,0,5.672895812988282
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,6144,40,4,128,1,float16,float16,0,5.658204650878906
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,6144,40,4,128,1,float16,fp8,0,4.498721694946289
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,6144,40,2,128,1,fp8,fp8,0,4.5336353302001955
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,6144,40,4,128,1,fp8,fp8,0,4.470016098022461
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,6144,40,40,128,1,fp8,fp8,0,2.5172607421875
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,6144,40,8,128,1,float16,fp8,0,4.497769546508789
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,6144,40,8,128,1,float16,float16,0,5.838087844848633
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,6144,40,8,128,1,fp8,fp8,0,4.714459228515625
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,6144,40,1,128,1,float16,fp8,0,2.2555456161499023
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,6144,40,1,128,1,float16,float16,0,2.6103296279907227
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,6144,40,1,128,1,fp8,fp8,0,2.2450223922729493
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,6144,40,2,128,1,float16,float16,0,2.7452320098876952
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,6144,40,40,128,1,float16,float16,0,3.0722911834716795
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,6144,40,2,128,1,fp8,fp8,0,2.3037200927734376
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,6144,40,2,128,1,float16,fp8,0,2.4878591537475585
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,6144,40,40,128,1,float16,fp8,0,2.7121423721313476
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,6144,40,4,128,1,float16,fp8,0,2.4224672317504883
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,6144,40,4,128,1,float16,float16,0,2.5606719970703127
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,6144,40,4,128,1,fp8,fp8,0,2.2473983764648438
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,6144,40,40,128,1,float16,float16,0,1.4436351776123046
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,6144,40,8,128,1,float16,fp8,0,2.241963195800781
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,6144,40,8,128,1,fp8,fp8,0,2.2819072723388674
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,6144,40,1,128,1,float16,fp8,0,1.1562000274658204
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,6144,40,40,128,1,fp8,fp8,0,1.284928035736084
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,6144,40,1,128,1,float16,float16,0,1.4622655868530274
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,6144,40,1,128,1,fp8,fp8,0,1.3231519699096679
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,6144,40,2,128,1,float16,fp8,0,1.163041591644287
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,6144,40,2,128,1,float16,float16,0,1.3086159706115723
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,6144,40,2,128,1,fp8,fp8,0,1.1422127723693847
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,6144,40,4,128,1,float16,float16,0,1.298744010925293
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,6144,40,4,128,1,float16,fp8,0,1.4161600112915038
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,6144,40,4,128,1,fp8,fp8,0,1.2003055572509767
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,6144,40,40,128,1,float16,float16,0,0.821134376525879
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,6144,40,8,128,1,fp8,fp8,0,1.1435711860656739
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,6144,40,8,128,1,float16,float16,0,1.3123552322387695
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,6144,40,40,128,1,float16,fp8,0,0.705131196975708
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,6144,40,8,128,1,float16,fp8,0,1.338368034362793
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,6144,40,40,128,1,fp8,fp8,0,0.6762976169586181
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,6144,40,40,128,1,float16,fp8,0,1.248470401763916
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,6144,40,1,128,1,float16,fp8,0,0.6201663970947265
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,6144,40,2,128,1,float16,float16,0,0.6727039813995361
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,6144,40,2,128,1,fp8,fp8,0,0.6071280002593994
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,6144,40,2,128,1,float16,fp8,0,0.6449952125549316
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,6144,40,4,128,1,float16,float16,0,0.6726208209991456
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,6144,40,8,128,1,float16,float16,0,2.9653520584106445
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,6144,40,4,128,1,float16,fp8,0,0.615825605392456
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,6144,40,4,128,1,fp8,fp8,0,0.6977759838104248
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,6144,40,8,128,1,float16,fp8,0,0.6214928150177002
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,6144,40,40,128,1,float16,float16,0,0.3919584035873413
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,6144,40,8,128,1,fp8,fp8,0,0.5993311882019043
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,6144,40,1,128,1,float16,float16,0,0.35619359016418456
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,6144,40,40,128,1,fp8,fp8,0,0.40309758186340333
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,6144,40,1,128,1,float16,fp8,0,0.3267663955688477
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,6144,40,1,128,1,fp8,fp8,0,0.3311183929443359
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,6144,40,2,128,1,float16,float16,0,0.360697603225708
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,6144,40,2,128,1,float16,fp8,0,0.32730081081390383
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,6144,40,2,128,1,fp8,fp8,0,0.3310719966888428
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,6144,40,4,128,1,float16,float16,0,0.36063361167907715
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,6144,40,4,128,1,float16,fp8,0,0.3278223991394043
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,6144,40,4,128,1,fp8,fp8,0,0.3308896064758301
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,6144,40,8,128,1,float16,fp8,0,0.32809441089630126
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,6144,40,8,128,1,fp8,fp8,0,0.3310256004333496
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,6144,40,1,128,1,float16,float16,0,0.7393760204315185
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,6144,40,1,128,1,fp8,fp8,0,0.7456592082977295
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,6144,40,8,128,1,float16,float16,0,0.6721903800964355
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,6144,40,40,128,1,float16,fp8,0,0.3583951950073242
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,6144,40,8,128,1,float16,float16,0,0.3651103973388672
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,4096,40,1,128,1,float16,fp8,0,4.348600006103515
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,4096,40,1,128,1,float16,float16,0,5.268019104003907
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,4096,40,1,128,1,fp8,fp8,0,4.2855583190917965
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,4096,40,2,128,1,float16,float16,0,5.187995147705078
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,4096,40,2,128,1,float16,fp8,0,4.298027038574219
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,4096,40,2,128,1,fp8,fp8,0,4.31384162902832
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,4096,40,4,128,1,float16,float16,0,5.318012619018555
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,4096,40,40,128,1,float16,fp8,0,2.4521711349487303
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,4096,40,40,128,1,float16,float16,0,2.9827808380126952
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,4096,40,4,128,1,float16,fp8,0,4.449140930175782
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,4096,40,4,128,1,fp8,fp8,0,4.427199935913086
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,4096,40,8,128,1,float16,fp8,0,4.369948959350586
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,4096,40,8,128,1,float16,float16,0,5.371654510498047
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,4096,40,8,128,1,fp8,fp8,0,4.409257507324218
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,4096,40,1,128,1,float16,float16,0,2.402071952819824
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,4096,40,1,128,1,fp8,fp8,0,2.218684768676758
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,4096,40,40,128,1,fp8,fp8,0,2.6294063568115233
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,4096,40,1,128,1,float16,fp8,0,2.4115968704223634
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,4096,40,2,128,1,float16,fp8,0,2.1977312088012697
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,4096,40,2,128,1,float16,float16,0,2.5369007110595705
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,4096,40,2,128,1,fp8,fp8,0,2.3517120361328123
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,4096,40,4,128,1,float16,float16,0,2.585081672668457
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,4096,40,4,128,1,fp8,fp8,0,2.210495948791504
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,4096,40,4,128,1,float16,fp8,0,2.3876352310180664
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,4096,40,8,128,1,float16,fp8,0,2.1710559844970705
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,4096,40,40,128,1,float16,float16,0,1.6253023147583008
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,4096,40,8,128,1,float16,float16,0,2.5385984420776366
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,4096,40,40,128,1,float16,fp8,0,1.285598373413086
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,4096,40,8,128,1,fp8,fp8,0,2.180099105834961
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,4096,40,40,128,1,fp8,fp8,0,1.4382927894592286
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,4096,40,1,128,1,float16,fp8,0,1.1239855766296387
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,4096,40,1,128,1,float16,float16,0,1.2553071975708008
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,4096,40,1,128,1,fp8,fp8,0,1.1175104141235352
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,4096,40,2,128,1,fp8,fp8,0,1.0994336128234863
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,4096,40,2,128,1,float16,float16,0,1.3413616180419923
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,4096,40,2,128,1,float16,fp8,0,1.3691776275634766
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,4096,40,4,128,1,float16,float16,0,1.2396592140197753
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,4096,40,4,128,1,float16,fp8,0,1.1527664184570312
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,4096,40,4,128,1,fp8,fp8,0,1.1050352096557616
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,4096,40,8,128,1,float16,fp8,0,1.0975040435791015
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,4096,40,40,128,1,float16,float16,0,0.8163200378417969
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,4096,40,40,128,1,float16,fp8,0,0.7064623832702637
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,4096,40,8,128,1,float16,float16,0,1.3756511688232422
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,4096,40,8,128,1,fp8,fp8,0,1.0982064247131347
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,4096,40,1,128,1,float16,float16,0,0.6200943946838379
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,4096,40,1,128,1,float16,fp8,0,0.5689216136932373
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,4096,40,1,128,1,fp8,fp8,0,0.574948787689209
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,4096,40,2,128,1,float16,fp8,0,0.5821760177612305
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,4096,40,4,128,1,float16,float16,0,0.6237887859344482
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,4096,40,2,128,1,fp8,fp8,0,0.7189727783203125
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,4096,40,4,128,1,float16,fp8,0,0.589899206161499
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,4096,40,4,128,1,fp8,fp8,0,0.5690320014953614
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,4096,40,8,128,1,float16,float16,0,0.6317359924316406
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,4096,40,8,128,1,float16,fp8,0,0.5744239807128906
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,4096,40,40,128,1,float16,float16,0,0.4440447807312012
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,4096,40,40,128,1,float16,fp8,0,0.35060160160064696
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,4096,40,8,128,1,fp8,fp8,0,0.6091551780700684
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,4096,40,40,128,1,fp8,fp8,0,0.3425487995147705
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,4096,40,1,128,1,float16,float16,0,0.3243056058883667
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,4096,40,1,128,1,float16,fp8,0,0.30652480125427245
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,4096,40,1,128,1,fp8,fp8,0,0.3319664001464844
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,4096,40,2,128,1,float16,fp8,0,0.31946239471435545
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,4096,40,2,128,1,fp8,fp8,0,0.3058640003204346
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,4096,40,4,128,1,float16,float16,0,0.3275808095932007
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,4096,40,4,128,1,float16,fp8,0,0.3067935943603516
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,4096,40,4,128,1,fp8,fp8,0,0.3287807941436768
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,4096,40,8,128,1,float16,float16,0,0.33794240951538085
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,4096,40,8,128,1,float16,fp8,0,0.3061408042907715
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,4096,40,40,128,1,float16,float16,0,0.20634880065917968
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,4096,40,8,128,1,fp8,fp8,0,0.34291040897369385
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,4096,40,40,128,1,float16,fp8,0,0.2077104091644287
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,4096,40,1,128,1,float16,float16,0,0.17981599569320678
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,4096,40,1,128,1,float16,fp8,0,0.17363519668579103
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,4096,40,1,128,1,fp8,fp8,0,0.18246560096740722
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,4096,40,2,128,1,float16,float16,0,0.18573600053787231
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,4096,40,2,128,1,float16,fp8,0,0.17388639450073243
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,4096,40,2,128,1,fp8,fp8,0,0.17341920137405395
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,4096,40,4,128,1,float16,float16,0,0.19003679752349853
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,4096,40,4,128,1,float16,fp8,0,0.18072960376739503
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,4096,40,4,128,1,fp8,fp8,0,0.17379039525985718
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,4096,40,8,128,1,float16,float16,0,0.18562239408493042
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,4096,40,8,128,1,float16,fp8,0,0.18156479597091674
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,4096,40,8,128,1,fp8,fp8,0,0.17774399518966674
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,4096,40,40,128,1,fp8,fp8,0,0.6586463928222657
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,4096,40,2,128,1,float16,float16,0,0.6718671798706055
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,4096,40,2,128,1,float16,float16,0,0.32580320835113524
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,3072,40,1,128,1,float16,fp8,0,2.609280014038086
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,3072,40,1,128,1,fp8,fp8,0,2.626092720031738
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,3072,40,1,128,1,float16,float16,0,2.988987159729004
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,4096,40,40,128,1,fp8,fp8,0,0.19809759855270387
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,3072,40,2,128,1,float16,float16,0,2.9885839462280273
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,3072,40,2,128,1,float16,fp8,0,2.6086095809936523
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,3072,40,2,128,1,fp8,fp8,0,2.60067195892334
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,3072,40,4,128,1,float16,fp8,0,2.6182960510253905
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,3072,40,4,128,1,float16,float16,0,3.078236770629883
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,3072,40,4,128,1,fp8,fp8,0,2.8485488891601562
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,3072,40,8,128,1,float16,float16,0,2.946112060546875
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,3072,40,8,128,1,float16,fp8,0,2.8731407165527343
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,3072,40,8,128,1,fp8,fp8,0,2.610651206970215
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,3072,40,40,128,1,float16,float16,0,1.7585599899291993
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,3072,40,40,128,1,float16,fp8,0,1.544598388671875
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,3072,40,1,128,1,float16,fp8,0,1.319883155822754
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,3072,40,1,128,1,float16,float16,0,1.4741840362548828
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,3072,40,2,128,1,float16,float16,0,1.4566368103027343
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,3072,40,1,128,1,fp8,fp8,0,1.597987174987793
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,3072,40,40,128,1,fp8,fp8,0,1.9557968139648438
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,3072,40,2,128,1,float16,fp8,0,1.3336400032043456
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,3072,40,2,128,1,fp8,fp8,0,1.4745152473449707
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,3072,40,4,128,1,float16,float16,0,1.477291202545166
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,3072,40,4,128,1,float16,fp8,0,1.3200719833374024
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,3072,40,40,128,1,float16,float16,0,0.8887727737426758
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,3072,40,4,128,1,fp8,fp8,0,1.489361572265625
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,3072,40,8,128,1,float16,fp8,0,1.3886207580566405
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,3072,40,8,128,1,fp8,fp8,0,1.3628527641296386
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,3072,40,40,128,1,float16,fp8,0,0.8060671806335449
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,3072,40,8,128,1,float16,float16,0,1.6524127960205077
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,3072,40,1,128,1,float16,float16,0,0.7293471813201904
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,3072,40,40,128,1,fp8,fp8,0,0.9271759986877441
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,3072,40,1,128,1,fp8,fp8,0,0.7073279857635498
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,3072,40,1,128,1,float16,fp8,0,0.8010704040527343
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,3072,40,2,128,1,float16,float16,0,0.7259007930755615
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,3072,40,2,128,1,fp8,fp8,0,0.6839983940124512
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,3072,40,4,128,1,float16,float16,0,0.8179280281066894
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,3072,40,4,128,1,float16,fp8,0,0.6770927906036377
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,3072,40,40,128,1,float16,float16,0,0.45298399925231936
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,3072,40,4,128,1,fp8,fp8,0,0.7486127853393555
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,3072,40,8,128,1,float16,float16,0,0.7559855937957763
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,3072,40,40,128,1,float16,fp8,0,0.4747583866119385
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,3072,40,40,128,1,fp8,fp8,0,0.4084832191467285
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,3072,40,1,128,1,float16,float16,0,0.37281439304351804
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,3072,40,1,128,1,float16,fp8,0,0.3648240089416504
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,3072,40,1,128,1,fp8,fp8,0,0.4028336048126221
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,3072,40,2,128,1,float16,float16,0,0.39236319065093994
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,3072,40,2,128,1,float16,fp8,0,0.365665602684021
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,3072,40,2,128,1,fp8,fp8,0,0.35710558891296384
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,3072,40,4,128,1,float16,float16,0,0.37973120212554934
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,3072,40,4,128,1,float16,fp8,0,0.3855423927307129
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,3072,40,4,128,1,fp8,fp8,0,0.38771040439605714
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,3072,40,8,128,1,float16,float16,0,0.38727359771728515
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,3072,40,8,128,1,float16,fp8,0,0.37539680004119874
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,3072,40,40,128,1,float16,float16,0,0.24734079837799072
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,3072,40,40,128,1,float16,fp8,0,0.2365664005279541
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,3072,40,40,128,1,fp8,fp8,0,0.22560160160064696
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,3072,40,1,128,1,float16,float16,0,0.21102240085601806
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,3072,40,1,128,1,float16,fp8,0,0.2052288055419922
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,3072,40,1,128,1,fp8,fp8,0,0.19994239807128905
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,3072,40,2,128,1,float16,float16,0,0.2058351993560791
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,3072,40,2,128,1,float16,fp8,0,0.20038399696350098
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,3072,40,2,128,1,fp8,fp8,0,0.20052800178527833
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,3072,40,4,128,1,float16,float16,0,0.20872800350189208
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,3072,40,4,128,1,float16,fp8,0,0.20101120471954345
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,3072,40,4,128,1,fp8,fp8,0,0.19759680032730104
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,3072,40,8,128,1,float16,float16,0,0.21263198852539061
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,3072,40,8,128,1,float16,fp8,0,0.2007040023803711
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,3072,40,40,128,1,float16,float16,0,0.13684480190277098
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,3072,40,8,128,1,fp8,fp8,0,0.19792319536209108
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,3072,40,40,128,1,float16,fp8,0,0.1299056053161621
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,3072,40,40,128,1,fp8,fp8,0,0.12962720394134522
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,3072,40,1,128,1,float16,float16,0,0.11875519752502442
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,3072,40,1,128,1,fp8,fp8,0,0.11550719738006592
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,3072,40,1,128,1,float16,fp8,0,0.11528160572052001
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,3072,40,2,128,1,float16,float16,0,0.11944160461425782
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,3072,40,2,128,1,float16,fp8,0,0.11507999897003174
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,3072,40,2,128,1,fp8,fp8,0,0.11571359634399414
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,3072,40,4,128,1,float16,float16,0,0.12026079893112182
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,3072,40,2,128,1,float16,fp8,0,0.6970704078674317
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,3072,40,4,128,1,fp8,fp8,0,0.11534240245819091
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,3072,40,8,128,1,float16,float16,0,0.12206239700317383
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,3072,40,8,128,1,float16,fp8,0,0.11455520391464233
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,3072,40,8,128,1,fp8,fp8,0,0.11372159719467163
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,3072,40,8,128,1,float16,fp8,0,0.731931209564209
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,3072,40,8,128,1,fp8,fp8,0,0.7065120220184327
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,3072,40,8,128,1,fp8,fp8,0,0.3592207908630371
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,2048,40,1,128,1,float16,fp8,0,2.6633424758911133
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,2048,40,1,128,1,fp8,fp8,0,2.661476707458496
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,2048,40,1,128,1,float16,float16,0,2.9160320281982424
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,2048,40,2,128,1,float16,float16,0,2.9921775817871095
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,2048,40,2,128,1,float16,fp8,0,2.6616912841796876
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,2048,40,2,128,1,fp8,fp8,0,2.6567440032958984
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,3072,40,4,128,1,float16,fp8,0,0.11508159637451172
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,2048,40,4,128,1,float16,float16,0,2.913582420349121
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,2048,40,40,128,1,float16,fp8,0,1.6241487503051757
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,2048,40,40,128,1,float16,float16,0,1.8420896530151367
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,2048,40,4,128,1,float16,fp8,0,2.663865661621094
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,2048,40,4,128,1,fp8,fp8,0,2.670364761352539
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,2048,40,8,128,1,float16,fp8,0,2.794723129272461
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,2048,40,8,128,1,fp8,fp8,0,2.6699951171875
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,2048,40,8,128,1,float16,float16,0,3.38037109375
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,2048,40,1,128,1,float16,float16,0,1.4606703758239745
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,2048,40,1,128,1,float16,fp8,0,1.344596767425537
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,2048,40,1,128,1,fp8,fp8,0,1.3577887535095214
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,2048,40,40,128,1,fp8,fp8,0,1.946446418762207
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,2048,40,2,128,1,float16,float16,0,1.4194448471069336
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,2048,40,2,128,1,float16,fp8,0,1.3605312347412108
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,2048,40,2,128,1,fp8,fp8,0,1.6332351684570312
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,2048,40,4,128,1,float16,fp8,0,1.3441583633422851
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,2048,40,4,128,1,fp8,fp8,0,1.351524829864502
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,2048,40,4,128,1,float16,float16,0,1.5793904304504394
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,2048,40,8,128,1,float16,float16,0,1.484540843963623
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,2048,40,8,128,1,fp8,fp8,0,1.3432031631469727
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,2048,40,40,128,1,float16,float16,0,0.9321344375610352
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,2048,40,40,128,1,float16,fp8,0,0.8270048141479492
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,2048,40,1,128,1,float16,float16,0,0.8070863723754883
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,2048,40,40,128,1,fp8,fp8,0,0.8258543968200683
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,2048,40,1,128,1,float16,fp8,0,0.8220303535461426
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,2048,40,1,128,1,fp8,fp8,0,0.6848112106323242
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,2048,40,2,128,1,float16,float16,0,0.7552000045776367
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,2048,40,2,128,1,float16,fp8,0,0.6848303794860839
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,2048,40,4,128,1,float16,float16,0,0.7326320171356201
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,2048,40,2,128,1,fp8,fp8,0,0.7770927906036377
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,2048,40,4,128,1,float16,fp8,0,0.7485151767730713
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,2048,40,4,128,1,fp8,fp8,0,0.6848320007324219
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,2048,40,8,128,1,float16,float16,0,0.8039888381958008
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,2048,40,40,128,1,float16,fp8,0,0.4391295909881592
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,2048,40,8,128,1,fp8,fp8,0,0.6848063945770264
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,2048,40,40,128,1,float16,float16,0,0.4777376174926758
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,2048,40,40,128,1,fp8,fp8,0,0.4266160011291504
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,2048,40,1,128,1,float16,float16,0,0.4011119842529297
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,2048,40,1,128,1,float16,fp8,0,0.36321918964385985
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,2048,40,1,128,1,fp8,fp8,0,0.35502560138702394
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,2048,40,2,128,1,float16,float16,0,0.37250399589538574
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,2048,40,2,128,1,float16,fp8,0,0.3552383899688721
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,2048,40,4,128,1,float16,float16,0,0.36914079189300536
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,2048,40,2,128,1,fp8,fp8,0,0.4014944076538086
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,2048,40,4,128,1,float16,fp8,0,0.368505597114563
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,2048,40,4,128,1,fp8,fp8,0,0.3672415971755981
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,2048,40,8,128,1,float16,float16,0,0.38121919631958007
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,2048,40,40,128,1,float16,float16,0,0.24684319496154786
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,2048,40,8,128,1,float16,fp8,0,0.35946080684661863
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,2048,40,8,128,1,fp8,fp8,0,0.38121120929718016
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,2048,40,40,128,1,float16,fp8,0,0.23115200996398927
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,2048,40,40,128,1,fp8,fp8,0,0.2274768114089966
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,2048,40,1,128,1,float16,float16,0,0.19606080055236816
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,2048,40,1,128,1,float16,fp8,0,0.19352799654006958
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,2048,40,2,128,1,float16,float16,0,0.19550399780273436
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,2048,40,2,128,1,float16,fp8,0,0.19175519943237304
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,2048,40,2,128,1,fp8,fp8,0,0.19394559860229493
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,2048,40,4,128,1,float16,float16,0,0.19773600101470948
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,2048,40,4,128,1,float16,fp8,0,0.19362080097198486
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,2048,40,4,128,1,fp8,fp8,0,0.19147200584411622
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,2048,40,8,128,1,float16,float16,0,0.20410079956054689
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,2048,40,8,128,1,float16,fp8,0,0.19342880249023436
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,2048,40,8,128,1,fp8,fp8,0,0.19323519468307496
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,2048,40,40,128,1,float16,fp8,0,0.12756799459457396
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,2048,40,40,128,1,fp8,fp8,0,0.1288432002067566
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,2048,40,1,128,1,float16,float16,0,0.110806405544281
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,2048,40,1,128,1,float16,fp8,0,0.10887999534606933
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,2048,40,1,128,1,fp8,fp8,0,0.10825920104980469
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,2048,40,2,128,1,float16,float16,0,0.11148960590362549
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,2048,40,2,128,1,float16,fp8,0,0.10864959955215454
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,2048,40,2,128,1,fp8,fp8,0,0.10867680311203003
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,2048,40,4,128,1,float16,fp8,0,0.10855040550231934
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,2048,40,4,128,1,fp8,fp8,0,0.10853919982910157
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,2048,40,8,128,1,float16,float16,0,0.11639039516448975
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,2048,40,8,128,1,float16,fp8,0,0.10862560272216797
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,2048,40,8,128,1,fp8,fp8,0,0.10865919589996338
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,2048,40,40,128,1,float16,fp8,0,0.07519999742507935
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,2048,40,40,128,1,fp8,fp8,0,0.0751695990562439
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,2048,40,1,128,1,float16,float16,0,0.0688431978225708
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,2048,40,1,128,1,float16,fp8,0,0.06630560159683227
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,2048,40,1,128,1,fp8,fp8,0,0.06596800088882446
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,2048,40,8,128,1,float16,fp8,0,0.6854207992553711
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,2048,40,2,128,1,float16,float16,0,0.06890559792518616
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,2048,40,2,128,1,float16,fp8,0,0.06623039841651916
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,2048,40,4,128,1,float16,float16,0,0.06954240202903747
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,2048,40,4,128,1,float16,fp8,0,0.0661903977394104
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,2048,40,4,128,1,fp8,fp8,0,0.06623039841651916
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,2048,40,8,128,1,float16,float16,0,0.07077919840812683
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,2048,40,8,128,1,float16,fp8,0,0.06619840264320373
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,2048,40,8,128,1,fp8,fp8,0,0.06681920289993286
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,2048,40,1,128,1,fp8,fp8,0,0.20446720123291015
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,2048,40,8,128,1,float16,fp8,0,1.4526176452636719
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1536,40,1,128,1,float16,float16,0,1.724563217163086
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1536,40,1,128,1,float16,fp8,0,1.6995824813842773
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,2048,40,40,128,1,float16,float16,0,0.13695839643478394
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,2048,40,4,128,1,float16,float16,0,0.11306079626083373
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,2048,40,40,128,1,float16,float16,0,0.08258879780769349
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1536,40,1,128,1,fp8,fp8,0,1.6967119216918944
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1536,40,2,128,1,float16,float16,0,1.7723472595214844
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,2048,40,2,128,1,fp8,fp8,0,0.06600959897041321
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1536,40,2,128,1,fp8,fp8,0,1.6990591049194337
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1536,40,2,128,1,float16,fp8,0,1.8347328186035157
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1536,40,4,128,1,float16,float16,0,1.7701471328735352
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1536,40,4,128,1,float16,fp8,0,1.6993904113769531
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1536,40,4,128,1,fp8,fp8,0,1.6985551834106445
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1536,40,40,128,1,float16,float16,0,1.2057711601257324
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1536,40,8,128,1,fp8,fp8,0,1.700547218322754
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1536,40,8,128,1,float16,fp8,0,1.826576042175293
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1536,40,40,128,1,fp8,fp8,0,1.0711584091186523
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1536,40,1,128,1,float16,float16,0,0.8886943817138672
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1536,40,1,128,1,float16,fp8,0,0.9387807846069336
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1536,40,1,128,1,fp8,fp8,0,0.8609295845031738
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1536,40,2,128,1,float16,float16,0,0.8837583541870118
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1536,40,2,128,1,float16,fp8,0,0.860598373413086
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1536,40,2,128,1,fp8,fp8,0,0.9161007881164551
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1536,40,4,128,1,float16,float16,0,0.905726432800293
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1536,40,4,128,1,float16,fp8,0,0.8609536170959473
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1536,40,4,128,1,fp8,fp8,0,0.8882479667663574
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1536,40,8,128,1,float16,float16,0,0.9286879539489746
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1536,40,8,128,1,float16,fp8,0,0.901905632019043
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1536,40,40,128,1,float16,float16,0,0.6056960105895997
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1536,40,40,128,1,float16,fp8,0,0.5540592193603515
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1536,40,1,128,1,float16,float16,0,0.4410655975341797
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1536,40,8,128,1,fp8,fp8,0,0.9454496383666993
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1536,40,40,128,1,fp8,fp8,0,0.6196000099182128
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1536,40,1,128,1,float16,fp8,0,0.4457871913909912
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1536,40,1,128,1,fp8,fp8,0,0.4555327892303467
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1536,40,2,128,1,float16,float16,0,0.44438557624816893
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1536,40,2,128,1,float16,fp8,0,0.4408576011657715
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1536,40,8,128,1,float16,float16,0,1.8480911254882812
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1536,40,2,128,1,fp8,fp8,0,0.4617311954498291
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1536,40,4,128,1,float16,float16,0,0.49257922172546387
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1536,40,4,128,1,float16,fp8,0,0.44557919502258303
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1536,40,4,128,1,fp8,fp8,0,0.4512015819549561
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1536,40,8,128,1,float16,float16,0,0.468671989440918
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1536,40,40,128,1,float16,float16,0,0.3145103931427002
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1536,40,8,128,1,float16,fp8,0,0.4416560173034668
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1536,40,8,128,1,fp8,fp8,0,0.4411168098449707
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1536,40,40,128,1,float16,fp8,0,0.2997888088226318
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1536,40,40,128,1,fp8,fp8,0,0.2886287927627563
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1536,40,1,128,1,float16,float16,0,0.23394720554351806
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1536,40,1,128,1,float16,fp8,0,0.23322560787200927
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1536,40,1,128,1,fp8,fp8,0,0.23505918979644774
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1536,40,2,128,1,float16,float16,0,0.23319199085235595
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1536,40,2,128,1,float16,fp8,0,0.2330143928527832
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1536,40,2,128,1,fp8,fp8,0,0.23984959125518798
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1536,40,4,128,1,float16,float16,0,0.23803040981292725
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1536,40,4,128,1,float16,fp8,0,0.23363521099090576
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1536,40,4,128,1,fp8,fp8,0,0.23296959400177003
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1536,40,8,128,1,float16,float16,0,0.24579999446868897
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1536,40,40,128,1,float16,float16,0,0.16958080530166625
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1536,40,40,128,1,float16,fp8,0,0.1561568021774292
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1536,40,40,128,1,fp8,fp8,0,0.15575519800186158
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1536,40,1,128,1,float16,float16,0,0.12905919551849365
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1536,40,1,128,1,float16,fp8,0,0.12902560234069824
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1536,40,1,128,1,fp8,fp8,0,0.1314895987510681
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1536,40,2,128,1,float16,float16,0,0.12946720123291017
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1536,40,2,128,1,float16,fp8,0,0.12821439504623414
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1536,40,2,128,1,fp8,fp8,0,0.1275712013244629
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1536,40,4,128,1,float16,float16,0,0.1321760058403015
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1536,40,4,128,1,float16,fp8,0,0.129203200340271
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1536,40,4,128,1,fp8,fp8,0,0.1272752046585083
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1536,40,8,128,1,float16,float16,0,0.13487679958343507
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1536,40,8,128,1,float16,fp8,0,0.12827359437942504
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1536,40,8,128,1,fp8,fp8,0,0.12963520288467406
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1536,40,40,128,1,float16,fp8,0,0.08863360285758973
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1536,40,1,128,1,float16,float16,0,0.07648320198059082
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1536,40,1,128,1,float16,fp8,0,0.07539680004119872
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1536,40,1,128,1,fp8,fp8,0,0.07409279942512512
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1536,40,2,128,1,float16,float16,0,0.07625600099563598
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1536,40,2,128,1,float16,fp8,0,0.07520480155944824
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1536,40,2,128,1,fp8,fp8,0,0.07405760288238525
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1536,40,4,128,1,float16,float16,0,0.07742879986763
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1536,40,4,128,1,float16,fp8,0,0.07527520060539246
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1536,40,4,128,1,fp8,fp8,0,0.07429119944572449
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1536,40,8,128,1,float16,float16,0,0.07948960065841675
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1536,40,8,128,1,float16,fp8,0,0.07489439845085144
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1536,40,8,128,1,fp8,fp8,0,0.0742896020412445
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1536,40,40,128,1,float16,fp8,0,0.0554032027721405
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1536,40,40,128,1,fp8,fp8,0,0.055559998750686644
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1536,40,1,128,1,float16,float16,0,0.05016800165176392
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1536,40,1,128,1,float16,fp8,0,0.04865280091762543
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1536,40,1,128,1,fp8,fp8,0,0.0485615998506546
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1536,40,2,128,1,float16,float16,0,0.050088000297546384
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1536,40,2,128,1,float16,fp8,0,0.048065599799156186
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1536,40,2,128,1,fp8,fp8,0,0.049267199635505673
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1536,40,4,128,1,float16,float16,0,0.05035840272903443
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1536,40,4,128,1,float16,fp8,0,0.04909760057926178
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1536,40,4,128,1,fp8,fp8,0,0.04847359955310822
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1536,40,8,128,1,float16,float16,0,0.05151519775390625
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1536,40,8,128,1,float16,fp8,0,0.04868319928646088
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1536,40,8,128,1,fp8,fp8,0,0.048979198932647704
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1536,40,40,128,1,float16,fp8,0,1.0955455780029297
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1536,40,8,128,1,float16,fp8,0,0.23367838859558104
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,1024,40,1,128,1,float16,float16,0,1.8316287994384766
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1536,40,40,128,1,float16,float16,0,0.09493600130081177
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1536,40,40,128,1,fp8,fp8,0,0.0883184015750885
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,1024,40,1,128,1,float16,fp8,0,1.854792022705078
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,1024,40,1,128,1,fp8,fp8,0,1.8564655303955078
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1536,40,40,128,1,float16,float16,0,0.05716000199317932
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,1024,40,2,128,1,float16,float16,0,1.8387535095214844
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,1024,40,2,128,1,float16,fp8,0,1.952743911743164
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,1024,40,2,128,1,fp8,fp8,0,1.8541200637817383
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,1024,40,4,128,1,float16,fp8,0,1.8529039382934571
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,1024,40,4,128,1,fp8,fp8,0,1.8548639297485352
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,1024,40,8,128,1,float16,float16,0,1.96911678314209
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1536,40,8,128,1,fp8,fp8,0,0.23322079181671143
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1024,40,40,128,1,float16,float16,0,1.3659695625305175
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,1024,40,8,128,1,float16,fp8,0,2.0440671920776365
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1024,40,40,128,1,float16,fp8,0,1.216478443145752
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,1024,40,8,128,1,fp8,fp8,0,1.872599983215332
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1024,40,1,128,1,float16,float16,0,0.9085519790649415
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1024,40,40,128,1,fp8,fp8,0,1.3144960403442383
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1024,40,1,128,1,fp8,fp8,0,0.9470159530639648
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1024,40,2,128,1,float16,float16,0,0.9121456146240234
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1024,40,1,128,1,float16,fp8,0,1.010311985015869
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1024,40,2,128,1,float16,fp8,0,0.9358223915100098
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1024,40,2,128,1,fp8,fp8,0,0.9347968101501465
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1024,40,4,128,1,float16,float16,0,0.9522656440734864
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,1024,40,4,128,1,float16,float16,0,1.8731279373168945
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1024,40,4,128,1,float16,fp8,0,0.9444432258605957
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1024,40,8,128,1,float16,float16,0,0.9996368408203125
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1024,40,40,128,1,float16,float16,0,0.7005152225494384
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1024,40,8,128,1,fp8,fp8,0,0.9358511924743652
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1024,40,40,128,1,float16,fp8,0,0.6169824123382568
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1024,40,40,128,1,fp8,fp8,0,0.6172304153442383
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1024,40,1,128,1,fp8,fp8,0,0.4776031970977783
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1024,40,1,128,1,float16,fp8,0,0.5203152179718018
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1024,40,2,128,1,float16,float16,0,0.46617441177368163
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1024,40,2,128,1,float16,fp8,0,0.47658400535583495
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1024,40,2,128,1,fp8,fp8,0,0.4770944118499756
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1024,40,4,128,1,float16,float16,0,0.49820799827575685
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1024,40,4,128,1,float16,fp8,0,0.47794079780578613
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1024,40,4,128,1,fp8,fp8,0,0.47688961029052734
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1024,40,8,128,1,float16,float16,0,0.49779682159423827
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1024,40,8,128,1,float16,fp8,0,0.4775087833404541
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1024,40,40,128,1,float16,float16,0,0.34513919353485106
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1024,40,40,128,1,float16,fp8,0,0.318342399597168
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1024,40,40,128,1,fp8,fp8,0,0.31985599994659425
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1024,40,1,128,1,float16,float16,0,0.24230079650878905
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1024,40,1,128,1,float16,fp8,0,0.2492095947265625
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1024,40,1,128,1,fp8,fp8,0,0.24820480346679688
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1024,40,2,128,1,float16,float16,0,0.24252638816833497
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1024,40,2,128,1,float16,fp8,0,0.2482464075088501
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1024,40,2,128,1,fp8,fp8,0,0.24871680736541749
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1024,40,4,128,1,float16,float16,0,0.24877760410308838
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1024,40,4,128,1,fp8,fp8,0,0.24872961044311523
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1024,40,8,128,1,float16,float16,0,0.2578896045684814
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1024,40,8,128,1,float16,fp8,0,0.24939360618591308
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1024,40,8,128,1,fp8,fp8,0,0.24908800125122071
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1024,40,40,128,1,float16,float16,0,0.1817520022392273
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1024,40,40,128,1,float16,fp8,0,0.1694383978843689
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1024,40,40,128,1,fp8,fp8,0,0.16947360038757325
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1024,40,8,128,1,float16,fp8,0,0.9343600273132324
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1024,40,1,128,1,float16,fp8,0,0.13447999954223633
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1024,40,1,128,1,fp8,fp8,0,0.1346160054206848
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1024,40,2,128,1,float16,float16,0,0.13311200141906737
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1024,40,2,128,1,float16,fp8,0,0.13366880416870117
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1024,40,2,128,1,fp8,fp8,0,0.13394080400466918
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1024,40,1,128,1,float16,float16,0,0.4858255863189697
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1024,40,4,128,1,float16,float16,0,0.13495199680328368
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1024,40,4,128,1,float16,fp8,0,0.13360960483551027
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1024,40,4,128,1,fp8,fp8,0,0.13447680473327636
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1024,40,8,128,1,float16,float16,0,0.13917280435562135
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1024,40,8,128,1,float16,fp8,0,0.13432320356369018
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1024,40,8,128,1,fp8,fp8,0,0.1342352032661438
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1024,40,40,128,1,float16,float16,0,0.10078719854354859
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1024,40,40,128,1,float16,fp8,0,0.09434720277786254
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1024,40,1,128,1,float16,float16,0,0.0751263976097107
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1024,40,1,128,1,float16,fp8,0,0.0755024015903473
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1024,40,1,128,1,fp8,fp8,0,0.07548800110816956
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1024,40,2,128,1,float16,float16,0,0.07594720125198365
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1024,40,2,128,1,float16,fp8,0,0.07527679800987244
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1024,40,2,128,1,fp8,fp8,0,0.07519840002059937
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1024,40,4,128,1,float16,float16,0,0.07823200225830078
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1024,40,4,128,1,fp8,fp8,0,0.07525439858436585
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1024,40,8,128,1,float16,float16,0,0.08072959780693054
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1024,40,8,128,1,float16,fp8,0,0.07582560181617737
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1024,40,8,128,1,fp8,fp8,0,0.07569440007209778
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1024,40,40,128,1,float16,float16,0,0.05913119912147522
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1024,40,40,128,1,float16,fp8,0,0.053572797775268556
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1024,40,8,128,1,fp8,fp8,0,0.4980127811431885
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1024,40,40,128,1,fp8,fp8,0,0.053472000360488894
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1024,40,1,128,1,float16,float16,0,0.046054399013519286
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1024,40,1,128,1,float16,fp8,0,0.045337599515914914
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1024,40,1,128,1,fp8,fp8,0,0.04530560076236725
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1024,40,2,128,1,float16,float16,0,0.04620639979839325
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1024,40,2,128,1,float16,fp8,0,0.045296001434326175
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1024,40,2,128,1,fp8,fp8,0,0.045307201147079465
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1024,40,4,128,1,float16,float16,0,0.0469296008348465
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1024,40,4,128,1,float16,fp8,0,0.045307201147079465
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1024,40,4,128,1,fp8,fp8,0,0.04559360146522522
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1024,40,8,128,1,float16,fp8,0,0.04537599980831146
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1024,40,8,128,1,fp8,fp8,0,0.04528000056743622
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1024,40,40,128,1,float16,float16,0,0.037118399143218996
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1024,40,40,128,1,float16,fp8,0,0.03711360096931458
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1024,40,40,128,1,fp8,fp8,0,0.037083199620246886
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1024,40,1,128,1,float16,float16,0,0.033139199018478394
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1024,40,4,128,1,float16,fp8,0,0.24908161163330078
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1024,40,1,128,1,float16,fp8,0,0.03296639919281006
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1024,40,1,128,1,fp8,fp8,0,0.03302719891071319
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1024,40,2,128,1,float16,fp8,0,0.0330704003572464
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1024,40,2,128,1,fp8,fp8,0,0.03298879861831665
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1024,40,4,128,1,float16,float16,0,0.03387520015239716
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1024,40,4,128,1,float16,fp8,0,0.0329584002494812
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1024,40,4,128,1,fp8,fp8,0,0.9414079666137696
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1024,40,4,128,1,fp8,fp8,0,0.03303999900817871
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1024,40,8,128,1,float16,float16,0,0.034995201230049136
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1024,40,8,128,1,float16,fp8,0,0.03297280073165894
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1024,40,1,128,1,float16,float16,0,0.13292160034179687
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1024,40,8,128,1,fp8,fp8,0,0.03305279910564422
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1024,40,40,128,1,fp8,fp8,0,0.09493280053138733
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1024,40,4,128,1,float16,fp8,0,0.0754256010055542
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,512,40,1,128,1,float16,float16,0,1.355947208404541
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,512,40,1,128,1,float16,fp8,0,1.4493328094482423
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,512,40,1,128,1,fp8,fp8,0,1.4497039794921875
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1024,40,8,128,1,float16,float16,0,0.0474128007888794
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,512,40,2,128,1,float16,fp8,0,1.4450703620910645
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1024,40,2,128,1,float16,float16,0,0.03315039873123169
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,512,40,2,128,1,fp8,fp8,0,1.512166404724121
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,512,40,4,128,1,float16,float16,0,1.3990575790405273
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,512,40,4,128,1,float16,fp8,0,1.4452048301696778
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,512,40,4,128,1,fp8,fp8,0,1.442580795288086
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,512,40,40,128,1,float16,fp8,0,1.0025376319885253
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,512,40,40,128,1,float16,float16,0,1.1032896041870117
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,512,40,8,128,1,float16,float16,0,1.4945887565612792
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,512,40,8,128,1,float16,fp8,0,1.4445535659790039
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,512,40,8,128,1,fp8,fp8,0,1.4642736434936523
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,512,40,1,128,1,float16,float16,0,0.6851967811584473
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,512,40,40,128,1,fp8,fp8,0,0.9999343872070312
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,512,40,1,128,1,float16,fp8,0,0.7312240123748779
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,512,40,1,128,1,fp8,fp8,0,0.7541920185089112
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,512,40,2,128,1,float16,float16,0,0.7673232078552246
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,512,40,2,128,1,float16,fp8,0,0.7302735805511474
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,512,40,2,128,1,fp8,fp8,0,0.7314911842346191
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,512,40,4,128,1,float16,float16,0,0.7069295883178711
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,512,40,2,128,1,float16,float16,0,1.3582816123962402
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,512,40,4,128,1,float16,fp8,0,0.7288271903991699
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,512,40,4,128,1,fp8,fp8,0,0.7299808025360107
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,512,40,40,128,1,float16,float16,0,0.552784013748169
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,512,40,8,128,1,float16,float16,0,0.7501776218414307
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,512,40,8,128,1,fp8,fp8,0,0.7290304183959961
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,512,40,8,128,1,float16,fp8,0,0.7797135829925537
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,512,40,40,128,1,float16,fp8,0,0.5082335948944092
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,512,40,1,128,1,float16,float16,0,0.351308798789978
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,512,40,40,128,1,fp8,fp8,0,0.5077184200286865
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,512,40,1,128,1,float16,fp8,0,0.37338080406188967
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,512,40,1,128,1,fp8,fp8,0,0.37568159103393556
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,512,40,2,128,1,float16,fp8,0,0.37256319522857667
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,512,40,2,128,1,fp8,fp8,0,0.388592004776001
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,512,40,4,128,1,float16,float16,0,0.36117119789123536
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,512,40,4,128,1,fp8,fp8,0,0.3724447965621948
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,512,40,8,128,1,float16,float16,0,0.38134241104125977
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,512,40,8,128,1,float16,fp8,0,0.376694393157959
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,512,40,40,128,1,float16,float16,0,0.2867392063140869
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,512,40,8,128,1,fp8,fp8,0,0.3722800016403198
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,512,40,40,128,1,float16,fp8,0,0.26262240409851073
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,512,40,40,128,1,fp8,fp8,0,0.26270880699157717
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,512,40,1,128,1,float16,float16,0,0.18532960414886473
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,512,40,1,128,1,float16,fp8,0,0.19516479969024658
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,512,40,1,128,1,fp8,fp8,0,0.19954400062561034
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,512,40,2,128,1,float16,fp8,0,0.19371520280838012
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,512,40,2,128,1,fp8,fp8,0,0.19504640102386475
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,512,40,4,128,1,float16,float16,0,0.19045439958572388
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,512,40,4,128,1,float16,fp8,0,0.19504319429397582
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,512,40,4,128,1,fp8,fp8,0,0.19791200160980224
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,512,40,8,128,1,float16,float16,0,0.19939520359039306
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,512,40,8,128,1,float16,fp8,0,0.19505759477615356
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,512,40,8,128,1,fp8,fp8,0,0.19514880180358887
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,512,40,40,128,1,float16,float16,0,0.1493216037750244
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,512,40,40,128,1,float16,fp8,0,0.14119839668273926
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,512,40,40,128,1,fp8,fp8,0,0.13954399824142455
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,512,40,1,128,1,float16,float16,0,0.10219520330429077
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,512,40,1,128,1,fp8,fp8,0,0.10531519651412964
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,512,40,1,128,1,float16,fp8,0,0.10540959835052491
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,512,40,2,128,1,float16,float16,0,0.10317599773406982
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,512,40,2,128,1,float16,fp8,0,0.10511679649353027
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,512,40,2,128,1,fp8,fp8,0,0.10494400262832641
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,512,40,4,128,1,float16,float16,0,0.10390080213546753
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,512,40,4,128,1,float16,fp8,0,0.10545920133590699
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,512,40,4,128,1,fp8,fp8,0,0.10549279451370239
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,512,40,8,128,1,float16,float16,0,0.10886080265045166
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,512,40,8,128,1,float16,fp8,0,0.10518079996109009
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,512,40,40,128,1,float16,float16,0,0.08328639864921569
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,512,40,40,128,1,float16,fp8,0,0.07848160266876221
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,512,40,8,128,1,fp8,fp8,0,0.10518239736557007
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,512,40,40,128,1,fp8,fp8,0,0.07824479937553405
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,512,40,1,128,1,float16,float16,0,0.058195197582244874
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,512,40,1,128,1,float16,fp8,0,0.05933759808540344
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,512,40,1,128,1,fp8,fp8,0,0.059595197439193726
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,512,40,2,128,1,fp8,fp8,0,0.05943359732627869
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,512,40,2,128,1,float16,fp8,0,0.059308797121047974
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,512,40,4,128,1,float16,float16,0,0.06001120209693909
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,512,40,4,128,1,float16,fp8,0,0.05969759821891785
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,512,40,4,128,1,fp8,fp8,0,0.05926560163497925
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,512,40,8,128,1,float16,float16,0,0.06366080045700073
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,512,40,8,128,1,float16,fp8,0,0.05966879725456238
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,512,40,8,128,1,fp8,fp8,0,0.05964159965515137
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,512,40,40,128,1,float16,float16,0,0.04937120079994202
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,512,40,40,128,1,float16,fp8,0,0.04529919922351837
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,512,40,1,128,1,float16,fp8,0,0.036801600456237794
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,512,40,1,128,1,fp8,fp8,0,0.03694559931755066
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,512,40,2,128,1,float16,float16,0,0.036399999260902406
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,512,40,2,128,1,float16,fp8,0,0.037108799815177916
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,512,40,2,128,1,fp8,fp8,0,0.03710559904575348
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,512,40,4,128,1,float16,float16,0,0.037092798948287965
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,512,40,4,128,1,float16,fp8,0,0.03668160140514374
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,512,40,4,128,1,fp8,fp8,0,0.03660959899425507
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,512,40,8,128,1,float16,float16,0,0.03771359920501709
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,512,40,8,128,1,float16,fp8,0,0.03709119856357575
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,512,40,2,128,1,float16,float16,0,0.352454400062561
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,512,40,8,128,1,fp8,fp8,0,0.03709439933300018
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,512,40,40,128,1,float16,fp8,0,0.031009599566459656
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,512,40,40,128,1,fp8,fp8,0,0.030956798791885377
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,512,40,4,128,1,float16,fp8,0,0.3724015951156616
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,512,40,1,128,1,float16,float16,0,0.026919999718666078
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,512,40,1,128,1,float16,fp8,0,0.02685759961605072
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,512,40,1,128,1,fp8,fp8,0,0.026872000098228453
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,512,40,2,128,1,float16,float16,0,0.026921600103378296
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,512,40,2,128,1,float16,fp8,0,0.02689119875431061
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,512,40,2,128,1,fp8,fp8,0,0.02686559855937958
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,512,40,4,128,1,float16,float16,0,0.026867198944091796
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,512,40,2,128,1,float16,float16,0,0.1857807993888855
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,512,40,4,128,1,float16,fp8,0,0.026824000477790832
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,512,40,4,128,1,fp8,fp8,0,0.026843199133872987
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,512,40,8,128,1,float16,float16,0,0.028839999437332155
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,512,40,8,128,1,fp8,fp8,0,0.026846399903297423
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,512,40,8,128,1,float16,fp8,0,0.026921600103378296
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,512,40,40,128,1,float16,float16,0,0.020707200467586517
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,512,40,40,128,1,float16,fp8,0,0.02069759964942932
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,512,40,40,128,1,fp8,fp8,0,0.020771199464797975
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,512,40,1,128,1,float16,float16,0,0.018916800618171692
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,512,40,1,128,1,float16,fp8,0,0.01870400011539459
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,512,40,1,128,1,fp8,fp8,0,0.01863359957933426
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,512,40,2,128,1,float16,float16,0,0.018798400461673737
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,512,40,2,128,1,float16,fp8,0,0.01868640035390854
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,512,40,2,128,1,fp8,fp8,0,0.018670399487018586
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,512,40,4,128,1,float16,float16,0,0.019180800020694732
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,512,40,4,128,1,float16,fp8,0,0.01866080015897751
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,512,40,4,128,1,fp8,fp8,0,0.018617600202560425
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,512,40,8,128,1,float16,float16,0,0.020520000159740447
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,512,40,8,128,1,fp8,fp8,0,0.018644799292087556
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,512,40,2,128,1,float16,float16,0,0.058657598495483396
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,256,40,1,128,1,float16,float16,0,0.5672800064086914
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,256,40,1,128,1,float16,fp8,0,0.6198239803314209
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,512,40,40,128,1,fp8,fp8,0,0.04520959854125976
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,256,40,1,128,1,fp8,fp8,0,0.6206016063690185
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,256,40,2,128,1,float16,float16,0,0.5682943820953369
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,256,40,2,128,1,float16,fp8,0,0.6188591957092285
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,256,40,2,128,1,fp8,fp8,0,0.6201424121856689
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,256,40,4,128,1,float16,float16,0,0.589086389541626
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,512,40,40,128,1,float16,float16,0,0.030401599407196046
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,256,40,4,128,1,float16,fp8,0,0.618507194519043
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,256,40,8,128,1,float16,float16,0,0.6282639980316163
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,256,40,8,128,1,float16,fp8,0,0.620192003250122
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,256,40,40,128,1,float16,float16,0,0.48432159423828125
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,256,40,8,128,1,fp8,fp8,0,0.619048023223877
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,256,40,40,128,1,float16,fp8,0,0.4513872146606445
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,256,40,1,128,1,float16,float16,0,0.2899919986724854
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,256,40,40,128,1,fp8,fp8,0,0.45232481956481935
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,256,40,1,128,1,float16,fp8,0,0.3161632061004639
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,256,40,1,128,1,fp8,fp8,0,0.3157968044281006
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,512,40,8,128,1,float16,fp8,0,0.018644799292087556
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,256,40,2,128,1,float16,float16,0,0.29090399742126466
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,256,40,2,128,1,float16,fp8,0,0.315444803237915
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,256,40,2,128,1,fp8,fp8,0,0.3159712076187134
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,256,40,4,128,1,float16,float16,0,0.30186240673065184
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,256,40,4,128,1,float16,fp8,0,0.3155855894088745
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,256,40,4,128,1,fp8,fp8,0,0.3150464057922363
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,256,40,40,128,1,float16,float16,0,0.2512383937835693
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,256,40,8,128,1,float16,fp8,0,0.3160048007965088
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,256,40,8,128,1,fp8,fp8,0,0.31630079746246337
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,256,40,40,128,1,float16,fp8,0,0.23426079750061035
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,512,40,1,128,1,float16,float16,0,0.03595679998397827
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,256,40,1,128,1,float16,float16,0,0.15521759986877443
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,256,40,2,128,1,float16,float16,0,0.15566240549087523
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,256,40,2,128,1,float16,fp8,0,0.16608320474624633
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,256,40,2,128,1,fp8,fp8,0,0.16628639698028563
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,256,40,4,128,1,float16,float16,0,0.16021759510040284
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,256,40,4,128,1,float16,fp8,0,0.1656767964363098
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,256,40,8,128,1,float16,float16,0,0.16933920383453369
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,256,40,8,128,1,float16,fp8,0,0.16634080410003663
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,256,40,8,128,1,fp8,fp8,0,0.16649279594421387
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,256,40,4,128,1,fp8,fp8,0,0.1661839962005615
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,256,40,40,128,1,float16,float16,0,0.13141440153121947
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,256,40,40,128,1,float16,fp8,0,0.1224784016609192
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,256,40,40,128,1,fp8,fp8,0,0.12297600507736206
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,256,40,1,128,1,float16,float16,0,0.08410559892654419
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,256,40,1,128,1,float16,fp8,0,0.0880832016468048
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,256,40,2,128,1,float16,float16,0,0.08431360125541687
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,256,40,2,128,1,float16,fp8,0,0.08797119855880738
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,256,40,2,128,1,fp8,fp8,0,0.08798879981040955
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,256,40,4,128,1,fp8,fp8,0,0.6185696125030518
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,256,40,4,128,1,float16,float16,0,0.0858560025691986
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,256,40,4,128,1,float16,fp8,0,0.0882207989692688
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,256,40,4,128,1,fp8,fp8,0,0.08818560242652893
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,256,40,8,128,1,float16,float16,0,0.09091839790344239
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,256,40,8,128,1,float16,fp8,0,0.08817920088768005
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,256,40,40,128,1,float16,float16,0,0.07459999918937683
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,256,40,40,128,1,float16,fp8,0,0.06985759735107422
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,256,40,40,128,1,fp8,fp8,0,0.06987040042877198
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,256,40,1,128,1,float16,float16,0,0.0491023987531662
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,256,40,1,128,1,float16,fp8,0,0.05144960284233093
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,256,40,1,128,1,fp8,fp8,0,0.051260799169540405
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,256,40,2,128,1,float16,float16,0,0.04941120147705078
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,256,40,2,128,1,float16,fp8,0,0.051256000995635986
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,256,40,2,128,1,fp8,fp8,0,0.05144320130348205
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,256,40,4,128,1,float16,float16,0,0.05127679705619812
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,256,40,4,128,1,float16,fp8,0,0.051419198513031006
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,256,40,4,128,1,fp8,fp8,0,0.05145599842071533
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,256,40,8,128,1,float16,float16,0,0.05408959984779358
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,256,40,8,128,1,float16,fp8,0,0.051444798707962036
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,256,40,40,128,1,fp8,fp8,0,0.23435840606689454
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,256,40,8,128,1,fp8,fp8,0,0.05146880149841308
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,256,40,1,128,1,fp8,fp8,0,0.16680480241775514
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,256,40,40,128,1,float16,float16,0,0.04532159864902496
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,256,40,40,128,1,fp8,fp8,0,0.04117920100688934
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,256,40,1,128,1,float16,float16,0,0.03296000063419342
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,256,40,1,128,1,float16,fp8,0,0.032953599095344545
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,256,40,1,128,1,fp8,fp8,0,0.03297599852085113
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,256,40,2,128,1,float16,float16,0,0.03285279870033264
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,256,40,2,128,1,float16,fp8,0,0.03304159939289093
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,256,40,2,128,1,fp8,fp8,0,0.032995200157165526
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,256,40,4,128,1,float16,float16,0,0.033000001311302186
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,256,40,4,128,1,float16,fp8,0,0.032950401306152344
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,256,40,8,128,1,float16,float16,0,0.033283200860023496
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,256,40,4,128,1,fp8,fp8,0,0.033062401413917544
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,256,40,8,128,1,float16,fp8,0,0.03304319977760315
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,256,40,8,128,1,fp8,fp8,0,0.03299039900302887
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,256,40,40,128,1,float16,float16,0,0.026849600672721862
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,256,40,40,128,1,float16,fp8,0,0.027166399359703063
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,256,40,40,128,1,fp8,fp8,0,0.02890079915523529
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,256,40,1,128,1,float16,float16,0,0.022787199914455415
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,256,40,1,128,1,float16,fp8,0,0.024377599358558655
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,256,40,1,128,1,fp8,fp8,0,0.022784000635147093
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,256,40,1,128,1,fp8,fp8,0,0.08850880265235901
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,256,40,2,128,1,float16,float16,0,0.023472000658512116
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,256,40,2,128,1,float16,fp8,0,0.022808000445365906
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,256,40,4,128,1,float16,float16,0,0.02293439954519272
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,256,40,4,128,1,float16,fp8,0,0.022856000065803527
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,256,40,4,128,1,fp8,fp8,0,0.023057599365711213
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,256,40,8,128,1,float16,float16,0,0.024820800125598907
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,256,40,8,128,1,float16,fp8,0,0.023260800540447234
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,256,40,8,128,1,fp8,fp8,0,0.023715199530124666
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,256,40,40,128,1,float16,float16,0,0.018668800592422485
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,256,40,40,128,1,float16,fp8,0,0.018699200451374055
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,256,40,40,128,1,fp8,fp8,0,0.01865600049495697
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,256,40,8,128,1,fp8,fp8,0,0.0885200023651123
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,256,40,1,128,1,float16,float16,0,0.016539199650287627
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,256,40,1,128,1,float16,fp8,0,0.01663520038127899
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,256,40,2,128,1,float16,float16,0,0.016502399742603303
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,256,40,2,128,1,float16,fp8,0,0.016527999937534333
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,256,40,2,128,1,fp8,fp8,0,0.016543999314308167
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,256,40,4,128,1,float16,float16,0,0.016648000478744505
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,256,40,4,128,1,float16,fp8,0,0.016318400204181672
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,256,40,4,128,1,fp8,fp8,0,0.01684959977865219
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,256,40,8,128,1,float16,float16,0,0.016599999368190767
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,256,40,8,128,1,float16,fp8,0,0.016604800522327424
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,256,40,8,128,1,fp8,fp8,0,0.01653439998626709
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,256,40,40,128,1,float16,float16,0,0.016590400040149687
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,256,40,40,128,1,float16,fp8,0,0.016612799465656282
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,256,40,40,128,1,fp8,fp8,0,0.016622400283813475
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,256,40,1,128,1,float16,float16,0,0.015078400075435639
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,256,40,1,128,1,float16,fp8,0,0.014860799908638
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,256,40,1,128,1,fp8,fp8,0,0.014560000598430633
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,256,40,2,128,1,float16,float16,0,0.015279999375343323
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,256,40,2,128,1,float16,fp8,0,0.014595200121402741
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,256,40,2,128,1,fp8,fp8,0,0.015084800124168397
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,256,40,4,128,1,float16,float16,0,0.014902399480342865
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,256,40,4,128,1,float16,fp8,0,0.015132799744606018
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,256,40,4,128,1,fp8,fp8,0,0.015049600601196289
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,256,40,8,128,1,float16,float16,0,0.0147599995136261
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,256,40,8,128,1,float16,float16,0,0.320361590385437
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,256,40,8,128,1,float16,fp8,0,0.014716799557209014
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,256,40,8,128,1,fp8,fp8,0,0.01454080045223236
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,256,40,1,128,1,float16,fp8,0,0.1661120057106018
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,256,40,40,128,1,float16,fp8,0,0.041222399473190306
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,128,40,1,128,1,float16,float16,0,0.34817919731140134
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,128,40,1,128,1,fp8,fp8,0,0.37282719612121584
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,128,40,2,128,1,float16,float16,0,0.34973280429840087
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,128,40,2,128,1,float16,fp8,0,0.37147040367126466
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,128,40,2,128,1,fp8,fp8,0,0.3719104051589966
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,128,40,4,128,1,float16,float16,0,0.35864160060882566
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,128,40,4,128,1,float16,fp8,0,0.37159841060638427
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,128,40,4,128,1,fp8,fp8,0,0.37168800830841064
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,128,40,8,128,1,float16,float16,0,0.37679519653320315
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,128,40,8,128,1,float16,fp8,0,0.3720096111297607
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,256,40,2,128,1,fp8,fp8,0,0.022787199914455415
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,128,40,1,128,1,float16,float16,0,0.18135199546813965
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,128,40,40,128,1,float16,fp8,0,0.25829439163208007
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,128,40,1,128,1,float16,fp8,0,0.19135199785232543
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,128,40,1,128,1,fp8,fp8,0,0.191702401638031
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,128,40,2,128,1,float16,float16,0,0.18156960010528564
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,256,40,1,128,1,fp8,fp8,0,0.01664319932460785
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,128,40,2,128,1,fp8,fp8,0,0.19124480485916137
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,128,40,4,128,1,float16,float16,0,0.18606719970703126
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,128,40,4,128,1,fp8,fp8,0,0.1917456030845642
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,128,40,8,128,1,float16,float16,0,0.19466240406036378
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,128,40,8,128,1,float16,fp8,0,0.19177279472351075
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,128,40,8,128,1,fp8,fp8,0,0.19178240299224852
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,128,40,40,128,1,float16,float16,0,0.1426527976989746
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,128,40,40,128,1,float16,fp8,0,0.135972797870636
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,128,40,40,128,1,fp8,fp8,0,0.13662559986114503
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,128,40,1,128,1,float16,fp8,0,0.37264480590820315
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,128,40,1,128,1,float16,fp8,0,0.10129760503768921
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,128,40,1,128,1,fp8,fp8,0,0.1016160011291504
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,128,40,2,128,1,float16,float16,0,0.0977295994758606
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,128,40,2,128,1,float16,fp8,0,0.10197279453277588
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,128,40,2,128,1,fp8,fp8,0,0.10164320468902588
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,128,40,4,128,1,float16,float16,0,0.0999135971069336
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,128,40,40,128,1,float16,float16,0,0.27345120906829834
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,128,40,40,128,1,fp8,fp8,0,0.2584480047225952
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,128,40,4,128,1,float16,fp8,0,0.10158239603042603
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,128,40,4,128,1,fp8,fp8,0,0.10190720558166504
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,128,40,8,128,1,float16,float16,0,0.10436160564422607
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,128,40,8,128,1,float16,fp8,0,0.10276800394058228
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,128,40,40,128,1,float16,float16,0,0.0760640025138855
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,128,40,40,128,1,float16,fp8,0,0.07395039796829224
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,128,40,2,128,1,float16,fp8,0,0.19148000478744506
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,128,40,40,128,1,fp8,fp8,0,0.07308160066604615
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,128,40,1,128,1,float16,float16,0,0.05345600247383118
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,128,40,1,128,1,float16,fp8,0,0.05474240183830261
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,128,40,1,128,1,fp8,fp8,0,0.05418879985809326
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,128,40,2,128,1,float16,float16,0,0.05357120037078857
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,128,40,2,128,1,float16,fp8,0,0.055327999591827395
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,128,40,2,128,1,fp8,fp8,0,0.05402399897575379
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,128,40,4,128,1,float16,float16,0,0.05549439787864685
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,128,40,4,128,1,float16,fp8,0,0.055460798740386966
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,128,40,4,128,1,fp8,fp8,0,0.05459520220756531
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,128,40,8,128,1,float16,float16,0,0.05772479772567749
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,128,40,8,128,1,float16,fp8,0,0.05553280115127564
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,128,40,8,128,1,fp8,fp8,0,0.05490559935569763
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,128,40,40,128,1,float16,float16,0,0.045278400182724
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,128,40,40,128,1,float16,fp8,0,0.0412304013967514
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,128,40,40,128,1,fp8,fp8,0,0.041172799468040464
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,128,40,1,128,1,float16,float16,0,0.03172639906406403
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,128,40,1,128,1,float16,fp8,0,0.03292959928512573
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,128,40,1,128,1,fp8,fp8,0,0.033000001311302186
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,128,40,2,128,1,float16,float16,0,0.03108479976654053
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,128,40,2,128,1,float16,fp8,0,0.03294079899787903
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,128,40,2,128,1,fp8,fp8,0,0.032979199290275575
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,128,40,4,128,1,float16,float16,0,0.032455998659133914
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,128,40,1,128,1,float16,float16,0,0.09759039878845215
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,128,40,4,128,1,float16,fp8,0,0.03294720053672791
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,128,40,4,128,1,fp8,fp8,0,0.032979199290275575
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,128,40,8,128,1,float16,float16,0,0.03299039900302887
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,128,40,8,128,1,float16,fp8,0,0.03303520083427429
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,128,40,40,128,1,float16,float16,0,0.024831999838352204
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,128,40,40,128,1,float16,fp8,0,0.02585279941558838
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,128,40,40,128,1,fp8,fp8,0,0.02627040147781372
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,128,40,1,128,1,float16,float16,0,0.020707200467586517
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,128,40,1,128,1,float16,fp8,0,0.022443200647830962
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,128,40,2,128,1,float16,float16,0,0.020694400370121
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,128,40,2,128,1,float16,fp8,0,0.02266400009393692
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,128,40,2,128,1,fp8,fp8,0,0.022019200026988983
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,128,40,4,128,1,float16,float16,0,0.022120000422000886
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,128,40,4,128,1,float16,fp8,0,0.022392000257968902
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,128,40,4,128,1,fp8,fp8,0,0.02258400022983551
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,128,40,1,128,1,fp8,fp8,0,0.021881599724292756
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,128,40,8,128,1,float16,float16,0,0.022777600586414336
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,128,40,8,128,1,float16,fp8,0,0.0220223993062973
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,128,40,40,128,1,float16,float16,0,0.018559999763965607
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,128,40,8,128,1,fp8,fp8,0,0.022342400252819063
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,128,40,40,128,1,float16,fp8,0,0.01868640035390854
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,128,40,1,128,1,float16,float16,0,0.016543999314308167
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,128,40,1,128,1,float16,fp8,0,0.016604800522327424
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,128,40,1,128,1,fp8,fp8,0,0.01653439998626709
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,128,40,2,128,1,float16,float16,0,0.01661120057106018
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,128,40,2,128,1,float16,fp8,0,0.01661120057106018
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,128,40,2,128,1,fp8,fp8,0,0.016612799465656282
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,128,40,4,128,1,float16,float16,0,0.01655520051717758
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,128,40,4,128,1,float16,fp8,0,0.01658399999141693
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,128,40,4,128,1,fp8,fp8,0,0.01655520051717758
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,128,40,8,128,1,float16,float16,0,0.016547200083732606
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,128,40,8,128,1,float16,fp8,0,0.016564799845218657
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,128,40,8,128,1,fp8,fp8,0,0.016630400717258454
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,128,40,40,128,1,float16,float16,0,0.01276959925889969
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,128,40,40,128,1,float16,fp8,0,0.012647999823093415
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,128,40,40,128,1,fp8,fp8,0,0.012537600100040435
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,128,40,1,128,1,float16,float16,0,0.012513600289821625
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,128,40,1,128,1,float16,fp8,0,0.012460800260305405
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,128,40,1,128,1,fp8,fp8,0,0.012495999783277511
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,128,40,8,128,1,fp8,fp8,0,0.3711999893188477
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,128,40,2,128,1,float16,float16,0,0.01223680004477501
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,128,40,2,128,1,float16,fp8,0,0.012534399330615998
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,128,40,4,128,1,float16,float16,0,0.012444800138473511
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,128,40,2,128,1,fp8,fp8,0,0.012425599992275238
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,128,40,4,128,1,float16,fp8,0,0.012443199753761292
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,128,40,8,128,1,float16,float16,0,0.01244800016283989
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,128,40,8,128,1,float16,fp8,0,0.012457600235939026
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,128,40,8,128,1,fp8,fp8,0,0.0125231996178627
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,128,40,40,128,1,float16,float16,0,0.012404800206422806
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,128,40,40,128,1,float16,fp8,0,0.012427199631929398
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,128,40,8,128,1,fp8,fp8,0,0.10264960527420045
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,128,40,1,128,1,float16,float16,0,0.01043040007352829
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,128,40,40,128,1,fp8,fp8,0,0.01255200058221817
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,128,40,1,128,1,float16,fp8,0,0.011638399958610535
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,128,40,1,128,1,fp8,fp8,0,0.010553599894046783
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,128,40,2,128,1,float16,fp8,0,0.01098880022764206
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,128,40,2,128,1,fp8,fp8,0,0.01196959987282753
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,128,40,4,128,1,float16,float16,0,0.010441599786281586
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,128,40,4,128,1,float16,fp8,0,0.012486399710178375
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,128,40,4,128,1,fp8,fp8,0,0.012169600278139115
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,128,40,8,128,1,float16,float16,0,0.010502400249242783
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,128,40,8,128,1,float16,fp8,0,0.012465599924325943
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,128,40,8,128,1,fp8,fp8,0,0.01242239996790886
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,128,40,4,128,1,float16,fp8,0,0.19101439714431762
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,128,40,2,128,1,float16,float16,0,0.010494399815797806
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,64,40,1,128,1,float16,float16,0,0.2687743902206421
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,64,40,1,128,1,float16,fp8,0,0.27905120849609377
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,64,40,1,128,1,fp8,fp8,0,0.2789328098297119
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,64,40,2,128,1,float16,float16,0,0.2689663887023926
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,64,40,2,128,1,float16,fp8,0,0.2790479898452759
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,64,40,4,128,1,float16,float16,0,0.2739599943161011
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,64,40,4,128,1,float16,fp8,0,0.2791167974472046
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,128,40,8,128,1,fp8,fp8,0,0.032918399572372435
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,64,40,4,128,1,fp8,fp8,0,0.2790112018585205
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,64,40,8,128,1,float16,float16,0,0.2820591926574707
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,64,40,8,128,1,float16,fp8,0,0.2791903972625732
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,64,40,8,128,1,fp8,fp8,0,0.27826719284057616
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,64,40,40,128,1,float16,float16,0,0.18537119626998902
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,64,40,40,128,1,float16,fp8,0,0.17819999456405639
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,64,40,1,128,1,float16,float16,0,0.14098880290985108
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,64,40,1,128,1,float16,fp8,0,0.1447216033935547
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,128,40,40,128,1,fp8,fp8,0,0.018719999492168425
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,64,40,2,128,1,float16,float16,0,0.14204800128936768
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,64,40,2,128,1,float16,fp8,0,0.1446895956993103
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,64,40,2,128,1,fp8,fp8,0,0.14423680305480957
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,64,40,4,128,1,float16,fp8,0,0.14451359510421752
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,64,40,4,128,1,fp8,fp8,0,0.14449599981307984
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,64,40,8,128,1,float16,float16,0,0.14764800071716308
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,64,40,8,128,1,float16,fp8,0,0.14438079595565795
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,64,40,8,128,1,fp8,fp8,0,0.1447824001312256
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,64,40,40,128,1,float16,float16,0,0.09794719815254212
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,64,40,40,128,1,fp8,fp8,0,0.09557279944419861
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,128,40,4,128,1,fp8,fp8,0,0.01242239996790886
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,64,40,1,128,1,float16,float16,0,0.07617759704589844
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,64,40,1,128,1,float16,fp8,0,0.07673119902610778
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,64,40,1,128,1,fp8,fp8,0,0.07704640030860901
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,64,40,2,128,1,float16,float16,0,0.07687519788742066
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,64,40,2,128,1,float16,fp8,0,0.07660639882087708
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,64,40,2,128,1,fp8,fp8,0,0.07734720110893249
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,64,40,4,128,1,float16,float16,0,0.07765120267868042
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,64,40,4,128,1,float16,fp8,0,0.0775264024734497
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,64,40,4,128,1,fp8,fp8,0,0.07703520059585571
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,64,40,8,128,1,float16,float16,0,0.08113759756088257
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,64,40,8,128,1,fp8,fp8,0,0.07743200063705444
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,64,40,2,128,1,fp8,fp8,0,0.2791440010070801
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,64,40,40,128,1,float16,float16,0,0.05542880296707153
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,64,40,40,128,1,float16,fp8,0,0.05145919919013977
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,64,40,40,128,1,fp8,fp8,0,0.05149440169334411
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,64,40,1,128,1,float16,float16,0,0.04233280122280121
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,64,40,1,128,1,float16,fp8,0,0.04320639967918396
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,64,40,1,128,1,fp8,fp8,0,0.042840000987052915
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,64,40,2,128,1,float16,float16,0,0.04325920045375824
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,64,40,2,128,1,float16,fp8,0,0.04271839857101441
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,64,40,2,128,1,fp8,fp8,0,0.0432096004486084
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,64,40,40,128,1,fp8,fp8,0,0.17898080348968506
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,64,40,4,128,1,float16,fp8,0,0.04317440092563629
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,64,40,4,128,1,fp8,fp8,0,0.04325760006904602
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,64,40,8,128,1,float16,float16,0,0.04352959990501404
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,64,40,8,128,1,float16,fp8,0,0.04316479861736298
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,64,40,8,128,1,fp8,fp8,0,0.0432671993970871
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,64,40,40,128,1,float16,float16,0,0.028918400406837463
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,64,40,40,128,1,float16,fp8,0,0.03121120035648346
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,64,40,4,128,1,float16,float16,0,0.14461760520935057
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,64,40,40,128,1,fp8,fp8,0,0.030931198596954347
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,64,40,1,128,1,float16,fp8,0,0.026982399821281432
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,64,40,1,128,1,fp8,fp8,0,0.0267984002828598
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,64,40,2,128,1,float16,float16,0,0.02688319981098175
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,64,40,2,128,1,float16,fp8,0,0.026766398549079896
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,64,40,2,128,1,fp8,fp8,0,0.02682720124721527
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,64,40,4,128,1,float16,float16,0,0.026849600672721862
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,64,40,4,128,1,float16,fp8,0,0.02677919864654541
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,64,40,4,128,1,fp8,fp8,0,0.026707199215888978
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,64,40,8,128,1,float16,float16,0,0.02688640058040619
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,64,40,8,128,1,float16,fp8,0,0.026785600185394286
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,64,40,8,128,1,fp8,fp8,0,0.026774400472640993
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,64,40,40,128,1,float16,float16,0,0.020396800339221956
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,64,40,40,128,1,float16,fp8,0,0.02069759964942932
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,64,40,40,128,1,fp8,fp8,0,0.020657600462436677
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,64,40,1,128,1,float16,float16,0,0.018643200397491455
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,64,40,1,128,1,float16,fp8,0,0.018641600012779237
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,64,40,1,128,1,fp8,fp8,0,0.018587200343608855
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,64,40,2,128,1,float16,float16,0,0.018593600392341612
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,64,40,40,128,1,float16,fp8,0,0.09509119987487794
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,64,40,2,128,1,float16,fp8,0,0.018598400056362152
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,64,40,2,128,1,fp8,fp8,0,0.018595199286937713
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,64,40,4,128,1,float16,float16,0,0.018662400543689728
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,64,40,4,128,1,float16,fp8,0,0.018572799861431122
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,64,40,4,128,1,fp8,fp8,0,0.018571199476718904
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,64,40,8,128,1,float16,float16,0,0.018622399866580965
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,64,40,8,128,1,float16,fp8,0,0.01866080015897751
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,64,40,8,128,1,fp8,fp8,0,0.01866080015897751
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,64,40,40,128,1,float16,fp8,0,0.014558400213718414
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,64,40,40,128,1,float16,float16,0,0.015387199819087982
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,64,40,40,128,1,fp8,fp8,0,0.01501920074224472
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,64,40,1,128,1,float16,fp8,0,0.014473600685596466
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,64,40,1,128,1,fp8,fp8,0,0.01451520025730133
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,64,40,2,128,1,float16,float16,0,0.014552000164985656
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,64,40,2,128,1,float16,fp8,0,0.014560000598430633
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,64,40,4,128,1,float16,float16,0,0.014552000164985656
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,64,40,4,128,1,float16,fp8,0,0.014542399346828461
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,64,40,4,128,1,fp8,fp8,0,0.014519999921321868
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,64,40,8,128,1,float16,float16,0,0.014560000598430633
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,64,40,8,128,1,float16,fp8,0,0.07689120173454285
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,64,40,8,128,1,float16,fp8,0,0.014553600549697876
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,64,40,8,128,1,fp8,fp8,0,0.014494399726390838
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,64,40,40,128,1,float16,float16,0,0.012411200255155564
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,64,40,40,128,1,float16,fp8,0,0.010486400127410889
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,64,40,40,128,1,fp8,fp8,0,0.01048479974269867
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,64,40,1,128,1,float16,float16,0,0.010496000200510025
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,64,40,1,128,1,float16,fp8,0,0.01043040007352829
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,64,40,1,128,1,fp8,fp8,0,0.010420800000429154
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,64,40,2,128,1,float16,float16,0,0.01040479987859726
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,64,40,2,128,1,float16,fp8,0,0.010476800054311753
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,64,40,2,128,1,fp8,fp8,0,0.01053759977221489
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,64,40,4,128,1,float16,fp8,0,0.01043040007352829
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,64,40,4,128,1,fp8,fp8,0,0.01048320010304451
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,64,40,8,128,1,float16,float16,0,0.010435199737548828
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,64,40,8,128,1,float16,fp8,0,0.01043040007352829
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,64,40,8,128,1,fp8,fp8,0,0.01048479974269867
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,64,40,40,128,1,float16,float16,0,0.010664000362157821
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,64,40,1,128,1,fp8,fp8,0,0.14414080381393432
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,64,40,40,128,1,float16,fp8,0,0.01043360009789467
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,64,40,40,128,1,fp8,fp8,0,0.010449600219726563
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,64,40,1,128,1,float16,float16,0,0.010436800122261048
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,64,40,1,128,1,float16,fp8,0,0.010471999645233154
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,64,40,2,128,1,float16,float16,0,0.010480000078678131
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,64,40,1,128,1,fp8,fp8,0,0.010440000146627427
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,64,40,1,128,1,float16,float16,0,0.02672159969806671
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,64,40,2,128,1,float16,fp8,0,0.010782399773597717
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,64,40,2,128,1,fp8,fp8,0,0.010441599786281586
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,64,40,4,128,1,float16,fp8,0,0.010492800176143647
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,64,40,4,128,1,float16,float16,0,0.010412800312042236
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,64,40,4,128,1,fp8,fp8,0,0.010412800312042236
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,64,40,8,128,1,float16,float16,0,0.010496000200510025
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,64,40,8,128,1,float16,fp8,0,0.01048320010304451
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,64,40,8,128,1,fp8,fp8,0,0.010443200170993806
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,32,40,1,128,1,float16,float16,0,0.2356879949569702
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,32,40,1,128,1,float16,fp8,0,0.23475360870361328
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,32,40,1,128,1,fp8,fp8,0,0.2339184045791626
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,32,40,2,128,1,float16,float16,0,0.2338576078414917
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,32,40,2,128,1,float16,fp8,0,0.23452799320220946
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,32,40,2,128,1,fp8,fp8,0,0.2350640058517456
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,64,40,1,128,1,float16,float16,0,0.01459839940071106
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,32,40,4,128,1,float16,float16,0,0.23582398891448975
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,64,40,2,128,1,fp8,fp8,0,0.014574399590492249
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,32,40,4,128,1,float16,fp8,0,0.2349616050720215
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,32,40,4,128,1,fp8,fp8,0,0.2349776029586792
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,32,40,8,128,1,float16,float16,0,0.23814399242401124
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,32,40,40,128,1,float16,float16,0,0.14313280582427979
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,32,40,40,128,1,float16,fp8,0,0.13990559577941894
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,32,40,40,128,1,fp8,fp8,0,0.13963680267333983
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,32,40,1,128,1,float16,float16,0,0.12312480211257934
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,32,40,1,128,1,float16,fp8,0,0.12146719694137573
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,32,40,1,128,1,fp8,fp8,0,0.1216480016708374
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,32,40,2,128,1,float16,float16,0,0.12331520318984986
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,32,40,2,128,1,float16,fp8,0,0.12167199850082397
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,32,40,2,128,1,fp8,fp8,0,0.12135679721832275
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,32,40,4,128,1,float16,float16,0,0.12486879825592041
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,32,40,4,128,1,fp8,fp8,0,0.1219648003578186
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,64,40,4,128,1,float16,float16,0,0.010494399815797806
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,32,40,8,128,1,float16,float16,0,0.1269984006881714
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,32,40,8,128,1,float16,fp8,0,0.12183840274810791
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,32,40,40,128,1,float16,float16,0,0.07884479761123657
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,32,40,8,128,1,fp8,fp8,0,0.12143360376358033
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,32,40,40,128,1,float16,fp8,0,0.07388160228729249
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,64,40,4,128,1,float16,float16,0,0.043227198719978335
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,32,40,40,128,1,fp8,fp8,0,0.07446720004081726
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,32,40,1,128,1,float16,float16,0,0.06672480106353759
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,32,40,1,128,1,float16,fp8,0,0.06537920236587524
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,32,40,1,128,1,fp8,fp8,0,0.06565920114517212
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,32,40,2,128,1,float16,float16,0,0.06628159880638122
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,32,40,2,128,1,float16,fp8,0,0.06567999720573425
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,32,40,2,128,1,fp8,fp8,0,0.06559039950370789
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,32,40,4,128,1,float16,float16,0,0.06691840291023254
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,32,40,4,128,1,float16,fp8,0,0.06563839912414551
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,32,40,8,128,1,float16,float16,0,0.06826720237731934
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,32,40,8,128,1,float16,fp8,0,0.06576799750328063
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,32,40,40,128,1,float16,float16,0,0.041228801012039185
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,32,40,8,128,1,fp8,fp8,0,0.06569920182228088
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,32,40,1,128,1,float16,float16,0,0.03873279988765717
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,32,40,1,128,1,float16,fp8,0,0.03710399866104126
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,32,40,1,128,1,fp8,fp8,0,0.03714880049228668
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,32,40,2,128,1,float16,float16,0,0.03804480135440826
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,32,40,2,128,1,float16,fp8,0,0.037134400010108946
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,32,40,2,128,1,fp8,fp8,0,0.03711999952793121
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,32,40,4,128,1,float16,float16,0,0.03887679874897003
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,32,40,4,128,1,float16,fp8,0,0.03711999952793121
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,32,40,4,128,1,fp8,fp8,0,0.0371071994304657
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,32,40,8,128,1,float16,float16,0,0.038910400867462155
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,32,40,8,128,1,float16,fp8,0,0.0370608001947403
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,32,40,8,128,1,fp8,fp8,0,0.037003201246261594
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,32,40,40,128,1,float16,float16,0,0.02487040013074875
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,32,40,40,128,1,float16,fp8,0,0.02486239969730377
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,32,40,40,128,1,fp8,fp8,0,0.024846400320529937
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,32,40,1,128,1,float16,float16,0,0.023545600473880768
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,32,40,1,128,1,float16,fp8,0,0.022776000201702118
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,32,40,1,128,1,fp8,fp8,0,0.0227743998169899
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,32,40,2,128,1,float16,float16,0,0.023319999873638152
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,32,40,2,128,1,fp8,fp8,0,0.022753599286079406
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,32,40,2,128,1,float16,fp8,0,0.022787199914455415
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,32,40,4,128,1,float16,float16,0,0.0233487993478775
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,32,40,4,128,1,float16,fp8,0,0.02276480048894882
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,32,40,4,128,1,fp8,fp8,0,0.022991999983787537
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,32,40,8,128,1,float16,float16,0,0.02481279969215393
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,32,40,8,128,1,float16,fp8,0,0.23463358879089355
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,32,40,8,128,1,float16,fp8,0,0.023270399868488313
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,32,40,8,128,1,fp8,fp8,0,0.022963200509548188
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,32,40,40,128,1,float16,float16,0,0.018644799292087556
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,32,40,40,128,1,float16,fp8,0,0.0175135999917984
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,32,40,40,128,1,fp8,fp8,0,0.017950400710105896
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,32,40,1,128,1,float16,float16,0,0.016624000668525696
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,32,40,1,128,1,float16,fp8,0,0.016633599996566772
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,32,40,1,128,1,fp8,fp8,0,0.016595199704170227
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,32,40,2,128,1,float16,float16,0,0.016595199704170227
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,32,40,2,128,1,float16,fp8,0,0.016582399606704712
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,32,40,2,128,1,fp8,fp8,0,0.016590400040149687
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,32,40,4,128,1,float16,float16,0,0.016606399416923524
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,32,40,4,128,1,float16,fp8,0,0.01653759926557541
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,32,40,4,128,1,float16,fp8,0,0.1215391993522644
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,32,40,4,128,1,fp8,fp8,0,0.016575999557971954
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,32,40,8,128,1,float16,float16,0,0.016628800332546233
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,32,40,8,128,1,fp8,fp8,0,0.016624000668525696
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,32,40,40,128,1,float16,float16,0,0.014451199769973755
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,32,40,40,128,1,float16,fp8,0,0.01286720037460327
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,32,40,40,128,1,fp8,fp8,0,0.013036799430847169
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,32,40,1,128,1,float16,float16,0,0.012508800625801087
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,32,40,1,128,1,float16,fp8,0,0.012520000338554382
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,32,40,1,128,1,fp8,fp8,0,0.012542399764060973
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,32,40,2,128,1,float16,float16,0,0.012516799569129943
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,32,40,2,128,1,float16,fp8,0,0.012588800489902496
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,32,40,2,128,1,fp8,fp8,0,0.012600000202655792
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,32,40,4,128,1,float16,float16,0,0.012520000338554382
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,32,40,4,128,1,float16,fp8,0,0.012537600100040435
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,32,40,4,128,1,fp8,fp8,0,0.06579040288925171
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,32,40,4,128,1,fp8,fp8,0,0.012788799405097962
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,32,40,8,128,1,fp8,fp8,0,0.01250080019235611
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,32,40,40,128,1,float16,fp8,0,0.04122079908847809
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,32,40,40,128,1,fp8,fp8,0,0.0412416011095047
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,32,40,8,128,1,float16,fp8,0,0.012483199685811996
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,32,40,40,128,1,float16,float16,0,0.010449600219726563
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,32,40,40,128,1,float16,fp8,0,0.010412800312042236
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,32,40,1,128,1,float16,float16,0,0.010406400263309478
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,32,40,1,128,1,float16,fp8,0,0.010443200170993806
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,32,40,1,128,1,fp8,fp8,0,0.010486400127410889
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,32,40,2,128,1,float16,float16,0,0.010435199737548828
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,32,40,2,128,1,float16,fp8,0,0.010489600151777268
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,32,40,2,128,1,fp8,fp8,0,0.010478399693965912
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,32,40,4,128,1,float16,float16,0,0.010417599976062775
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,32,40,4,128,1,float16,fp8,0,0.010451199859380722
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,32,40,4,128,1,fp8,fp8,0,0.010486400127410889
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,32,40,8,128,1,float16,float16,0,0.010744000226259232
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,32,40,8,128,1,float16,fp8,0,0.010489600151777268
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,32,40,8,128,1,fp8,fp8,0,0.010470400005578995
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,32,40,40,128,1,float16,float16,0,0.010451199859380722
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,32,40,40,128,1,fp8,fp8,0,0.010388799756765366
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,32,40,1,128,1,float16,float16,0,0.010401599854230881
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,32,40,1,128,1,float16,fp8,0,0.01035040020942688
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,32,40,1,128,1,fp8,fp8,0,0.010475199669599533
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,32,40,2,128,1,float16,float16,0,0.010487999767065048
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,32,40,2,128,1,float16,fp8,0,0.010463999956846238
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,32,40,2,128,1,fp8,fp8,0,0.010411199927330018
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,32,40,4,128,1,float16,float16,0,0.01043040007352829
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,32,40,4,128,1,float16,fp8,0,0.010393600165843963
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,32,40,4,128,1,fp8,fp8,0,0.01045759990811348
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,32,40,8,128,1,float16,fp8,0,0.010391999781131745
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,32,40,8,128,1,fp8,fp8,0,0.010463999956846238
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,32,40,8,128,1,float16,float16,0,0.010353600233793258
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,16,40,1,128,1,fp8,fp8,0,0.21348960399627687
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,16,40,1,128,1,float16,fp8,0,0.21384479999542236
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,32,40,8,128,1,fp8,fp8,0,0.2338495969772339
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,16,40,2,128,1,float16,float16,0,0.22316160202026367
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,16,40,2,128,1,float16,fp8,0,0.214355206489563
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,16,40,2,128,1,fp8,fp8,0,0.21370561122894288
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,16,40,4,128,1,float16,float16,0,0.2252592086791992
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,16,40,4,128,1,float16,fp8,0,0.21414079666137695
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,16,40,4,128,1,fp8,fp8,0,0.21525120735168457
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,16,40,8,128,1,float16,float16,0,0.2281791925430298
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,16,40,8,128,1,float16,fp8,0,0.21484000682830812
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,32,40,8,128,1,float16,fp8,0,0.016595199704170227
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,16,40,40,128,1,float16,float16,0,0.13116480112075807
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,16,40,40,128,1,float16,fp8,0,0.12072319984436035
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,16,40,40,128,1,fp8,fp8,0,0.12039999961853028
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,16,40,1,128,1,float16,fp8,0,0.11176639795303345
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,16,40,1,128,1,fp8,fp8,0,0.11144800186157226
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,16,40,2,128,1,float16,float16,0,0.11719839572906494
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,32,40,8,128,1,float16,float16,0,0.012544000148773193
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,16,40,2,128,1,float16,fp8,0,0.11161919832229614
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,16,40,2,128,1,fp8,fp8,0,0.11218559741973877
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,16,40,4,128,1,float16,float16,0,0.11917120218276978
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,16,40,4,128,1,float16,fp8,0,0.11175680160522461
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,16,40,4,128,1,fp8,fp8,0,0.11230080127716065
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,32,40,40,128,1,fp8,fp8,0,0.010452800244092942
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,16,40,8,128,1,float16,float16,0,0.11960159540176392
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,16,40,8,128,1,float16,fp8,0,0.11212480068206787
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,16,40,8,128,1,fp8,fp8,0,0.11252800226211548
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,16,40,40,128,1,float16,fp8,0,0.06516479849815368
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,16,40,40,128,1,fp8,fp8,0,0.06539199948310852
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,16,40,1,128,1,float16,float16,0,0.06524479985237122
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,32,40,40,128,1,float16,fp8,0,0.0104592002928257
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,16,40,1,128,1,float16,fp8,0,0.060894399881362915
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,16,40,1,128,1,fp8,fp8,0,0.06113119721412659
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,16,40,2,128,1,float16,fp8,0,0.06163679957389832
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,16,40,2,128,1,fp8,fp8,0,0.0613647997379303
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,16,40,4,128,1,float16,fp8,0,0.061791998147964475
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,16,40,4,128,1,fp8,fp8,0,0.061750400066375735
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,16,40,8,128,1,float16,float16,0,0.06603999733924866
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,16,40,1,128,1,float16,float16,0,0.22121601104736327
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,16,40,8,128,1,float16,fp8,0,0.061857599020004275
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,16,40,8,128,1,fp8,fp8,0,0.06133919954299927
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,16,40,40,128,1,float16,float16,0,0.039124798774719236
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,16,40,40,128,1,float16,fp8,0,0.03701280057430267
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,16,40,40,128,1,fp8,fp8,0,0.037057599425315856
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,16,40,1,128,1,float16,float16,0,0.03743839859962463
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,16,40,1,128,1,float16,fp8,0,0.03503040075302124
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,16,40,1,128,1,fp8,fp8,0,0.03506079912185669
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,16,40,2,128,1,float16,float16,0,0.037371200323104856
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,16,40,2,128,1,float16,fp8,0,0.034955200552940366
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,16,40,2,128,1,fp8,fp8,0,0.03503040075302124
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,16,40,4,128,1,float16,float16,0,0.037092798948287965
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,16,40,4,128,1,float16,fp8,0,0.03502880036830902
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,16,40,4,128,1,fp8,fp8,0,0.03497599959373474
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,16,40,8,128,1,float16,float16,0,0.03752320110797882
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,16,40,8,128,1,fp8,fp8,0,0.21440958976745605
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,16,40,8,128,1,float16,fp8,0,0.0349839985370636
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,16,40,8,128,1,fp8,fp8,0,0.03503200113773346
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,16,40,40,128,1,float16,float16,0,0.02476000040769577
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,16,40,40,128,1,fp8,fp8,0,0.0227743998169899
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,16,40,1,128,1,float16,float16,0,0.02276639938354492
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,16,40,1,128,1,float16,fp8,0,0.022750400006771088
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,16,40,1,128,1,fp8,fp8,0,0.022686399519443512
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,16,40,2,128,1,float16,float16,0,0.02306559979915619
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,16,40,2,128,1,float16,fp8,0,0.022681599855422972
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,16,40,2,128,1,fp8,fp8,0,0.022724799811840057
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,16,40,4,128,1,float16,float16,0,0.02319840043783188
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,16,40,4,128,1,float16,fp8,0,0.022726400196552275
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,16,40,4,128,1,fp8,fp8,0,0.02266560047864914
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,16,40,8,128,1,float16,float16,0,0.022835199534893037
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,16,40,8,128,1,float16,fp8,0,0.022720000147819518
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,16,40,40,128,1,float16,float16,0,0.016927999258041383
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,16,40,8,128,1,fp8,fp8,0,0.022731199860572815
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,16,40,40,128,1,float16,fp8,0,0.016539199650287627
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,16,40,40,128,1,float16,float16,0,0.06861600279808044
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,16,40,40,128,1,fp8,fp8,0,0.016579200327396394
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,16,40,1,128,1,float16,fp8,0,0.01661120057106018
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,16,40,1,128,1,fp8,fp8,0,0.016310399770736693
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,16,40,1,128,1,float16,float16,0,0.016540800034999848
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,16,40,2,128,1,float16,float16,0,0.016518400609493257
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,16,40,2,128,1,fp8,fp8,0,0.016545599699020384
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,16,40,4,128,1,float16,float16,0,0.0166143998503685
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,16,40,2,128,1,float16,float16,0,0.06530240178108215
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,16,40,4,128,1,float16,fp8,0,0.0165583997964859
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,16,40,4,128,1,fp8,fp8,0,0.01664319932460785
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,16,40,8,128,1,float16,fp8,0,0.016516800224781036
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,16,40,8,128,1,float16,float16,0,0.016603200137615202
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,16,40,4,128,1,float16,float16,0,0.06596320271492004
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,16,40,8,128,1,fp8,fp8,0,0.016510400176048278
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16,40,40,128,1,float16,float16,0,0.014302399754524232
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16,40,40,128,1,float16,fp8,0,0.012464000284671784
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16,40,40,128,1,fp8,fp8,0,0.012529599666595458
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16,40,1,128,1,float16,float16,0,0.012529599666595458
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16,40,1,128,1,float16,fp8,0,0.012439999729394913
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16,40,2,128,1,float16,float16,0,0.012507200241088867
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16,40,2,128,1,float16,fp8,0,0.012532800436019897
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16,40,2,128,1,fp8,fp8,0,0.012534399330615998
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16,40,4,128,1,float16,float16,0,0.012535999715328216
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16,40,4,128,1,float16,fp8,0,0.012561599910259246
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16,40,4,128,1,fp8,fp8,0,0.012449599802494049
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16,40,1,128,1,fp8,fp8,0,0.012435200065374375
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16,40,8,128,1,float16,float16,0,0.012468799948692322
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16,40,8,128,1,float16,fp8,0,0.012467200309038163
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16,40,8,128,1,fp8,fp8,0,0.012444800138473511
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16,40,40,128,1,float16,float16,0,0.010478399693965912
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16,40,40,128,1,float16,fp8,0,0.010454399883747101
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16,40,40,128,1,fp8,fp8,0,0.010492800176143647
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16,40,1,128,1,float16,float16,0,0.010387200117111205
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16,40,1,128,1,float16,fp8,0,0.010398399829864503
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16,40,2,128,1,float16,float16,0,0.010480000078678131
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16,40,2,128,1,float16,fp8,0,0.01040479987859726
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16,40,2,128,1,fp8,fp8,0,0.010420800000429154
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16,40,4,128,1,float16,float16,0,0.010395199805498124
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16,40,4,128,1,float16,fp8,0,0.010406400263309478
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16,40,4,128,1,fp8,fp8,0,0.010391999781131745
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16,40,8,128,1,float16,float16,0,0.010516799986362457
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16,40,8,128,1,float16,fp8,0,0.010398399829864503
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16,40,8,128,1,fp8,fp8,0,0.010412800312042236
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16,40,40,128,1,float16,fp8,0,0.00928800031542778
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16,40,40,128,1,fp8,fp8,0,0.010387200117111205
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16,40,1,128,1,float16,float16,0,0.009484799951314926
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16,40,1,128,1,float16,fp8,0,0.010391999781131745
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16,40,1,128,1,fp8,fp8,0,0.008558399975299835
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16,40,2,128,1,float16,float16,0,0.010391999781131745
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16,40,2,128,1,float16,fp8,0,0.008870399743318557
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16,40,4,128,1,float16,float16,0,0.008857599645853042
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16,40,4,128,1,float16,fp8,0,0.008982399851083756
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16,40,4,128,1,fp8,fp8,0,0.01037919968366623
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16,40,8,128,1,float16,fp8,0,0.010398399829864503
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,16,40,40,128,1,float16,fp8,0,0.02272160053253174
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16,40,8,128,1,fp8,fp8,0,0.010425599664449692
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,16,40,1,128,1,float16,float16,0,0.11859040260314942
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,1,40,1,128,1,float16,fp8,0,0.20603361129760742
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,1,40,1,128,1,float16,float16,0,0.21891839504241944
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,1,40,2,128,1,float16,float16,0,0.2194960117340088
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,1,40,2,128,1,float16,fp8,0,0.20636959075927735
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,16,40,2,128,1,float16,fp8,0,0.016395199298858642
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,1,40,2,128,1,fp8,fp8,0,0.2063568115234375
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,1,40,4,128,1,float16,float16,0,0.2183135986328125
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,1,40,4,128,1,float16,fp8,0,0.20656158924102783
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,1,40,4,128,1,fp8,fp8,0,0.20671360492706298
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,1,40,8,128,1,float16,float16,0,0.21867039203643798
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,1,40,8,128,1,float16,fp8,0,0.2066960096359253
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,1,40,8,128,1,fp8,fp8,0,0.20658400058746337
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,1,40,40,128,1,float16,float16,0,0.1175920009613037
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,1,40,40,128,1,float16,fp8,0,0.10917119979858399
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16,40,40,128,1,float16,float16,0,0.010494399815797806
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,1,40,40,128,1,fp8,fp8,0,0.10972959995269775
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,1,40,1,128,1,float16,float16,0,0.11619199514389038
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,1,40,1,128,1,float16,fp8,0,0.10803519487380982
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,1,40,1,128,1,fp8,fp8,0,0.10902080535888672
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,1,40,2,128,1,float16,float16,0,0.11632959842681885
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,1,40,2,128,1,float16,fp8,0,0.10886559486389161
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16,40,2,128,1,fp8,fp8,0,0.010355199873447418
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,1,40,4,128,1,float16,float16,0,0.11621760129928589
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,1,40,4,128,1,float16,fp8,0,0.10868799686431885
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,1,40,4,128,1,fp8,fp8,0,0.10853919982910157
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,1,40,8,128,1,float16,float16,0,0.11596319675445557
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,1,40,8,128,1,float16,fp8,0,0.10856800079345703
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16,40,8,128,1,float16,float16,0,0.010420800000429154
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,1,40,8,128,1,fp8,fp8,0,0.10824480056762695
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1,40,40,128,1,float16,float16,0,0.06513280272483826
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1,40,40,128,1,float16,fp8,0,0.0597536027431488
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1,40,40,128,1,fp8,fp8,0,0.05965120196342468
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1,40,1,128,1,float16,fp8,0,0.05958719849586487
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1,40,1,128,1,fp8,fp8,0,0.059628802537918094
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1,40,2,128,1,float16,float16,0,0.06439359784126282
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1,40,2,128,1,float16,fp8,0,0.05970240235328674
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1,40,2,128,1,fp8,fp8,0,0.059952002763748166
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,1,40,1,128,1,fp8,fp8,0,0.20596001148223878
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1,40,4,128,1,float16,float16,0,0.06440320014953613
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1,40,4,128,1,fp8,fp8,0,0.06000319719314575
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1,40,8,128,1,float16,float16,0,0.06464160084724427
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1,40,4,128,1,float16,fp8,0,0.05986080169677734
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1,40,8,128,1,float16,fp8,0,0.05975840091705322
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1,40,8,128,1,fp8,fp8,0,0.05971519947052002
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1,40,40,128,1,float16,fp8,0,0.034027200937271115
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1,40,40,128,1,fp8,fp8,0,0.03482080101966858
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1,40,1,128,1,float16,float16,0,0.037064000964164734
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1,40,1,128,1,float16,fp8,0,0.033769598603248595
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1,40,1,128,1,fp8,fp8,0,0.034267199039459226
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1,40,2,128,1,float16,float16,0,0.037099200487136844
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1,40,2,128,1,float16,fp8,0,0.034031999111175534
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1,40,2,128,1,fp8,fp8,0,0.03430080115795135
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1,40,4,128,1,float16,float16,0,0.03705919981002807
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1,40,4,128,1,float16,fp8,0,0.03394399881362915
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1,40,8,128,1,float16,float16,0,0.037092798948287965
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1,40,4,128,1,fp8,fp8,0,0.034246399998664856
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1,40,8,128,1,float16,fp8,0,0.033964800834655764
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1,40,8,128,1,fp8,fp8,0,0.034140801429748534
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1,40,40,128,1,float16,float16,0,0.024166400730609893
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1,40,40,128,1,float16,fp8,0,0.022412799298763275
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1,40,40,128,1,fp8,fp8,0,0.0210207998752594
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1,40,1,128,1,float16,float16,0,0.022780799865722658
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1,40,1,128,1,float16,fp8,0,0.021452799439430237
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1,40,1,128,1,fp8,fp8,0,0.020744000375270844
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1,40,2,128,1,float16,fp8,0,0.020708799362182617
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1,40,2,128,1,float16,float16,0,0.022804799675941467
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1,40,4,128,1,float16,float16,0,0.02268480062484741
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1,40,4,128,1,float16,fp8,0,0.020703999698162077
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1,40,4,128,1,fp8,fp8,0,0.020793600380420683
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1,40,8,128,1,float16,float16,0,0.022782400250434875
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1,40,8,128,1,float16,fp8,0,0.020729599893093108
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1,40,8,128,1,fp8,fp8,0,0.020852799713611602
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1,40,40,128,1,float16,float16,0,0.01664319932460785
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1,40,40,128,1,float16,fp8,0,0.014791999757289887
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1,40,40,128,1,fp8,fp8,0,0.016260799765586854
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16,40,1,128,1,fp8,fp8,0,0.010463999956846238
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1,40,1,128,1,float16,float16,0,0.015919999778270723
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1,40,1,128,1,float16,fp8,0,0.015329599380493164
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1,40,2,128,1,float16,float16,0,0.01632159948348999
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1,40,2,128,1,float16,fp8,0,0.015803200006484986
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1,40,2,128,1,fp8,fp8,0,0.014571200311183929
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1,40,4,128,1,float16,fp8,0,0.015512000024318694
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1,40,4,128,1,float16,float16,0,0.016518400609493257
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1,40,4,128,1,fp8,fp8,0,0.015388800203800202
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1,40,8,128,1,float16,float16,0,0.016545599699020384
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1,40,8,128,1,float16,fp8,0,0.015387199819087982
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1,40,8,128,1,fp8,fp8,0,0.014587199687957764
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1,40,40,128,1,float16,float16,0,0.013068799674510957
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1,40,40,128,1,fp8,fp8,0,0.012479999661445617
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,1,40,2,128,1,fp8,fp8,0,0.10882079601287842
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1,40,1,128,1,float16,float16,0,0.012444800138473511
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1,40,1,128,1,float16,fp8,0,0.012460800260305405
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1,40,1,128,1,fp8,fp8,0,0.012483199685811996
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1,40,2,128,1,float16,float16,0,0.012398400157690049
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1,40,2,128,1,float16,fp8,0,0.012476799637079239
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1,40,2,128,1,fp8,fp8,0,0.012483199685811996
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1,40,4,128,1,float16,float16,0,0.012537600100040435
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1,40,4,128,1,float16,fp8,0,0.012468799948692322
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1,40,4,128,1,fp8,fp8,0,0.01250080019235611
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1,40,8,128,1,float16,float16,0,0.012454400211572647
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1,40,8,128,1,float16,fp8,0,0.012417600303888322
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1,40,8,128,1,fp8,fp8,0,0.01252640038728714
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1,40,40,128,1,float16,float16,0,0.010480000078678131
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1,40,40,128,1,float16,fp8,0,0.010414399951696397
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1,40,40,128,1,fp8,fp8,0,0.01043360009789467
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1,40,1,128,1,float16,float16,0,0.06426720023155212
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1,40,1,128,1,float16,fp8,0,0.010411199927330018
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1,40,1,128,1,fp8,fp8,0,0.01040000021457672
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1,40,1,128,1,float16,float16,0,0.010460799932479859
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1,40,2,128,1,float16,float16,0,0.010377600044012069
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1,40,2,128,1,float16,fp8,0,0.010391999781131745
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1,40,2,128,1,fp8,fp8,0,0.010422399640083313
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1,40,4,128,1,float16,float16,0,0.010393600165843963
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1,40,4,128,1,float16,fp8,0,0.010369600355625152
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1,40,4,128,1,fp8,fp8,0,0.010412800312042236
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1,40,8,128,1,float16,float16,0,0.010390400141477584
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1,40,8,128,1,fp8,fp8,0,0.010422399640083313
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1,40,40,128,1,float16,float16,0,0.038422399759292604
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1,40,40,128,1,float16,fp8,0,0.0104592002928257
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1,40,40,128,1,float16,float16,0,0.010425599664449692
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1,40,40,128,1,fp8,fp8,0,0.009811200201511383
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1,40,1,128,1,float16,float16,0,0.010422399640083313
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1,40,1,128,1,float16,fp8,0,0.008436799794435502
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1,40,1,128,1,fp8,fp8,0,0.008508799970149994
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1,40,2,128,1,float16,float16,0,0.009411200135946273
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1,40,2,128,1,float16,fp8,0,0.008374399691820144
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1,40,2,128,1,fp8,fp8,0,0.009080000221729279
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1,40,4,128,1,float16,float16,0,0.009588800370693207
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1,40,4,128,1,float16,fp8,0,0.00846560001373291
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1,40,4,128,1,fp8,fp8,0,0.009241600334644318
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1,40,8,128,1,float16,float16,0,0.009915199875831605
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1,40,8,128,1,float16,fp8,0,0.008451200276613235
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1,40,8,128,1,fp8,fp8,0,0.008740799874067307
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1,40,2,128,1,fp8,fp8,0,0.020745599269866945
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1,40,1,128,1,fp8,fp8,0,0.014609600603580474
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1,40,40,128,1,float16,fp8,0,0.012577599287033081
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1,40,8,128,1,float16,fp8,0,0.010396800190210342
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,16384,32,1,128,1,float16,fp8,0,11.301322937011719
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,16384,32,2,128,1,float16,fp8,0,11.371836853027343
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,16384,32,2,128,1,fp8,fp8,0,11.471353912353516
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,16384,32,1,128,1,float16,float16,0,14.332868957519532
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,16384,32,2,128,1,float16,float16,0,14.529280090332032
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,16384,32,4,128,1,float16,fp8,0,11.418566131591797
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,16384,32,1,128,1,fp8,fp8,0,11.258755493164063
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,16384,32,4,128,1,float16,float16,0,14.510575866699218
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,16384,32,4,128,1,fp8,fp8,0,11.771343994140626
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16384,32,32,128,1,float16,fp8,0,5.862519836425781
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16384,32,32,128,1,fp8,fp8,0,5.891395187377929
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16384,32,1,128,1,float16,float16,0,7.459327697753906
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,16384,32,8,128,1,float16,fp8,0,11.922122955322266
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,16384,32,8,128,1,fp8,fp8,0,11.503494262695312
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,16384,32,8,128,1,float16,float16,0,15.176045227050782
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16384,32,1,128,1,float16,fp8,0,5.853894424438477
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16384,32,1,128,1,fp8,fp8,0,5.718606567382812
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16384,32,2,128,1,float16,float16,0,7.193926239013672
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16384,32,2,128,1,float16,fp8,0,5.833515167236328
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16384,32,2,128,1,fp8,fp8,0,5.9289295196533205
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16384,32,4,128,1,float16,float16,0,7.439972686767578
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16384,32,4,128,1,float16,fp8,0,5.79598388671875
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16384,32,4,128,1,fp8,fp8,0,5.8578849792480465
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16384,32,8,128,1,float16,float16,0,7.696729278564453
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16384,32,32,128,1,fp8,fp8,0,2.976278305053711
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16384,32,8,128,1,float16,fp8,0,5.9432014465332035
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16384,32,8,128,1,fp8,fp8,0,5.824174499511718
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16384,32,1,128,1,float16,float16,0,3.7157840728759766
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16384,32,1,128,1,fp8,fp8,0,2.8781999588012694
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16384,32,1,128,1,float16,fp8,0,3.259473419189453
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16384,32,2,128,1,float16,float16,0,3.748369598388672
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16384,32,32,128,1,float16,fp8,0,3.200107192993164
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16384,32,2,128,1,fp8,fp8,0,3.2059886932373045
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16384,32,4,128,1,float16,fp8,0,2.8791807174682615
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16384,32,4,128,1,float16,float16,0,3.663777542114258
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16384,32,2,128,1,float16,fp8,0,2.9156511306762694
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16384,32,4,128,1,fp8,fp8,0,3.4103408813476563
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16384,32,8,128,1,float16,float16,0,3.555049514770508
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16384,32,8,128,1,float16,fp8,0,2.855622482299805
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16384,32,32,128,1,float16,fp8,0,1.5117759704589844
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16384,32,32,128,1,fp8,fp8,0,1.5333439826965332
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16384,32,8,128,1,fp8,fp8,0,2.891257667541504
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16384,32,1,128,1,float16,float16,0,1.8842144012451172
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16384,32,1,128,1,float16,fp8,0,1.5282367706298827
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16384,32,1,128,1,fp8,fp8,0,1.4562944412231444
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16384,32,2,128,1,float16,fp8,0,1.4965056419372558
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16384,32,2,128,1,float16,float16,0,1.6907392501831056
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16384,32,2,128,1,fp8,fp8,0,1.6669536590576173
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16384,32,4,128,1,float16,float16,0,1.7221136093139648
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16384,32,4,128,1,float16,fp8,0,1.4663632392883301
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16384,32,4,128,1,fp8,fp8,0,1.4786815643310547
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16384,32,8,128,1,float16,float16,0,1.869588851928711
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16384,32,8,128,1,float16,fp8,0,1.4976672172546386
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16384,32,8,128,1,fp8,fp8,0,1.7586736679077148
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,12288,32,1,128,1,float16,fp8,0,6.68957748413086
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,12288,32,1,128,1,fp8,fp8,0,6.610049438476563
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,12288,32,1,128,1,float16,float16,0,8.305039978027343
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,12288,32,2,128,1,float16,fp8,0,6.587992095947266
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,12288,32,2,128,1,float16,float16,0,8.435052490234375
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,12288,32,2,128,1,fp8,fp8,0,6.792382049560547
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,12288,32,4,128,1,fp8,fp8,0,6.691449737548828
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,12288,32,4,128,1,float16,fp8,0,6.747630310058594
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,12288,32,4,128,1,float16,float16,0,8.556719970703124
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,12288,32,8,128,1,float16,float16,0,8.69677734375
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,12288,32,32,128,1,float16,fp8,0,3.654180908203125
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,12288,32,8,128,1,float16,fp8,0,6.803981018066406
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,12288,32,8,128,1,fp8,fp8,0,6.742209625244141
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,12288,32,32,128,1,fp8,fp8,0,3.517622375488281
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,12288,32,1,128,1,float16,float16,0,4.250812911987305
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,12288,32,1,128,1,float16,fp8,0,3.375284957885742
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,12288,32,1,128,1,fp8,fp8,0,3.2145713806152343
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,12288,32,2,128,1,float16,float16,0,4.174583816528321
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,12288,32,2,128,1,fp8,fp8,0,3.34789924621582
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,12288,32,2,128,1,float16,fp8,0,3.7275489807128905
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,12288,32,4,128,1,float16,float16,0,4.120441436767578
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,12288,32,4,128,1,float16,fp8,0,3.544025421142578
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,12288,32,4,128,1,fp8,fp8,0,3.362646484375
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,12288,32,8,128,1,float16,float16,0,4.295033645629883
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,12288,32,8,128,1,float16,fp8,0,3.444246292114258
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,12288,32,8,128,1,fp8,fp8,0,3.359515380859375
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,12288,32,32,128,1,float16,fp8,0,2.0668399810791014
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,12288,32,32,128,1,fp8,fp8,0,1.7570703506469727
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,12288,32,1,128,1,float16,fp8,0,1.7070816040039063
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,12288,32,1,128,1,float16,float16,0,2.0177839279174803
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,12288,32,1,128,1,fp8,fp8,0,1.658083152770996
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,12288,32,2,128,1,float16,float16,0,1.9537792205810547
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,12288,32,2,128,1,float16,fp8,0,1.9659200668334962
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,12288,32,2,128,1,fp8,fp8,0,1.6756336212158203
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,12288,32,4,128,1,float16,fp8,0,1.6945072174072267
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,12288,32,4,128,1,float16,float16,0,2.061292839050293
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,12288,32,4,128,1,fp8,fp8,0,1.716756820678711
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,12288,32,8,128,1,float16,float16,0,1.9681119918823242
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,12288,32,8,128,1,float16,fp8,0,1.8868431091308593
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,12288,32,8,128,1,fp8,fp8,0,1.6941104888916017
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,12288,32,32,128,1,float16,fp8,0,0.9533647537231446
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,12288,32,32,128,1,fp8,fp8,0,0.9509615898132324
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,12288,32,1,128,1,float16,float16,0,0.9885408401489257
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,12288,32,1,128,1,fp8,fp8,0,0.8873344421386719
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,12288,32,2,128,1,float16,float16,0,0.9937727928161622
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,12288,32,2,128,1,float16,fp8,0,0.9435935974121094
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,12288,32,1,128,1,float16,fp8,0,0.8766768455505372
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,12288,32,2,128,1,fp8,fp8,0,0.9055904388427735
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,12288,32,4,128,1,float16,float16,0,0.9987615585327149
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,12288,32,4,128,1,float16,fp8,0,0.9022720336914063
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,12288,32,4,128,1,fp8,fp8,0,0.8665568351745605
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,12288,32,8,128,1,float16,float16,0,1.001039981842041
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,12288,32,8,128,1,float16,fp8,0,0.8906304359436035
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,12288,32,8,128,1,fp8,fp8,0,0.9976976394653321
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,10240,32,1,128,1,float16,fp8,0,4.620843124389649
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,10240,32,1,128,1,float16,float16,0,5.794271850585938
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,10240,32,1,128,1,fp8,fp8,0,4.732318496704101
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,10240,32,2,128,1,float16,float16,0,5.800534439086914
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,10240,32,2,128,1,float16,fp8,0,4.736415863037109
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,10240,32,2,128,1,fp8,fp8,0,4.863075256347656
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,10240,32,4,128,1,float16,float16,0,6.060903930664063
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,10240,32,4,128,1,float16,fp8,0,4.722230529785156
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,10240,32,4,128,1,fp8,fp8,0,4.820819091796875
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,10240,32,8,128,1,float16,float16,0,6.110899353027344
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,10240,32,8,128,1,float16,fp8,0,4.689795303344726
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,10240,32,8,128,1,fp8,fp8,0,4.876712036132813
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,10240,32,32,128,1,float16,fp8,0,2.516939163208008
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,10240,32,32,128,1,fp8,fp8,0,2.526134490966797
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,10240,32,1,128,1,float16,float16,0,2.8580032348632813
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,10240,32,1,128,1,float16,fp8,0,2.770908737182617
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,10240,32,1,128,1,fp8,fp8,0,2.3885824203491213
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,10240,32,2,128,1,float16,float16,0,2.9011552810668944
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,10240,32,2,128,1,float16,fp8,0,2.780567932128906
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,10240,32,2,128,1,fp8,fp8,0,2.3393808364868165
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,10240,32,4,128,1,float16,float16,0,2.8708751678466795
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,10240,32,4,128,1,float16,fp8,0,2.6086847305297853
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,10240,32,4,128,1,fp8,fp8,0,2.360247993469238
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,10240,32,8,128,1,float16,float16,0,2.9029951095581055
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,10240,32,8,128,1,float16,fp8,0,2.651894378662109
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,10240,32,8,128,1,fp8,fp8,0,2.4076751708984374
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,10240,32,32,128,1,float16,float16,0,1.473964786529541
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,10240,32,32,128,1,float16,fp8,0,1.3588687896728515
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,12288,32,32,128,1,float16,float16,0,1.0501456260681152
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,10240,32,32,128,1,float16,float16,0,2.9943023681640626
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,10240,32,32,128,1,fp8,fp8,0,1.2832192420959472
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,12288,32,32,128,1,float16,float16,0,2.049747276306152
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,12288,32,32,128,1,float16,float16,0,4.3481792449951175
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,10240,32,1,128,1,float16,fp8,0,1.3799167633056642
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16384,32,32,128,1,float16,float16,0,3.6630191802978516
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,10240,32,1,128,1,fp8,fp8,0,1.1850751876831054
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,10240,32,2,128,1,float16,float16,0,1.4503775596618653
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,10240,32,2,128,1,float16,fp8,0,1.5235872268676758
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,10240,32,2,128,1,fp8,fp8,0,1.2197551727294922
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,10240,32,4,128,1,float16,fp8,0,1.184115219116211
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,10240,32,4,128,1,float16,float16,0,1.555504035949707
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,10240,32,4,128,1,fp8,fp8,0,1.351416015625
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,10240,32,8,128,1,float16,fp8,0,1.1853759765625
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16384,32,32,128,1,float16,float16,0,1.7722848892211913
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,10240,32,8,128,1,fp8,fp8,0,1.2084912300109862
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,10240,32,32,128,1,float16,float16,0,0.847544002532959
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,10240,32,32,128,1,float16,fp8,0,0.7043680191040039
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,10240,32,32,128,1,fp8,fp8,0,0.6647039890289307
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,10240,32,1,128,1,float16,float16,0,0.7880368232727051
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,10240,32,1,128,1,fp8,fp8,0,0.7598559856414795
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,10240,32,2,128,1,float16,float16,0,0.7121503829956055
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,10240,32,2,128,1,float16,fp8,0,0.6271152019500732
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,10240,32,2,128,1,fp8,fp8,0,0.6841184139251709
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,10240,32,4,128,1,float16,float16,0,0.7878159999847412
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,10240,32,4,128,1,float16,fp8,0,0.635865592956543
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,10240,32,4,128,1,fp8,fp8,0,0.6269919872283936
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16384,32,32,128,1,float16,float16,0,7.5872337341308596
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,10240,32,8,128,1,float16,float16,0,0.7216256141662598
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,10240,32,8,128,1,float16,fp8,0,0.6500639915466309
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,10240,32,8,128,1,fp8,fp8,0,0.6419199943542481
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,10240,32,8,128,1,float16,float16,0,1.4373647689819335
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,10240,32,1,128,1,float16,float16,0,1.3421695709228516
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,10240,32,1,128,1,float16,fp8,0,0.632212781906128
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,8192,32,1,128,1,float16,fp8,0,5.978963088989258
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,8192,32,1,128,1,fp8,fp8,0,6.033414459228515
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,8192,32,2,128,1,float16,fp8,0,5.979183959960937
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,8192,32,1,128,1,float16,float16,0,7.582132720947266
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,8192,32,2,128,1,fp8,fp8,0,6.020579147338867
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,8192,32,2,128,1,float16,float16,0,7.700039672851562
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,8192,32,4,128,1,float16,float16,0,7.672625732421875
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,8192,32,4,128,1,float16,fp8,0,6.052422332763672
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,8192,32,32,128,1,float16,fp8,0,3.5873104095458985
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,8192,32,32,128,1,float16,float16,0,4.076599884033203
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,8192,32,4,128,1,fp8,fp8,0,6.097619247436524
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,8192,32,32,128,1,fp8,fp8,0,3.3704158782958986
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,8192,32,8,128,1,float16,fp8,0,6.087863922119141
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,8192,32,8,128,1,fp8,fp8,0,6.1794689178466795
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,8192,32,8,128,1,float16,float16,0,8.019459533691407
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,8192,32,1,128,1,float16,fp8,0,3.0538000106811523
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,8192,32,1,128,1,float16,float16,0,3.769025421142578
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,8192,32,1,128,1,fp8,fp8,0,3.375203323364258
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,8192,32,2,128,1,float16,float16,0,3.90478401184082
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,8192,32,2,128,1,float16,fp8,0,3.0337295532226562
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,8192,32,2,128,1,fp8,fp8,0,2.9742431640625
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,8192,32,4,128,1,float16,fp8,0,3.304830551147461
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,8192,32,4,128,1,float16,float16,0,3.7890960693359377
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,8192,32,4,128,1,fp8,fp8,0,3.0742143630981444
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,8192,32,32,128,1,float16,float16,0,2.0097856521606445
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,8192,32,32,128,1,float16,fp8,0,1.7139328002929688
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,8192,32,8,128,1,float16,fp8,0,3.092715263366699
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,8192,32,8,128,1,float16,float16,0,3.8881294250488283
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,8192,32,32,128,1,fp8,fp8,0,1.6833824157714843
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,8192,32,1,128,1,float16,fp8,0,1.549168014526367
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,8192,32,8,128,1,fp8,fp8,0,3.388198471069336
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,8192,32,1,128,1,float16,float16,0,2.15032958984375
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,8192,32,2,128,1,float16,fp8,0,1.5979151725769043
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,8192,32,2,128,1,float16,float16,0,1.7728096008300782
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,8192,32,1,128,1,fp8,fp8,0,1.9039951324462892
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,8192,32,2,128,1,fp8,fp8,0,1.812868881225586
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,8192,32,4,128,1,float16,fp8,0,1.5200719833374023
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,8192,32,4,128,1,fp8,fp8,0,1.5517744064331054
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,8192,32,4,128,1,float16,float16,0,1.8543264389038085
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,8192,32,32,128,1,float16,float16,0,0.9887344360351562
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,8192,32,32,128,1,float16,fp8,0,0.8982799530029297
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,8192,32,8,128,1,float16,fp8,0,1.5561840057373046
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,8192,32,8,128,1,float16,float16,0,1.9953727722167969
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,8192,32,32,128,1,fp8,fp8,0,0.8531248092651367
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,8192,32,8,128,1,fp8,fp8,0,1.8717744827270508
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,8192,32,1,128,1,float16,float16,0,0.888326358795166
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,8192,32,1,128,1,fp8,fp8,0,0.9122336387634278
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,8192,32,1,128,1,float16,fp8,0,0.9675215721130371
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,8192,32,2,128,1,float16,float16,0,0.8926624298095703
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,8192,32,2,128,1,float16,fp8,0,0.7979152202606201
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,8192,32,4,128,1,float16,float16,0,0.9077152252197266
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,8192,32,4,128,1,float16,fp8,0,0.846288013458252
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,8192,32,4,128,1,fp8,fp8,0,0.86080961227417
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,8192,32,32,128,1,float16,float16,0,0.5158991813659668
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,8192,32,8,128,1,float16,float16,0,0.926144027709961
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,8192,32,8,128,1,float16,fp8,0,0.8800623893737793
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,8192,32,32,128,1,fp8,fp8,0,0.46473278999328616
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,8192,32,1,128,1,float16,float16,0,0.48648481369018554
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,8192,32,1,128,1,float16,fp8,0,0.44801921844482423
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,8192,32,1,128,1,fp8,fp8,0,0.4696352005004883
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,8192,32,2,128,1,float16,float16,0,0.4781680107116699
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,8192,32,2,128,1,float16,fp8,0,0.4351247787475586
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,8192,32,2,128,1,fp8,fp8,0,0.4458176136016846
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,8192,32,4,128,1,float16,float16,0,0.4836400032043457
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,8192,32,4,128,1,float16,fp8,0,0.466431999206543
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,8192,32,4,128,1,fp8,fp8,0,0.4458928108215332
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,8192,32,8,128,1,float16,float16,0,0.48265762329101564
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,8192,32,8,128,1,float16,fp8,0,0.44381279945373536
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,8192,32,8,128,1,fp8,fp8,0,0.4425968170166016
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,8192,32,2,128,1,fp8,fp8,0,1.0080143928527832
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,8192,32,8,128,1,fp8,fp8,0,0.9507007598876953
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,6144,32,1,128,1,float16,fp8,0,3.509998321533203
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,6144,32,1,128,1,fp8,fp8,0,3.5110912322998047
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,6144,32,2,128,1,float16,fp8,0,3.516321563720703
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,6144,32,1,128,1,float16,float16,0,4.411196899414063
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,6144,32,2,128,1,float16,float16,0,4.412942504882812
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,6144,32,2,128,1,fp8,fp8,0,3.4495983123779297
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,8192,32,32,128,1,float16,fp8,0,0.5043087959289551
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,6144,32,4,128,1,float16,float16,0,4.3190864562988285
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,6144,32,4,128,1,float16,fp8,0,3.6252288818359375
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,6144,32,4,128,1,fp8,fp8,0,3.6065345764160157
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,6144,32,32,128,1,float16,float16,0,2.401728057861328
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,6144,32,8,128,1,float16,fp8,0,3.8479312896728515
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,6144,32,8,128,1,float16,float16,0,4.573767852783203
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,6144,32,32,128,1,float16,fp8,0,1.8942432403564453
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,6144,32,8,128,1,fp8,fp8,0,3.721500778198242
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,6144,32,1,128,1,float16,float16,0,2.1320463180541993
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,6144,32,1,128,1,float16,fp8,0,1.8148015975952148
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,6144,32,32,128,1,fp8,fp8,0,2.3143808364868166
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,6144,32,1,128,1,fp8,fp8,0,2.022599983215332
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,6144,32,2,128,1,float16,float16,0,2.053343963623047
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,6144,32,2,128,1,fp8,fp8,0,1.7452224731445312
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,6144,32,4,128,1,float16,float16,0,2.1188800811767576
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,6144,32,2,128,1,float16,fp8,0,2.317600059509277
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,6144,32,4,128,1,fp8,fp8,0,1.7944591522216797
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,6144,32,4,128,1,float16,fp8,0,2.09814567565918
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,6144,32,32,128,1,float16,float16,0,1.1564720153808594
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,6144,32,8,128,1,float16,float16,0,2.187664031982422
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,6144,32,32,128,1,float16,fp8,0,1.0803376197814942
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,6144,32,8,128,1,float16,fp8,0,1.785745620727539
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,6144,32,32,128,1,fp8,fp8,0,1.0179776191711425
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,6144,32,1,128,1,float16,float16,0,1.0239983558654786
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,6144,32,1,128,1,float16,fp8,0,1.0427984237670898
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,6144,32,1,128,1,fp8,fp8,0,1.0705167770385742
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,6144,32,2,128,1,float16,float16,0,1.0151408195495606
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,6144,32,2,128,1,float16,fp8,0,0.9287903785705567
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,6144,32,2,128,1,fp8,fp8,0,0.9027551651000977
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,6144,32,4,128,1,float16,fp8,0,0.9493408203125
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,6144,32,4,128,1,float16,float16,0,1.1320063591003418
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,6144,32,4,128,1,fp8,fp8,0,0.9595328330993652
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,6144,32,8,128,1,float16,float16,0,1.0422240257263184
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,6144,32,8,128,1,float16,fp8,0,0.9079487800598145
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,6144,32,8,128,1,fp8,fp8,0,0.9032848358154297
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,6144,32,32,128,1,float16,float16,0,0.5997680187225342
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,6144,32,32,128,1,fp8,fp8,0,0.5321792125701904
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,6144,32,1,128,1,float16,fp8,0,0.4863471984863281
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,6144,32,1,128,1,float16,float16,0,0.621295976638794
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,6144,32,1,128,1,fp8,fp8,0,0.4792928218841553
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,6144,32,2,128,1,float16,fp8,0,0.4796592235565186
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,6144,32,2,128,1,float16,float16,0,0.5288959980010987
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,6144,32,2,128,1,fp8,fp8,0,0.48515520095825193
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,6144,32,4,128,1,float16,fp8,0,0.47853918075561525
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,6144,32,4,128,1,float16,float16,0,0.6337408065795899
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,6144,32,4,128,1,fp8,fp8,0,0.4768320083618164
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,6144,32,8,128,1,float16,float16,0,0.5423408031463623
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,6144,32,8,128,1,float16,fp8,0,0.47743840217590333
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,6144,32,32,128,1,float16,float16,0,0.36501278877258303
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,6144,32,8,128,1,fp8,fp8,0,0.47907199859619143
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,6144,32,32,128,1,float16,fp8,0,0.2879728078842163
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,6144,32,32,128,1,fp8,fp8,0,0.299291205406189
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,6144,32,8,128,1,fp8,fp8,0,2.2406015396118164
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,6144,32,1,128,1,fp8,fp8,0,0.2664527893066406
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,6144,32,2,128,1,float16,float16,0,0.2829567909240723
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,6144,32,1,128,1,float16,fp8,0,0.3143264055252075
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,6144,32,2,128,1,float16,fp8,0,0.2649152040481567
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,6144,32,2,128,1,fp8,fp8,0,0.2909872055053711
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,6144,32,4,128,1,float16,float16,0,0.28845438957214353
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,6144,32,4,128,1,float16,fp8,0,0.26400160789489746
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,6144,32,4,128,1,fp8,fp8,0,0.2638592004776001
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,6144,32,8,128,1,fp8,fp8,0,0.2756416082382202
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,6144,32,8,128,1,float16,fp8,0,0.2657552003860474
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,6144,32,32,128,1,float16,fp8,0,0.6293231964111328
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,6144,32,1,128,1,float16,float16,0,0.2850480079650879
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,4096,32,1,128,1,float16,fp8,0,3.2957489013671877
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,4096,32,1,128,1,fp8,fp8,0,3.356886291503906
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,4096,32,2,128,1,float16,fp8,0,3.2706462860107424
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,6144,32,8,128,1,float16,float16,0,0.3350559949874878
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,4096,32,1,128,1,float16,float16,0,4.0959022521972654
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,4096,32,2,128,1,float16,float16,0,4.036886215209961
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,4096,32,2,128,1,fp8,fp8,0,3.3563152313232423
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,4096,32,32,128,1,float16,float16,0,2.4215776443481447
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,4096,32,4,128,1,float16,fp8,0,3.407011032104492
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,4096,32,4,128,1,fp8,fp8,0,3.403055953979492
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,4096,32,8,128,1,float16,fp8,0,3.3039310455322264
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,4096,32,4,128,1,float16,float16,0,4.065252685546875
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,4096,32,8,128,1,fp8,fp8,0,3.522848129272461
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,4096,32,8,128,1,float16,float16,0,4.213113784790039
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,4096,32,1,128,1,float16,fp8,0,1.7255023956298827
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,4096,32,1,128,1,float16,float16,0,1.9700960159301757
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,4096,32,1,128,1,fp8,fp8,0,1.7104448318481444
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,4096,32,32,128,1,fp8,fp8,0,2.267550468444824
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,4096,32,2,128,1,float16,fp8,0,1.6951072692871094
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,4096,32,2,128,1,float16,float16,0,2.0356927871704102
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,4096,32,2,128,1,fp8,fp8,0,1.876959991455078
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,4096,32,32,128,1,float16,fp8,0,1.9232528686523438
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,4096,32,4,128,1,fp8,fp8,0,1.6847200393676758
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,4096,32,4,128,1,float16,float16,0,1.9845439910888671
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,4096,32,32,128,1,float16,float16,0,1.1131263732910157
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,4096,32,4,128,1,float16,fp8,0,1.9173599243164063
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,4096,32,8,128,1,float16,fp8,0,1.6796768188476563
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,4096,32,8,128,1,float16,float16,0,2.0246639251708984
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,4096,32,8,128,1,fp8,fp8,0,1.9328399658203126
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,4096,32,32,128,1,float16,fp8,0,0.9514575958251953
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,4096,32,32,128,1,fp8,fp8,0,0.9697823524475098
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,4096,32,1,128,1,fp8,fp8,0,0.8997360229492187
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,4096,32,1,128,1,float16,fp8,0,1.0052032470703125
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,4096,32,1,128,1,float16,float16,0,1.064083194732666
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,4096,32,2,128,1,float16,fp8,0,1.0732000350952149
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,4096,32,2,128,1,fp8,fp8,0,0.878604793548584
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,4096,32,4,128,1,float16,float16,0,0.9640064239501953
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,4096,32,4,128,1,float16,fp8,0,0.853553581237793
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,4096,32,4,128,1,fp8,fp8,0,0.8632063865661621
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,4096,32,8,128,1,float16,float16,0,0.9779071807861328
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,4096,32,8,128,1,float16,fp8,0,1.0134672164916991
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,4096,32,32,128,1,float16,float16,0,0.5759903907775878
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,4096,32,32,128,1,float16,fp8,0,0.5861087799072265
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,4096,32,32,128,1,fp8,fp8,0,0.49825119972229004
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,4096,32,8,128,1,fp8,fp8,0,1.0419887542724608
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,4096,32,1,128,1,float16,float16,0,0.48638238906860354
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,4096,32,1,128,1,float16,fp8,0,0.4881631851196289
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,4096,32,1,128,1,fp8,fp8,0,0.5001200199127197
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,4096,32,2,128,1,float16,float16,0,0.4922463893890381
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,4096,32,2,128,1,fp8,fp8,0,0.44219517707824707
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,4096,32,2,128,1,float16,fp8,0,0.5031536102294922
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,4096,32,4,128,1,float16,float16,0,0.4988416194915771
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,4096,32,4,128,1,float16,fp8,0,0.4463039875030518
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,4096,32,4,128,1,fp8,fp8,0,0.5062543869018554
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,4096,32,8,128,1,float16,float16,0,0.5271520137786865
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,4096,32,32,128,1,float16,float16,0,0.3117311954498291
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,4096,32,32,128,1,float16,fp8,0,0.26965439319610596
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,4096,32,8,128,1,fp8,fp8,0,0.46167840957641604
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,4096,32,32,128,1,fp8,fp8,0,0.2928704023361206
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,4096,32,1,128,1,float16,float16,0,0.26235361099243165
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,4096,32,1,128,1,fp8,fp8,0,0.2407776117324829
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,4096,32,1,128,1,float16,fp8,0,0.2660896062850952
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,4096,32,2,128,1,float16,float16,0,0.2637487888336182
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,4096,32,2,128,1,float16,fp8,0,0.2587615966796875
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,4096,32,2,128,1,fp8,fp8,0,0.25061280727386476
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,4096,32,4,128,1,float16,float16,0,0.2603408098220825
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,4096,32,4,128,1,float16,fp8,0,0.2659872055053711
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,4096,32,4,128,1,fp8,fp8,0,0.2530191898345947
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,4096,32,8,128,1,float16,fp8,0,0.2511375904083252
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,4096,32,8,128,1,fp8,fp8,0,0.24322240352630614
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,4096,32,32,128,1,float16,fp8,0,0.16251519918441773
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,4096,32,32,128,1,float16,float16,0,0.1780832052230835
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,4096,32,2,128,1,float16,float16,0,0.9573904037475586
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,4096,32,32,128,1,fp8,fp8,0,0.16125919818878173
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,4096,32,1,128,1,float16,float16,0,0.1479856014251709
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,4096,32,1,128,1,float16,fp8,0,0.14703999757766723
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,4096,32,1,128,1,fp8,fp8,0,0.14408799409866332
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,4096,32,2,128,1,float16,float16,0,0.14856959581375123
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,4096,32,2,128,1,fp8,fp8,0,0.14101279973983766
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,4096,32,4,128,1,float16,float16,0,0.15379199981689454
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,4096,32,4,128,1,float16,fp8,0,0.1431648015975952
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,4096,32,4,128,1,fp8,fp8,0,0.1408031940460205
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,4096,32,8,128,1,float16,fp8,0,0.14212319850921631
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,4096,32,8,128,1,fp8,fp8,0,0.14360159635543823
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,4096,32,8,128,1,float16,fp8,0,0.47014241218566893
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,3072,32,1,128,1,float16,fp8,0,1.9707504272460938
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,3072,32,1,128,1,fp8,fp8,0,1.9891696929931642
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,3072,32,1,128,1,float16,float16,0,2.2624927520751954
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,4096,32,8,128,1,float16,float16,0,0.2712719917297363
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,4096,32,2,128,1,float16,fp8,0,0.14625760316848754
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,4096,32,8,128,1,float16,float16,0,0.15235040187835694
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,3072,32,2,128,1,float16,float16,0,2.2501359939575196
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,3072,32,2,128,1,float16,fp8,0,2.0179792404174806
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,3072,32,2,128,1,fp8,fp8,0,1.9718320846557618
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,3072,32,4,128,1,float16,float16,0,2.503953552246094
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,3072,32,4,128,1,float16,fp8,0,1.9670143127441406
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,3072,32,4,128,1,fp8,fp8,0,1.966873550415039
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,3072,32,32,128,1,float16,fp8,0,1.1691136360168457
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,3072,32,32,128,1,float16,float16,0,1.3816656112670898
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,3072,32,8,128,1,float16,fp8,0,1.9725648880004882
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,3072,32,8,128,1,float16,float16,0,2.403548812866211
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,3072,32,1,128,1,float16,float16,0,1.116811180114746
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,3072,32,32,128,1,fp8,fp8,0,1.4735312461853027
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,3072,32,1,128,1,float16,fp8,0,1.0027135848999023
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,3072,32,8,128,1,fp8,fp8,0,2.2849599838256838
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,3072,32,1,128,1,fp8,fp8,0,1.0010560035705567
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,3072,32,2,128,1,float16,fp8,0,1.0009615898132325
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,3072,32,2,128,1,float16,float16,0,1.1985232353210449
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,3072,32,2,128,1,fp8,fp8,0,1.2061807632446289
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,3072,32,4,128,1,float16,float16,0,1.1542719841003417
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,3072,32,4,128,1,fp8,fp8,0,1.0012368202209472
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,3072,32,8,128,1,float16,fp8,0,1.002393627166748
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,3072,32,32,128,1,float16,float16,0,0.7650703907012939
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,3072,32,32,128,1,float16,fp8,0,0.6543968200683594
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,3072,32,8,128,1,fp8,fp8,0,1.0123583793640136
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,3072,32,32,128,1,fp8,fp8,0,0.7002848148345947
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,3072,32,1,128,1,float16,float16,0,0.5661456108093261
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,3072,32,1,128,1,float16,fp8,0,0.516435194015503
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,3072,32,1,128,1,fp8,fp8,0,0.5981520175933838
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,3072,32,2,128,1,float16,fp8,0,0.5219823837280273
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,3072,32,2,128,1,fp8,fp8,0,0.5863279819488525
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,3072,32,4,128,1,float16,float16,0,0.5755167961120605
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,3072,32,4,128,1,float16,fp8,0,0.5167712211608887
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,3072,32,4,128,1,fp8,fp8,0,0.5862432003021241
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,3072,32,8,128,1,float16,float16,0,0.5952640056610108
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,3072,32,32,128,1,float16,float16,0,0.354582405090332
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,3072,32,8,128,1,float16,fp8,0,0.5633327960968018
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,3072,32,8,128,1,fp8,fp8,0,0.583619213104248
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,3072,32,32,128,1,float16,fp8,0,0.3269504070281982
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,3072,32,32,128,1,fp8,fp8,0,0.3270479917526245
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,3072,32,1,128,1,float16,float16,0,0.29514079093933104
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,3072,32,1,128,1,float16,fp8,0,0.2832159996032715
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,3072,32,1,128,1,fp8,fp8,0,0.3104559898376465
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,3072,32,2,128,1,float16,float16,0,0.29736640453338625
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,3072,32,2,128,1,float16,fp8,0,0.3004080057144165
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,3072,32,2,128,1,fp8,fp8,0,0.2778752088546753
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,3072,32,4,128,1,float16,float16,0,0.297273588180542
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,3072,32,4,128,1,float16,fp8,0,0.29451038837432864
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,3072,32,4,128,1,fp8,fp8,0,0.2793087959289551
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,3072,32,8,128,1,float16,float16,0,0.31064639091491697
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,3072,32,8,128,1,float16,fp8,0,0.2762048006057739
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,3072,32,32,128,1,float16,float16,0,0.19434080123901368
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,3072,32,8,128,1,fp8,fp8,0,0.2803679943084717
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,3072,32,32,128,1,float16,fp8,0,0.17732000350952148
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,3072,32,32,128,1,fp8,fp8,0,0.18312159776687623
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,3072,32,1,128,1,fp8,fp8,0,0.15588159561157228
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,3072,32,2,128,1,float16,float16,0,0.16217119693756105
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,3072,32,2,128,1,float16,fp8,0,0.1569648027420044
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,3072,32,4,128,1,float16,fp8,0,1.1030768394470214
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,3072,32,2,128,1,fp8,fp8,0,0.15572960376739503
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,3072,32,4,128,1,float16,float16,0,0.16513919830322266
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,3072,32,4,128,1,float16,fp8,0,0.15715039968490602
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,3072,32,8,128,1,float16,float16,0,1.1983471870422364
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,3072,32,4,128,1,fp8,fp8,0,0.1526080012321472
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,3072,32,8,128,1,float16,float16,0,0.16866559982299806
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,3072,32,8,128,1,float16,fp8,0,0.1559424042701721
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,3072,32,32,128,1,float16,float16,0,0.11299359798431396
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,3072,32,32,128,1,float16,fp8,0,0.104476797580719
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,3072,32,8,128,1,fp8,fp8,0,0.15350719690322875
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,3072,32,32,128,1,fp8,fp8,0,0.10477279424667359
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,3072,32,1,128,1,float16,float16,0,0.09780319929122924
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,3072,32,1,128,1,float16,fp8,0,0.0939903974533081
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,3072,32,1,128,1,fp8,fp8,0,0.093612802028656
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,3072,32,2,128,1,float16,float16,0,0.09845439791679382
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,3072,32,2,128,1,float16,fp8,0,0.0938368022441864
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,3072,32,2,128,1,fp8,fp8,0,0.09394720196723938
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,3072,32,4,128,1,float16,float16,0,0.09814239740371704
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,3072,32,2,128,1,float16,float16,0,0.5672336101531983
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,3072,32,4,128,1,float16,fp8,0,0.09406080245971679
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,3072,32,4,128,1,fp8,fp8,0,0.09447839856147766
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,3072,32,8,128,1,float16,float16,0,0.10096479654312134
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,3072,32,8,128,1,float16,fp8,0,0.0935696005821228
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,3072,32,8,128,1,fp8,fp8,0,0.09409120082855224
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,2048,32,1,128,1,float16,fp8,0,1.9735136032104492
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,2048,32,1,128,1,fp8,fp8,0,1.9728015899658202
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,2048,32,2,128,1,float16,fp8,0,1.9755056381225586
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,3072,32,1,128,1,float16,float16,0,0.1600559949874878
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,2048,32,2,128,1,float16,float16,0,2.286795234680176
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,2048,32,2,128,1,fp8,fp8,0,1.9868879318237305
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,2048,32,4,128,1,float16,fp8,0,2.0020240783691405
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,2048,32,4,128,1,fp8,fp8,0,1.9748640060424805
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,2048,32,4,128,1,float16,float16,0,2.3665647506713867
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,2048,32,8,128,1,float16,float16,0,2.3730287551879883
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,2048,32,8,128,1,float16,fp8,0,2.245684814453125
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,2048,32,32,128,1,float16,fp8,0,1.2194671630859375
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,2048,32,1,128,1,float16,float16,0,2.165627288818359
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,3072,32,1,128,1,float16,fp8,0,0.15891200304031372
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,2048,32,32,128,1,float16,float16,0,1.441153621673584
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,2048,32,1,128,1,float16,fp8,0,1.0349311828613281
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,2048,32,1,128,1,float16,float16,0,1.2049776077270509
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,2048,32,32,128,1,fp8,fp8,0,1.5836432456970215
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,2048,32,8,128,1,fp8,fp8,0,2.0041711807250975
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,2048,32,1,128,1,fp8,fp8,0,1.0070223808288574
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,2048,32,2,128,1,float16,fp8,0,0.9970047950744629
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,2048,32,2,128,1,float16,float16,0,1.0902128219604492
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,2048,32,2,128,1,fp8,fp8,0,1.1752752304077148
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,2048,32,4,128,1,float16,float16,0,1.128985595703125
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,2048,32,4,128,1,float16,fp8,0,1.160428810119629
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,2048,32,4,128,1,fp8,fp8,0,1.0761695861816407
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,2048,32,8,128,1,float16,float16,0,1.1604736328125
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,2048,32,32,128,1,float16,float16,0,0.7175680160522461
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,2048,32,32,128,1,float16,fp8,0,0.6459216117858887
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,2048,32,8,128,1,float16,fp8,0,1.0007616043090821
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,2048,32,1,128,1,float16,float16,0,0.5553376197814941
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,2048,32,8,128,1,fp8,fp8,0,0.9978128433227539
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,2048,32,1,128,1,float16,fp8,0,0.5202335834503173
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,2048,32,1,128,1,fp8,fp8,0,0.5814015865325928
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,2048,32,2,128,1,float16,float16,0,0.550492811203003
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,2048,32,2,128,1,float16,fp8,0,0.5276112079620361
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,2048,32,2,128,1,fp8,fp8,0,0.5113327980041504
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,2048,32,4,128,1,float16,fp8,0,0.5112287998199463
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,2048,32,4,128,1,fp8,fp8,0,0.5175727844238281
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,2048,32,8,128,1,float16,float16,0,0.5963583946228027
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,2048,32,8,128,1,float16,fp8,0,0.5327040195465088
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,2048,32,32,128,1,float16,fp8,0,0.3231920003890991
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,2048,32,32,128,1,fp8,fp8,0,0.32510240077972413
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,2048,32,8,128,1,fp8,fp8,0,0.5289743900299072
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,2048,32,1,128,1,float16,float16,0,0.28878400325775144
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,2048,32,1,128,1,float16,fp8,0,0.27772960662841795
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,2048,32,1,128,1,fp8,fp8,0,0.26893279552459715
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,2048,32,2,128,1,float16,float16,0,0.28578720092773435
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,2048,32,2,128,1,float16,fp8,0,0.27093920707702634
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,2048,32,2,128,1,fp8,fp8,0,0.2814176082611084
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,2048,32,4,128,1,float16,float16,0,0.2898128032684326
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,2048,32,4,128,1,float16,fp8,0,0.26961920261383054
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,2048,32,4,128,1,fp8,fp8,0,0.27009758949279783
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,2048,32,8,128,1,float16,float16,0,0.29985599517822265
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,2048,32,8,128,1,float16,fp8,0,0.27704639434814454
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,2048,32,8,128,1,fp8,fp8,0,0.27879838943481444
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,2048,32,32,128,1,fp8,fp8,0,0.175927996635437
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,2048,32,1,128,1,float16,float16,0,0.15520960092544556
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,2048,32,1,128,1,float16,fp8,0,0.15219680070877076
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,2048,32,1,128,1,fp8,fp8,0,0.14689279794692994
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,2048,32,2,128,1,float16,float16,0,0.1553328037261963
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,2048,32,2,128,1,float16,fp8,0,0.15121599435806274
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,2048,32,4,128,1,float16,float16,0,0.15807039737701417
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,2048,32,4,128,1,float16,fp8,0,0.14886399507522582
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,2048,32,4,128,1,fp8,fp8,0,0.1499071955680847
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,2048,32,8,128,1,float16,float16,0,0.16002559661865234
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,2048,32,8,128,1,float16,fp8,0,0.149726402759552
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,2048,32,8,128,1,fp8,fp8,0,0.14793280363082886
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,2048,32,32,128,1,float16,float16,0,0.10909119844436646
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,2048,32,32,128,1,float16,fp8,0,0.10103679895401001
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,2048,32,32,128,1,fp8,fp8,0,0.7046607971191406
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,2048,32,32,128,1,fp8,fp8,0,0.10058720111846924
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,2048,32,1,128,1,float16,fp8,0,0.08532480001449586
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,2048,32,2,128,1,float16,float16,0,0.08844959735870361
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,2048,32,4,128,1,float16,float16,0,0.5591599941253662
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,2048,32,2,128,1,fp8,fp8,0,0.08654879927635192
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,2048,32,4,128,1,float16,float16,0,0.09173280000686646
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,2048,32,32,128,1,float16,float16,0,0.36345601081848145
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,2048,32,4,128,1,float16,fp8,0,0.08585439920425415
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,2048,32,4,128,1,fp8,fp8,0,0.08568959832191467
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,2048,32,8,128,1,float16,float16,0,0.0937391996383667
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,2048,32,8,128,1,float16,fp8,0,0.08610399961471557
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,2048,32,8,128,1,fp8,fp8,0,0.08608959913253784
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,2048,32,32,128,1,float16,float16,0,0.1911120057106018
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,2048,32,32,128,1,float16,fp8,0,0.17530879974365235
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,2048,32,32,128,1,float16,fp8,0,0.0611840009689331
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,2048,32,32,128,1,fp8,fp8,0,0.06146240234375
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,2048,32,1,128,1,float16,float16,0,0.057571202516555786
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,2048,32,1,128,1,float16,fp8,0,0.05492479801177978
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,2048,32,1,128,1,fp8,fp8,0,0.05470560193061828
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,2048,32,2,128,1,float16,float16,0,0.0577888011932373
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,2048,32,2,128,1,fp8,fp8,0,0.14733760356903075
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,2048,32,2,128,1,fp8,fp8,0,0.05462560057640076
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,2048,32,4,128,1,float16,float16,0,0.05875679850578308
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,2048,32,4,128,1,float16,fp8,0,0.05496479868888855
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,2048,32,4,128,1,fp8,fp8,0,0.055289602279663085
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,2048,32,8,128,1,float16,float16,0,0.05951200127601623
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,2048,32,8,128,1,float16,fp8,0,0.05540000200271607
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,2048,32,8,128,1,fp8,fp8,0,0.05461440086364746
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,2048,32,1,128,1,float16,float16,0,0.08958079814910888
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,2048,32,1,128,1,fp8,fp8,0,0.08587520122528076
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,2048,32,2,128,1,float16,fp8,0,0.0855455994606018
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1536,32,1,128,1,float16,float16,0,1.339355182647705
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1536,32,1,128,1,float16,fp8,0,1.2432208061218262
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1536,32,1,128,1,fp8,fp8,0,1.2433823585510253
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1536,32,2,128,1,float16,float16,0,1.3439824104309082
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1536,32,2,128,1,float16,fp8,0,1.242191982269287
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1536,32,2,128,1,fp8,fp8,0,1.241385555267334
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,2048,32,32,128,1,float16,float16,0,0.06740639805793762
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1536,32,4,128,1,float16,fp8,0,1.2425567626953125
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1536,32,4,128,1,float16,float16,0,1.4017727851867676
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,2048,32,2,128,1,float16,fp8,0,0.05459679961204529
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1536,32,4,128,1,fp8,fp8,0,1.2438048362731933
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1536,32,32,128,1,float16,float16,0,0.922702407836914
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1536,32,32,128,1,float16,fp8,0,0.7922912120819092
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1536,32,8,128,1,float16,float16,0,1.4785167694091796
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1536,32,1,128,1,float16,float16,0,0.66910719871521
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1536,32,8,128,1,fp8,fp8,0,1.2424752235412597
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1536,32,1,128,1,float16,fp8,0,0.6369103908538818
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1536,32,32,128,1,fp8,fp8,0,0.9661871910095214
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1536,32,1,128,1,fp8,fp8,0,0.6319744110107421
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1536,32,2,128,1,float16,float16,0,0.6798624038696289
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1536,32,2,128,1,fp8,fp8,0,0.6400703907012939
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1536,32,2,128,1,float16,fp8,0,0.7838704109191894
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1536,32,4,128,1,float16,fp8,0,0.6324944019317627
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1536,32,4,128,1,fp8,fp8,0,0.7048175811767579
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1536,32,8,128,1,float16,float16,0,0.7257311820983887
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1536,32,32,128,1,float16,fp8,0,0.4077648162841797
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1536,32,8,128,1,float16,fp8,0,0.6707664012908936
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1536,32,8,128,1,fp8,fp8,0,0.6399456024169922
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1536,32,32,128,1,float16,float16,0,0.521123218536377
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1536,32,32,128,1,fp8,fp8,0,0.4244192123413086
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1536,32,1,128,1,float16,float16,0,0.3430480003356934
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1536,32,2,128,1,float16,float16,0,0.34217119216918945
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1536,32,2,128,1,float16,fp8,0,0.3411184072494507
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1536,32,4,128,1,float16,float16,0,0.3529711961746216
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1536,32,4,128,1,float16,fp8,0,0.3316447973251343
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1536,32,4,128,1,fp8,fp8,0,0.33403520584106444
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1536,32,8,128,1,float16,fp8,0,0.32857279777526854
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1536,32,8,128,1,float16,float16,0,0.3747663974761963
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1536,32,8,128,1,fp8,fp8,0,0.33465759754180907
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1536,32,32,128,1,float16,float16,0,0.23800959587097167
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1536,32,32,128,1,fp8,fp8,0,0.21734559535980225
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1536,32,1,128,1,float16,float16,0,0.17807040214538575
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1536,32,1,128,1,float16,fp8,0,0.1760640025138855
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1536,32,8,128,1,float16,fp8,0,1.243120002746582
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1536,32,1,128,1,fp8,fp8,0,0.17477920055389404
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1536,32,2,128,1,float16,float16,0,0.17836320400238037
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1536,32,2,128,1,float16,fp8,0,0.1736016035079956
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1536,32,2,128,1,fp8,fp8,0,0.17444640398025513
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1536,32,4,128,1,float16,float16,0,0.18202879428863525
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1536,32,4,128,1,float16,fp8,0,0.17469279766082763
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1536,32,4,128,1,fp8,fp8,0,0.17468639612197875
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1536,32,8,128,1,float16,float16,0,0.19006240367889404
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1536,32,8,128,1,float16,fp8,0,0.17495839595794677
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1536,32,8,128,1,fp8,fp8,0,0.17498079538345337
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1536,32,32,128,1,float16,float16,0,0.12949279546737671
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1536,32,32,128,1,float16,fp8,0,0.11970720291137696
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1536,32,32,128,1,fp8,fp8,0,0.11896159648895263
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1536,32,1,128,1,float16,float16,0,0.10087679624557495
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1536,32,1,128,1,float16,fp8,0,0.09750239849090576
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1536,32,2,128,1,float16,float16,0,0.10123200416564941
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1536,32,2,128,1,float16,fp8,0,0.09732800126075744
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1536,32,2,128,1,fp8,fp8,0,0.09720159769058227
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1536,32,4,128,1,float16,float16,0,0.1032256007194519
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1536,32,4,128,1,float16,fp8,0,0.09771999716758728
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1536,32,1,128,1,float16,fp8,0,0.32704958915710447
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1536,32,4,128,1,fp8,fp8,0,0.09759039878845215
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1536,32,1,128,1,fp8,fp8,0,0.3518143892288208
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1536,32,8,128,1,float16,float16,0,0.10760799646377564
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1536,32,2,128,1,fp8,fp8,0,0.3293567895889282
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1536,32,8,128,1,float16,fp8,0,0.09794880151748657
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1536,32,8,128,1,fp8,fp8,0,0.09828000068664551
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1536,32,32,128,1,float16,float16,0,0.07594879865646362
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1536,32,32,128,1,float16,fp8,0,0.06881279945373535
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1536,32,32,128,1,fp8,fp8,0,0.06925920248031617
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1536,32,1,128,1,float16,float16,0,0.06101599931716919
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1536,32,1,128,1,float16,fp8,0,0.05847039818763733
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1536,32,1,128,1,fp8,fp8,0,0.059006398916244505
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1536,32,2,128,1,float16,float16,0,0.060950398445129395
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1536,32,2,128,1,float16,fp8,0,0.05917919874191284
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1536,32,2,128,1,fp8,fp8,0,0.058503997325897214
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1536,32,4,128,1,float16,float16,0,0.062052798271179196
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1536,32,4,128,1,float16,fp8,0,0.05865439772605896
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1536,32,4,128,1,fp8,fp8,0,0.05920479893684387
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1536,32,8,128,1,float16,float16,0,0.06368319988250733
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1536,32,8,128,1,fp8,fp8,0,0.05899680256843567
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1536,32,32,128,1,float16,float16,0,0.04526880085468292
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1536,32,32,128,1,float16,fp8,0,0.04325920045375824
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1536,32,32,128,1,fp8,fp8,0,0.043268799781799316
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1536,32,1,128,1,float16,fp8,0,0.03913280069828033
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1536,32,2,128,1,float16,float16,0,0.041254401206970215
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1536,32,2,128,1,float16,fp8,0,0.039131200313568114
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1536,32,32,128,1,float16,fp8,0,0.22882239818572997
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1536,32,2,128,1,fp8,fp8,0,0.03916319906711578
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1536,32,4,128,1,float16,float16,0,0.04121919870376587
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1536,32,4,128,1,float16,fp8,0,0.039113599061965945
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1536,32,4,128,1,fp8,fp8,0,0.03916159868240356
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1536,32,8,128,1,float16,float16,0,0.04241760075092316
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1536,32,8,128,1,float16,fp8,0,0.03916159868240356
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1536,32,8,128,1,fp8,fp8,0,0.03917120099067688
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1536,32,4,128,1,float16,float16,0,0.6865664005279541
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1536,32,1,128,1,fp8,fp8,0,0.09731199741363525
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,1024,32,1,128,1,float16,float16,0,1.381220817565918
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,1024,32,1,128,1,float16,fp8,0,1.3272975921630858
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,1024,32,1,128,1,fp8,fp8,0,1.3258959770202636
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1536,32,8,128,1,float16,fp8,0,0.05904160141944885
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1536,32,1,128,1,float16,float16,0,0.04118880033493042
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,1024,32,2,128,1,float16,float16,0,1.3801759719848632
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1536,32,1,128,1,fp8,fp8,0,0.039131200313568114
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,1024,32,2,128,1,float16,fp8,0,1.3825023651123047
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,1024,32,4,128,1,float16,float16,0,1.4515055656433105
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,1024,32,4,128,1,float16,fp8,0,1.3272527694702148
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,1024,32,4,128,1,fp8,fp8,0,1.3294431686401367
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,1024,32,8,128,1,float16,fp8,0,1.3295151710510253
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,1024,32,8,128,1,fp8,fp8,0,1.364031982421875
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,1024,32,8,128,1,float16,float16,0,1.6694992065429688
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1024,32,32,128,1,float16,float16,0,1.023470401763916
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1024,32,1,128,1,float16,float16,0,0.6941023826599121
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1024,32,32,128,1,float16,fp8,0,0.9744768142700195
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1024,32,32,128,1,fp8,fp8,0,0.8859888076782226
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1024,32,1,128,1,float16,fp8,0,0.671340799331665
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1024,32,1,128,1,fp8,fp8,0,0.6765759944915771
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1024,32,2,128,1,float16,float16,0,0.7547760009765625
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1024,32,2,128,1,float16,fp8,0,0.6723648071289062
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1024,32,2,128,1,fp8,fp8,0,0.6723680019378662
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1024,32,4,128,1,float16,float16,0,0.7157728195190429
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1024,32,4,128,1,float16,fp8,0,0.6719888210296631
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1024,32,4,128,1,fp8,fp8,0,0.6713776111602783
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1024,32,8,128,1,float16,float16,0,0.7630815982818604
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1024,32,8,128,1,float16,fp8,0,0.7263472080230713
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1024,32,8,128,1,fp8,fp8,0,0.6728303909301758
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1024,32,32,128,1,float16,float16,0,0.5141712188720703
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1024,32,1,128,1,float16,float16,0,0.3510080099105835
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1024,32,32,128,1,float16,fp8,0,0.4600687980651855
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,1024,32,2,128,1,fp8,fp8,0,1.3260224342346192
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1024,32,32,128,1,fp8,fp8,0,0.45119199752807615
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1024,32,1,128,1,float16,fp8,0,0.3444799900054932
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1024,32,1,128,1,fp8,fp8,0,0.3695472002029419
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1024,32,2,128,1,float16,float16,0,0.3500128030776978
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1024,32,2,128,1,float16,fp8,0,0.35368800163269043
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1024,32,2,128,1,fp8,fp8,0,0.34428160190582274
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1024,32,4,128,1,float16,float16,0,0.3606719970703125
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1024,32,4,128,1,float16,fp8,0,0.34427518844604493
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1024,32,4,128,1,fp8,fp8,0,0.34515678882598877
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1024,32,8,128,1,float16,float16,0,0.38555519580841063
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1024,32,8,128,1,float16,fp8,0,0.35767040252685545
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1024,32,8,128,1,fp8,fp8,0,0.34622879028320314
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1024,32,32,128,1,float16,float16,0,0.26547040939331057
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1024,32,32,128,1,float16,fp8,0,0.23499839305877684
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1024,32,32,128,1,fp8,fp8,0,0.23785920143127443
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1024,32,2,128,1,float16,float16,0,0.18256640434265137
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1024,32,2,128,1,float16,fp8,0,0.1820479989051819
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1024,32,2,128,1,fp8,fp8,0,0.1874176025390625
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1024,32,4,128,1,float16,float16,0,0.18759360313415527
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1024,32,4,128,1,float16,fp8,0,0.18400800228118896
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1024,32,4,128,1,fp8,fp8,0,0.18624479770660402
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1024,32,8,128,1,float16,float16,0,0.19612159729003906
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1024,32,8,128,1,fp8,fp8,0,0.18323359489440919
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1024,32,8,128,1,float16,fp8,0,0.1880735993385315
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1024,32,32,128,1,float16,float16,0,0.13932960033416747
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1024,32,32,128,1,float16,fp8,0,0.1283455967903137
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1024,32,32,128,1,fp8,fp8,0,0.12668960094451903
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1024,32,1,128,1,float16,float16,0,0.101310396194458
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1024,32,1,128,1,float16,fp8,0,0.10211039781570434
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1024,32,1,128,1,fp8,fp8,0,0.09996479749679565
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1024,32,2,128,1,float16,float16,0,0.10216319561004639
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1024,32,2,128,1,float16,fp8,0,0.09896960258483886
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1024,32,2,128,1,fp8,fp8,0,0.0992464005947113
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1024,32,4,128,1,float16,float16,0,0.10715680122375489
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1024,32,4,128,1,float16,fp8,0,0.09983839988708496
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1024,32,4,128,1,fp8,fp8,0,0.10100640058517456
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1024,32,8,128,1,float16,float16,0,0.10758719444274903
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1024,32,8,128,1,float16,fp8,0,0.09959840178489685
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1024,32,32,128,1,float16,float16,0,0.07911520004272461
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1024,32,32,128,1,fp8,fp8,0,0.07193440198898315
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1024,32,1,128,1,float16,float16,0,0.05862560272216797
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1024,32,1,128,1,fp8,fp8,0,0.057627201080322266
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1024,32,2,128,1,float16,float16,0,0.058713597059249875
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1024,32,2,128,1,float16,fp8,0,0.05764639973640442
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1024,32,2,128,1,fp8,fp8,0,0.05753440260887146
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1024,32,4,128,1,float16,float16,0,0.06005120277404785
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1024,32,4,128,1,float16,fp8,0,0.05722879767417908
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1024,32,4,128,1,fp8,fp8,0,0.057468801736831665
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1024,32,8,128,1,float16,float16,0,0.06282240152359009
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1024,32,8,128,1,float16,fp8,0,0.05730239748954773
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1024,32,8,128,1,fp8,fp8,0,0.05762240290641785
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1024,32,32,128,1,float16,float16,0,0.0474592000246048
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1024,32,32,128,1,float16,fp8,0,0.04317600131034851
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1024,32,32,128,1,fp8,fp8,0,0.04327679872512817
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1024,32,1,128,1,float16,float16,0,0.03808160126209259
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1024,32,1,128,1,float16,fp8,0,0.03705599904060364
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1024,32,1,128,1,fp8,fp8,0,0.037038400769233704
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1024,32,1,128,1,float16,float16,0,0.18196640014648438
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1024,32,1,128,1,float16,fp8,0,0.18196480274200438
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1024,32,2,128,1,float16,float16,0,0.038529598712921144
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1024,32,2,128,1,float16,fp8,0,0.0370959997177124
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1024,32,4,128,1,float16,float16,0,0.03881439864635468
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1024,32,2,128,1,fp8,fp8,0,0.03710399866104126
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1024,32,1,128,1,fp8,fp8,0,0.18451839685440063
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1024,32,4,128,1,float16,fp8,0,0.037036800384521486
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1024,32,4,128,1,fp8,fp8,0,0.03701759874820709
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1024,32,8,128,1,float16,float16,0,0.039345601201057435
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1024,32,8,128,1,float16,fp8,0,0.03711679875850678
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1024,32,8,128,1,fp8,fp8,0,0.0370959997177124
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1024,32,32,128,1,float16,float16,0,0.02678079903125763
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1024,32,32,128,1,float16,fp8,0,0.026820799708366393
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1024,32,32,128,1,fp8,fp8,0,0.02688319981098175
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1024,32,1,128,1,float16,float16,0,0.024835200607776643
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1024,32,1,128,1,float16,fp8,0,0.022780799865722658
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1024,32,1,128,1,fp8,fp8,0,0.023481599986553192
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1024,32,2,128,1,float16,float16,0,0.024831999838352204
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1024,32,2,128,1,fp8,fp8,0,0.02276480048894882
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1024,32,4,128,1,float16,float16,0,0.02479359954595566
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1024,32,4,128,1,fp8,fp8,0,0.022752000391483305
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1024,32,8,128,1,float16,float16,0,0.024766400456428528
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1024,32,8,128,1,float16,fp8,0,0.02280319929122925
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1024,32,8,128,1,fp8,fp8,0,0.023695999383926393
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1024,32,8,128,1,fp8,fp8,0,0.10080959796905517
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1024,32,32,128,1,float16,fp8,0,0.07335360050201416
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1024,32,1,128,1,float16,fp8,0,0.057956802845001223
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,512,32,1,128,1,float16,float16,0,0.9671919822692872
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,512,32,1,128,1,float16,fp8,0,1.0025055885314942
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,512,32,2,128,1,float16,float16,0,0.987116813659668
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,512,32,1,128,1,fp8,fp8,0,1.0045200347900392
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,512,32,2,128,1,float16,fp8,0,1.00033597946167
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,512,32,2,128,1,fp8,fp8,0,1.0553903579711914
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,512,32,4,128,1,float16,float16,0,1.0301168441772461
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,512,32,4,128,1,float16,fp8,0,1.0022000312805175
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,512,32,4,128,1,fp8,fp8,0,1.0023920059204101
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,512,32,8,128,1,float16,float16,0,1.1113712310791015
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1024,32,2,128,1,float16,fp8,0,0.023257599771022798
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1024,32,4,128,1,float16,fp8,0,0.023185600340366364
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,512,32,8,128,1,float16,fp8,0,1.0338784217834474
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,512,32,8,128,1,fp8,fp8,0,1.1059712409973144
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,512,32,32,128,1,float16,fp8,0,0.7169087886810303
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,512,32,32,128,1,float16,float16,0,0.8275712013244629
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,512,32,32,128,1,fp8,fp8,0,0.7854656219482422
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,512,32,1,128,1,float16,float16,0,0.4933199882507324
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,512,32,1,128,1,float16,fp8,0,0.5067232131958008
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,512,32,1,128,1,fp8,fp8,0,0.5081664085388183
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,512,32,2,128,1,float16,float16,0,0.4930255889892578
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,512,32,2,128,1,float16,fp8,0,0.5314911842346192
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,512,32,2,128,1,fp8,fp8,0,0.508190393447876
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,512,32,4,128,1,float16,float16,0,0.5177023887634278
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,512,32,4,128,1,float16,fp8,0,0.5373551845550537
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,512,32,4,128,1,fp8,fp8,0,0.5069856166839599
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,512,32,8,128,1,float16,float16,0,0.5561200141906738
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,512,32,8,128,1,fp8,fp8,0,0.5078623771667481
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,512,32,32,128,1,float16,float16,0,0.41431198120117185
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,512,32,32,128,1,float16,fp8,0,0.3652928113937378
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,512,32,32,128,1,fp8,fp8,0,0.3649456024169922
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,512,32,1,128,1,float16,float16,0,0.25785279273986816
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,512,32,1,128,1,float16,fp8,0,0.26152160167694094
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,512,32,1,128,1,fp8,fp8,0,0.26112320423126223
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,512,32,2,128,1,float16,float16,0,0.25269279479980467
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,512,32,2,128,1,float16,fp8,0,0.26395840644836427
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,512,32,2,128,1,fp8,fp8,0,0.26143679618835447
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,512,32,4,128,1,float16,float16,0,0.26340959072113035
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,512,32,4,128,1,fp8,fp8,0,0.26124160289764403
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,512,32,8,128,1,float16,float16,0,0.2820847988128662
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,512,32,8,128,1,float16,fp8,0,0.2617824077606201
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,512,32,32,128,1,float16,fp8,0,0.19026240110397338
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,512,32,8,128,1,fp8,fp8,0,0.2638303995132446
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,512,32,1,128,1,float16,float16,0,0.13494399785995484
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,512,32,1,128,1,float16,fp8,0,0.13814079761505127
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,512,32,1,128,1,fp8,fp8,0,0.13770560026168824
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,512,32,2,128,1,float16,float16,0,0.13682399988174437
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,512,32,2,128,1,float16,fp8,0,0.13802560567855834
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,512,32,2,128,1,fp8,fp8,0,0.13803999423980712
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,512,32,4,128,1,float16,float16,0,0.13935040235519408
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,512,32,4,128,1,float16,fp8,0,0.13992320299148558
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,512,32,4,128,1,fp8,fp8,0,0.13792959451675416
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,512,32,8,128,1,float16,float16,0,0.14860479831695556
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,512,32,8,128,1,float16,fp8,0,0.1380720019340515
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,512,32,8,128,1,fp8,fp8,0,0.1400720000267029
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,512,32,32,128,1,float16,float16,0,0.11254080533981323
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,512,32,32,128,1,float16,fp8,0,0.10267360210418701
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,512,32,32,128,1,fp8,fp8,0,0.10276479721069336
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,512,32,1,128,1,float16,float16,0,0.07689759731292725
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,512,32,1,128,1,float16,fp8,0,0.07487840056419373
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,512,32,1,128,1,fp8,fp8,0,0.07548159956932068
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,512,32,2,128,1,float16,float16,0,0.0769312024116516
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,512,32,2,128,1,float16,fp8,0,0.07582240104675293
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,512,32,2,128,1,fp8,fp8,0,0.07614399790763855
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,512,32,4,128,1,float16,float16,0,0.07904800176620483
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,512,32,4,128,1,fp8,fp8,0,0.07605440020561219
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,512,32,8,128,1,float16,float16,0,0.08347520232200623
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,512,32,8,128,1,float16,fp8,0,0.07663840055465698
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,512,32,8,128,1,fp8,fp8,0,0.07624319791793824
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,512,32,32,128,1,float16,float16,0,0.06353759765625
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,512,32,32,128,1,float16,fp8,0,0.0576416015625
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,512,32,1,128,1,float16,float16,0,0.043356800079345705
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,512,32,1,128,1,float16,fp8,0,0.043268799781799316
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,512,32,1,128,1,fp8,fp8,0,0.04323199987411499
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,512,32,2,128,1,float16,float16,0,0.043347200751304625
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,512,32,2,128,1,float16,fp8,0,0.04324159920215607
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,512,32,2,128,1,fp8,fp8,0,0.0432671993970871
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,512,32,4,128,1,float16,float16,0,0.045311999320983884
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,512,32,4,128,1,float16,fp8,0,0.04324640035629272
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,512,32,4,128,1,fp8,fp8,0,0.04325119853019714
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,512,32,8,128,1,float16,float16,0,0.04797599911689758
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,512,32,4,128,1,float16,fp8,0,0.2659392118453979
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,512,32,8,128,1,float16,fp8,0,0.043249601125717164
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,512,32,8,128,1,fp8,fp8,0,0.043196800351142886
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,512,32,32,128,1,float16,float16,0,0.03708640038967133
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,512,32,32,128,1,float16,fp8,0,0.03329600095748901
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,512,32,32,128,1,fp8,fp8,0,0.03384479880332947
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,512,32,1,128,1,float16,float16,0,0.02858240008354187
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,512,32,32,128,1,fp8,fp8,0,0.18944959640502929
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,512,32,1,128,1,float16,fp8,0,0.026974400877952574
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,512,32,1,128,1,fp8,fp8,0,0.0268528014421463
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,512,32,2,128,1,float16,float16,0,0.02890079915523529
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,512,32,2,128,1,float16,fp8,0,0.027497598528862
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,512,32,2,128,1,fp8,fp8,0,0.028060799837112425
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,512,32,4,128,1,float16,float16,0,0.028896000981330872
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,512,32,4,128,1,float16,fp8,0,0.027272000908851624
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,512,32,4,128,1,fp8,fp8,0,0.028163200616836546
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,512,32,8,128,1,float16,fp8,0,0.028591999411582948
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,512,32,8,128,1,fp8,fp8,0,0.027987200021743774
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,512,32,32,128,1,float16,float16,0,0.02075359970331192
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,512,32,32,128,1,float16,fp8,0,0.020694400370121
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,512,32,32,128,1,fp8,fp8,0,0.020729599893093108
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,512,32,1,128,1,float16,float16,0,0.018662400543689728
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,512,32,1,128,1,float16,fp8,0,0.018588800728321076
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,512,32,2,128,1,float16,float16,0,0.01912959963083267
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,512,32,2,128,1,float16,fp8,0,0.01801919937133789
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,512,32,4,128,1,float16,fp8,0,0.07598559856414795
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,512,32,2,128,1,fp8,fp8,0,0.017972800135612487
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,512,32,4,128,1,float16,float16,0,0.01857600063085556
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,512,32,4,128,1,float16,fp8,0,0.01852159947156906
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,512,32,4,128,1,fp8,fp8,0,0.018611200153827667
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,512,32,8,128,1,float16,fp8,0,0.01860480010509491
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,512,32,8,128,1,fp8,fp8,0,0.018166400492191315
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,512,32,32,128,1,fp8,fp8,0,0.05816799998283386
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,512,32,32,128,1,float16,float16,0,0.018624000251293182
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,512,32,32,128,1,float16,fp8,0,0.018580800294876097
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,512,32,32,128,1,fp8,fp8,0,0.01860480010509491
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,512,32,1,128,1,float16,float16,0,0.016596800088882445
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,512,32,1,128,1,float16,fp8,0,0.01653279960155487
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,512,32,1,128,1,fp8,fp8,0,0.016540800034999848
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,512,32,2,128,1,float16,float16,0,0.01656160056591034
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,512,32,2,128,1,float16,fp8,0,0.016524800658226015
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,512,32,2,128,1,fp8,fp8,0,0.016505600512027742
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,512,32,4,128,1,float16,float16,0,0.01659200042486191
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,512,32,4,128,1,float16,fp8,0,0.016572800278663636
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,512,32,4,128,1,fp8,fp8,0,0.016601599752902985
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,512,32,8,128,1,float16,float16,0,0.016568000614643096
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,512,32,8,128,1,float16,fp8,0,0.0166143998503685
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,512,32,8,128,1,fp8,fp8,0,0.01652639955282211
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,512,32,8,128,1,float16,fp8,0,0.5074704170227051
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,256,32,1,128,1,float16,float16,0,0.39077761173248293
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,256,32,1,128,1,float16,fp8,0,0.41883201599121095
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,256,32,1,128,1,fp8,fp8,0,0.41809759140014646
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,256,32,2,128,1,float16,float16,0,0.392630410194397
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,512,32,32,128,1,float16,float16,0,0.21125280857086182
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,256,32,2,128,1,float16,fp8,0,0.41938080787658694
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,256,32,2,128,1,fp8,fp8,0,0.41909117698669435
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,256,32,4,128,1,float16,float16,0,0.41182398796081543
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,256,32,4,128,1,float16,fp8,0,0.4191472053527832
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,512,32,8,128,1,float16,float16,0,0.029440000653266907
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,256,32,4,128,1,fp8,fp8,0,0.41869120597839354
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,256,32,8,128,1,float16,float16,0,0.45606241226196287
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,256,32,8,128,1,float16,fp8,0,0.41904001235961913
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,256,32,32,128,1,float16,float16,0,0.3582911968231201
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,256,32,8,128,1,fp8,fp8,0,0.4179711818695068
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,256,32,32,128,1,float16,fp8,0,0.3158735990524292
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,256,32,1,128,1,float16,float16,0,0.20324959754943847
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,256,32,2,128,1,float16,float16,0,0.20324161052703857
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,256,32,32,128,1,fp8,fp8,0,0.3174175977706909
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,256,32,2,128,1,float16,fp8,0,0.21412959098815917
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,256,32,2,128,1,fp8,fp8,0,0.21539840698242188
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,256,32,4,128,1,float16,float16,0,0.2130863904953003
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,256,32,4,128,1,float16,fp8,0,0.21558239459991455
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,512,32,8,128,1,float16,float16,0,0.018643200397491455
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,256,32,4,128,1,fp8,fp8,0,0.21530559062957763
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,256,32,8,128,1,float16,float16,0,0.2334144115447998
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,256,32,8,128,1,float16,fp8,0,0.21512000560760497
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,256,32,8,128,1,fp8,fp8,0,0.21600799560546874
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,256,32,32,128,1,float16,float16,0,0.18463040590286256
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,256,32,32,128,1,float16,fp8,0,0.16401599645614623
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,256,32,1,128,1,float16,float16,0,0.10912319421768188
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,256,32,1,128,1,float16,fp8,0,0.11244800090789794
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,256,32,32,128,1,fp8,fp8,0,0.16389280557632446
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,256,32,2,128,1,float16,float16,0,0.10934079885482788
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,256,32,2,128,1,float16,fp8,0,0.1131168007850647
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,256,32,2,128,1,fp8,fp8,0,0.11271519660949707
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,256,32,4,128,1,float16,float16,0,0.11383359432220459
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,256,32,4,128,1,float16,fp8,0,0.11336159706115723
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,256,32,4,128,1,fp8,fp8,0,0.11345280408859253
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,256,32,8,128,1,float16,float16,0,0.12314879894256592
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,256,32,8,128,1,float16,fp8,0,0.11375199556350708
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,256,32,8,128,1,fp8,fp8,0,0.11326080560684204
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,256,32,32,128,1,float16,float16,0,0.09822880029678345
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,256,32,32,128,1,float16,fp8,0,0.087363201379776
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,256,32,1,128,1,float16,float16,0,0.060545599460601805
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,256,32,32,128,1,fp8,fp8,0,0.08727999925613403
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,256,32,1,128,1,float16,fp8,0,0.06033440232276917
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,256,32,1,128,1,fp8,fp8,0,0.05987359881401062
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,256,32,2,128,1,float16,float16,0,0.060254400968551634
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,256,32,2,128,1,float16,fp8,0,0.060924798250198364
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,256,32,4,128,1,float16,float16,0,0.06265119910240173
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,256,32,4,128,1,float16,fp8,0,0.061643201112747195
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,256,32,4,128,1,fp8,fp8,0,0.06094080209732056
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,256,32,8,128,1,float16,float16,0,0.06784639954566955
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,256,32,8,128,1,float16,fp8,0,0.061724799871444705
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,256,32,8,128,1,fp8,fp8,0,0.06171839833259583
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,256,32,32,128,1,float16,float16,0,0.05653759837150574
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,256,32,32,128,1,fp8,fp8,0,0.05092480182647705
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,256,32,32,128,1,float16,fp8,0,0.051337599754333496
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,256,32,1,128,1,float16,float16,0,0.035231998562812804
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,256,32,1,128,1,float16,fp8,0,0.03662880063056946
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,256,32,1,128,1,fp8,fp8,0,0.03655839860439301
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,256,32,2,128,1,float16,float16,0,0.03521760106086731
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,256,32,2,128,1,float16,fp8,0,0.03622399866580963
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,256,32,2,128,1,fp8,fp8,0,0.03615039885044098
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,256,32,4,128,1,float16,float16,0,0.03709760010242462
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,256,32,1,128,1,float16,fp8,0,0.21503360271453859
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,256,32,1,128,1,fp8,fp8,0,0.2150928020477295
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,256,32,4,128,1,fp8,fp8,0,0.03691680133342743
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,256,32,8,128,1,float16,float16,0,0.040278398990631105
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,256,32,8,128,1,float16,fp8,0,0.03709119856357575
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,256,32,32,128,1,float16,float16,0,0.030990400910377504
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,256,32,8,128,1,fp8,fp8,0,0.03713760077953339
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,256,32,32,128,1,float16,fp8,0,0.028830400109291075
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,256,32,32,128,1,fp8,fp8,0,0.02884320020675659
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,256,32,1,128,1,float16,float16,0,0.02272319942712784
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,256,32,1,128,1,float16,fp8,0,0.022567999362945557
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,256,32,2,128,1,float16,float16,0,0.022759999334812164
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,256,32,2,128,1,float16,fp8,0,0.022574399411678315
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,256,32,2,128,1,fp8,fp8,0,0.02253440022468567
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,256,32,4,128,1,float16,float16,0,0.0226623997092247
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,256,32,4,128,1,float16,fp8,0,0.02274399995803833
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,256,32,4,128,1,fp8,fp8,0,0.02274080067873001
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,256,32,8,128,1,float16,float16,0,0.022780799865722658
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,256,32,8,128,1,float16,fp8,0,0.02269120067358017
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,256,32,8,128,1,fp8,fp8,0,0.02271520048379898
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,256,32,32,128,1,float16,float16,0,0.01664000004529953
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,256,32,32,128,1,float16,fp8,0,0.01868479996919632
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,256,32,32,128,1,fp8,fp8,0,0.01865759938955307
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,256,32,1,128,1,float16,float16,0,0.01454080045223236
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,256,32,1,128,1,float16,fp8,0,0.014532800018787383
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,256,32,1,128,1,fp8,fp8,0,0.014609600603580474
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,256,32,2,128,1,float16,float16,0,0.014500799775123595
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,256,32,2,128,1,float16,fp8,0,0.014590400457382201
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,256,32,4,128,1,float16,float16,0,0.014678399264812469
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,256,32,4,128,1,float16,fp8,0,0.014563199877738953
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,256,32,4,128,1,fp8,fp8,0,0.014555199444293976
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,256,32,8,128,1,float16,float16,0,0.016633599996566772
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,256,32,8,128,1,float16,fp8,0,0.014542399346828461
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,256,32,8,128,1,fp8,fp8,0,0.014596800506114959
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,256,32,2,128,1,fp8,fp8,0,0.060227197408676145
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,256,32,32,128,1,float16,fp8,0,0.01465120017528534
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,256,32,32,128,1,fp8,fp8,0,0.014590400457382201
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,256,32,1,128,1,float16,float16,0,0.0141184002161026
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,256,32,1,128,1,float16,fp8,0,0.014539200067520141
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,256,32,1,128,1,fp8,fp8,0,0.01451520025730133
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,256,32,2,128,1,float16,float16,0,0.014519999921321868
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,256,32,2,128,1,float16,fp8,0,0.01456640064716339
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,256,32,2,128,1,fp8,fp8,0,0.014569599926471711
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,256,32,4,128,1,float16,float16,0,0.014564800262451171
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,256,32,4,128,1,float16,fp8,0,0.014553600549697876
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,256,32,4,128,1,fp8,fp8,0,0.012915199995040894
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,256,32,8,128,1,float16,float16,0,0.014542399346828461
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,256,32,4,128,1,float16,fp8,0,0.03639360070228577
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,512,32,1,128,1,fp8,fp8,0,0.01842239946126938
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,256,32,8,128,1,fp8,fp8,0,0.01454080045223236
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,256,32,32,128,1,float16,float16,0,0.01459999978542328
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,256,32,32,128,1,float16,fp8,0,0.014480000734329224
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,256,32,1,128,1,float16,float16,0,0.012491200119256973
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,256,32,1,128,1,float16,fp8,0,0.012491200119256973
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,256,32,1,128,1,fp8,fp8,0,0.012534399330615998
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,256,32,2,128,1,float16,float16,0,0.012468799948692322
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,256,32,2,128,1,float16,fp8,0,0.012520000338554382
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,256,32,1,128,1,fp8,fp8,0,0.022699199616909027
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,256,32,4,128,1,float16,fp8,0,0.012494400143623352
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,256,32,4,128,1,fp8,fp8,0,0.012479999661445617
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,256,32,4,128,1,float16,float16,0,0.012756800651550293
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,256,32,8,128,1,float16,float16,0,0.012564800679683685
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,256,32,8,128,1,float16,fp8,0,0.012518399953842163
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,256,32,8,128,1,fp8,fp8,0,0.012535999715328216
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,256,32,2,128,1,fp8,fp8,0,0.01456640064716339
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,128,32,1,128,1,float16,float16,0,0.247108793258667
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,128,32,1,128,1,float16,fp8,0,0.2611840009689331
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,128,32,1,128,1,fp8,fp8,0,0.26118719577789307
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,128,32,2,128,1,float16,float16,0,0.2473423957824707
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,256,32,32,128,1,float16,float16,0,0.014587199687957764
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,128,32,2,128,1,float16,fp8,0,0.2606911897659302
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,128,32,2,128,1,fp8,fp8,0,0.2619744062423706
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,128,32,4,128,1,float16,float16,0,0.2562351942062378
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,128,32,4,128,1,float16,fp8,0,0.2607295989990234
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,128,32,4,128,1,fp8,fp8,0,0.2610719919204712
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,128,32,8,128,1,float16,float16,0,0.27544798851013186
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,128,32,8,128,1,float16,fp8,0,0.26074559688568116
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,128,32,8,128,1,fp8,fp8,0,0.2613136053085327
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,128,32,32,128,1,float16,float16,0,0.20248160362243653
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,256,32,8,128,1,float16,fp8,0,0.01295360028743744
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,256,32,32,128,1,fp8,fp8,0,0.015275199711322785
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,128,32,1,128,1,float16,float16,0,0.13063839673995972
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,128,32,32,128,1,fp8,fp8,0,0.18698400259017944
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,128,32,1,128,1,float16,fp8,0,0.13553760051727295
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,128,32,2,128,1,float16,float16,0,0.13061280250549318
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,256,32,2,128,1,fp8,fp8,0,0.012508800625801087
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,128,32,2,128,1,fp8,fp8,0,0.13608959913253785
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,128,32,4,128,1,float16,float16,0,0.13575520515441894
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,128,32,4,128,1,fp8,fp8,0,0.13616960048675536
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,128,32,4,128,1,float16,fp8,0,0.1355520009994507
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,128,32,8,128,1,float16,float16,0,0.14448959827423097
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,128,32,8,128,1,float16,fp8,0,0.1364896059036255
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,128,32,32,128,1,float16,float16,0,0.10578720569610596
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,128,32,32,128,1,float16,fp8,0,0.09865760207176208
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,128,32,32,128,1,fp8,fp8,0,0.09851359724998474
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,128,32,1,128,1,float16,float16,0,0.07210559844970703
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,128,32,1,128,1,float16,fp8,0,0.07200959920883179
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,128,32,1,128,1,fp8,fp8,0,0.07213600277900696
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,128,32,2,128,1,float16,float16,0,0.07196320295333862
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,128,32,2,128,1,float16,fp8,0,0.07234879732131957
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,128,32,4,128,1,float16,float16,0,0.07461919784545898
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,128,32,4,128,1,float16,fp8,0,0.07252640128135682
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,128,32,4,128,1,fp8,fp8,0,0.07262560129165649
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,128,32,8,128,1,float16,float16,0,0.07859839797019959
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,128,32,8,128,1,float16,fp8,0,0.072297602891922
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,128,32,8,128,1,fp8,fp8,0,0.07356640100479125
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,128,32,32,128,1,float16,float16,0,0.05735679864883423
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,128,32,32,128,1,fp8,fp8,0,0.05341119766235351
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,128,32,1,128,1,float16,float16,0,0.039105600118637084
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,128,32,1,128,1,float16,fp8,0,0.0390720009803772
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,128,32,32,128,1,float16,fp8,0,0.18691840171813964
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,128,32,1,128,1,fp8,fp8,0,0.039103999733924866
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,128,32,2,128,1,float16,float16,0,0.0391728013753891
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,128,32,2,128,1,float16,fp8,0,0.039175999164581296
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,128,32,2,128,1,fp8,fp8,0,0.03911199867725372
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,128,32,4,128,1,float16,float16,0,0.04071359932422638
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,128,32,4,128,1,float16,fp8,0,0.03914560079574585
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,128,32,4,128,1,fp8,fp8,0,0.03914240002632141
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,128,32,1,128,1,fp8,fp8,0,0.13565119504928588
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,128,32,8,128,1,float16,float16,0,0.04339039921760559
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,128,32,8,128,1,float16,fp8,0,0.03917759954929352
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,128,32,8,128,1,fp8,fp8,0,0.03914400041103363
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,128,32,32,128,1,float16,float16,0,0.033062401413917544
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,128,32,32,128,1,fp8,fp8,0,0.030934399366378783
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,128,32,32,128,1,float16,fp8,0,0.030899199843406677
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,128,32,1,128,1,float16,float16,0,0.02480800002813339
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,128,32,1,128,1,float16,fp8,0,0.02479359954595566
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,128,32,2,128,1,float16,float16,0,0.024825599789619446
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,128,32,2,128,1,float16,fp8,0,0.024803200364112855
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,128,32,2,128,1,fp8,fp8,0,0.024718399345874786
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,128,32,4,128,1,float16,float16,0,0.024740800261497498
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,256,32,1,128,1,fp8,fp8,0,0.11260639429092408
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,128,32,4,128,1,float16,fp8,0,0.024751999974250795
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,128,32,4,128,1,fp8,fp8,0,0.02476000040769577
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,128,32,8,128,1,float16,float16,0,0.026855999231338502
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,128,32,8,128,1,float16,fp8,0,0.02473440021276474
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,128,32,8,128,1,fp8,fp8,0,0.024799999594688416
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,128,32,32,128,1,float16,float16,0,0.018590399622917177
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,128,32,32,128,1,float16,fp8,0,0.018665599822998046
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,128,32,32,128,1,fp8,fp8,0,0.018639999628067016
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,128,32,1,128,1,float16,float16,0,0.01570879966020584
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,128,32,1,128,1,float16,fp8,0,0.016510400176048278
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,128,32,1,128,1,fp8,fp8,0,0.016603200137615202
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,128,32,2,128,1,float16,float16,0,0.01564960032701492
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,128,32,2,128,1,float16,fp8,0,0.016603200137615202
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,128,32,2,128,1,fp8,fp8,0,0.016607999801635742
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,128,32,4,128,1,float16,float16,0,0.016612799465656282
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,128,32,4,128,1,float16,fp8,0,0.016596800088882445
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,128,32,4,128,1,fp8,fp8,0,0.016596800088882445
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,128,32,8,128,1,float16,float16,0,0.0166143998503685
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,128,32,8,128,1,float16,fp8,0,0.016603200137615202
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,128,32,8,128,1,fp8,fp8,0,0.016601599752902985
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,128,32,32,128,1,float16,float16,0,0.012503999471664428
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,128,32,32,128,1,float16,fp8,0,0.012825599312782288
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,128,32,32,128,1,fp8,fp8,0,0.012540799379348756
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,128,32,1,128,1,float16,float16,0,0.01048320010304451
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,128,32,1,128,1,float16,fp8,0,0.012390399724245072
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,128,32,1,128,1,fp8,fp8,0,0.012503999471664428
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,128,32,2,128,1,float16,float16,0,0.010577599704265594
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,128,32,2,128,1,float16,fp8,0,0.012427199631929398
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,128,32,2,128,1,fp8,fp8,0,0.012511999905109405
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,128,32,4,128,1,float16,float16,0,0.012451200187206269
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,128,32,4,128,1,float16,fp8,0,0.012513600289821625
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,128,32,4,128,1,fp8,fp8,0,0.01250240057706833
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,128,32,8,128,1,float16,float16,0,0.012516799569129943
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,128,32,8,128,1,float16,fp8,0,0.012518399953842163
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,128,32,8,128,1,fp8,fp8,0,0.01247519999742508
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,128,32,32,128,1,float16,float16,0,0.01252640038728714
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,128,32,32,128,1,float16,fp8,0,0.01244639977812767
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,128,32,32,128,1,fp8,fp8,0,0.012532800436019897
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,128,32,1,128,1,float16,float16,0,0.0104592002928257
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,128,32,1,128,1,float16,fp8,0,0.010451199859380722
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,128,32,1,128,1,fp8,fp8,0,0.010438399761915207
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,128,32,2,128,1,float16,float16,0,0.010471999645233154
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,128,32,2,128,1,float16,fp8,0,0.0108255997300148
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,128,32,2,128,1,fp8,fp8,0,0.010526400059461594
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,128,32,4,128,1,float16,float16,0,0.010440000146627427
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,128,32,4,128,1,float16,fp8,0,0.010491199791431427
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,128,32,4,128,1,fp8,fp8,0,0.010452800244092942
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,128,32,8,128,1,float16,float16,0,0.010494399815797806
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,128,32,8,128,1,float16,fp8,0,0.010425599664449692
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,128,32,8,128,1,fp8,fp8,0,0.010475199669599533
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,128,32,32,128,1,float16,float16,0,0.010446400195360184
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,128,32,32,128,1,float16,fp8,0,0.010489600151777268
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,128,32,32,128,1,fp8,fp8,0,0.010487999767065048
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,128,32,1,128,1,float16,float16,0,0.010465600341558457
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,128,32,1,128,1,float16,fp8,0,0.01043360009789467
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,128,32,1,128,1,fp8,fp8,0,0.010385599732398988
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,128,32,2,128,1,float16,float16,0,0.010460799932479859
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,128,32,2,128,1,float16,fp8,0,0.010443200170993806
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,128,32,2,128,1,fp8,fp8,0,0.01045600026845932
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,128,32,4,128,1,float16,float16,0,0.010470400005578995
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,128,32,4,128,1,float16,fp8,0,0.010443200170993806
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,128,32,4,128,1,fp8,fp8,0,0.010454399883747101
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,128,32,8,128,1,float16,float16,0,0.010451199859380722
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,128,32,8,128,1,float16,fp8,0,0.010507199913263321
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,128,32,8,128,1,fp8,fp8,0,0.010440000146627427
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,128,32,2,128,1,float16,fp8,0,0.13591040372848512
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,64,32,1,128,1,float16,float16,0,0.2078671932220459
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,64,32,1,128,1,float16,fp8,0,0.21123840808868408
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,64,32,1,128,1,fp8,fp8,0,0.21046879291534423
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,128,32,1,128,1,fp8,fp8,0,0.024750399589538574
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,64,32,2,128,1,float16,float16,0,0.20926880836486816
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,64,32,2,128,1,fp8,fp8,0,0.2100208044052124
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,64,32,4,128,1,float16,float16,0,0.21234400272369386
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,64,32,4,128,1,float16,fp8,0,0.21088480949401855
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,64,32,4,128,1,fp8,fp8,0,0.21077919006347656
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,64,32,8,128,1,float16,float16,0,0.22238080501556395
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,64,32,8,128,1,float16,fp8,0,0.20990400314331054
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,128,32,2,128,1,fp8,fp8,0,0.07189760208129883
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,64,32,32,128,1,float16,float16,0,0.14455360174179077
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,64,32,32,128,1,float16,fp8,0,0.1358224034309387
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,64,32,32,128,1,fp8,fp8,0,0.13601280450820924
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,64,32,1,128,1,float16,float16,0,0.11194080114364624
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,64,32,1,128,1,float16,fp8,0,0.10999360084533691
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,64,32,1,128,1,fp8,fp8,0,0.11002240180969239
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,64,32,2,128,1,float16,float16,0,0.11179360151290893
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,64,32,2,128,1,float16,fp8,0,0.11081600189208984
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,64,32,2,128,1,fp8,fp8,0,0.11034239530563354
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,64,32,4,128,1,float16,float16,0,0.11445920467376709
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,64,32,4,128,1,float16,fp8,0,0.11087360382080078
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,64,32,4,128,1,fp8,fp8,0,0.1099552035331726
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,64,32,8,128,1,float16,float16,0,0.11845120191574096
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,128,32,32,128,1,float16,fp8,0,0.05337920188903809
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,64,32,32,128,1,float16,float16,0,0.07750399708747864
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,64,32,32,128,1,float16,fp8,0,0.072326397895813
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,64,32,32,128,1,fp8,fp8,0,0.07312480211257935
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,64,32,1,128,1,float16,float16,0,0.05985760092735291
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,64,32,1,128,1,float16,fp8,0,0.05896959900856018
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,64,32,1,128,1,fp8,fp8,0,0.05912799835205078
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,64,32,2,128,1,float16,float16,0,0.05983999967575073
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,64,32,2,128,1,float16,fp8,0,0.058748799562454226
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,64,32,2,128,1,fp8,fp8,0,0.058569598197937014
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,64,32,4,128,1,float16,float16,0,0.06198080182075501
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,64,32,4,128,1,float16,fp8,0,0.05861759781837463
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,64,32,8,128,1,float16,float16,0,0.06464959979057312
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,64,32,8,128,1,float16,fp8,0,0.059252798557281494
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,64,32,8,128,1,fp8,fp8,0,0.05864959955215454
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,64,32,32,128,1,float16,float16,0,0.043038401007652285
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,128,32,8,128,1,fp8,fp8,0,0.1356112003326416
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,64,32,32,128,1,float16,fp8,0,0.03912799954414368
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,64,32,2,128,1,float16,fp8,0,0.21155359745025634
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,64,32,32,128,1,fp8,fp8,0,0.039150398969650266
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,64,32,1,128,1,float16,float16,0,0.03477759957313538
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,64,32,1,128,1,float16,fp8,0,0.03306879997253418
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,64,32,1,128,1,fp8,fp8,0,0.032969599962234496
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,64,32,2,128,1,float16,float16,0,0.03467519879341126
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,64,32,2,128,1,float16,fp8,0,0.03307200074195862
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,64,32,4,128,1,float16,float16,0,0.035099199414253233
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,64,32,4,128,1,float16,fp8,0,0.03299039900302887
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,64,32,4,128,1,fp8,fp8,0,0.03299840092658997
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,64,32,8,128,1,float16,float16,0,0.03612799942493439
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,64,32,8,128,1,float16,fp8,0,0.03295519948005676
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,64,32,32,128,1,float16,float16,0,0.024771200120449068
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,64,32,8,128,1,fp8,fp8,0,0.21090080738067626
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,64,32,32,128,1,float16,fp8,0,0.024753600358963013
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,64,32,1,128,1,float16,float16,0,0.022599999606609345
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,64,32,32,128,1,fp8,fp8,0,0.024806399643421174
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,64,32,1,128,1,fp8,fp8,0,0.021379199624061585
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,64,32,2,128,1,float16,float16,0,0.022519999742507936
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,64,32,2,128,1,float16,fp8,0,0.02146400064229965
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,64,32,2,128,1,fp8,fp8,0,0.021537600457668303
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,64,32,4,128,1,float16,float16,0,0.022697600722312927
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,64,32,4,128,1,float16,fp8,0,0.021270400285720824
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,64,32,4,128,1,fp8,fp8,0,0.02112320065498352
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,64,32,8,128,1,float16,fp8,0,0.11108640432357789
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,64,32,8,128,1,float16,float16,0,0.02268799990415573
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,64,32,8,128,1,float16,fp8,0,0.02141599953174591
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,64,32,8,128,1,fp8,fp8,0,0.020710399746894835
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,64,32,32,128,1,float16,float16,0,0.016625599563121797
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,64,32,32,128,1,float16,fp8,0,0.016518400609493257
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,64,32,32,128,1,fp8,fp8,0,0.016625599563121797
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,64,32,1,128,1,float16,float16,0,0.0144896000623703
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,64,32,1,128,1,float16,fp8,0,0.014550399780273438
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,64,32,1,128,1,fp8,fp8,0,0.014446400105953217
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,64,32,2,128,1,float16,float16,0,0.014575999975204468
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,64,32,2,128,1,float16,fp8,0,0.014502400159835815
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,64,32,2,128,1,fp8,fp8,0,0.014574399590492249
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,64,32,4,128,1,float16,float16,0,0.014502400159835815
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,64,32,4,128,1,float16,fp8,0,0.014603200554847717
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,64,32,4,128,1,fp8,fp8,0,0.014478400349617004
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,64,32,8,128,1,float16,float16,0,0.014564800262451171
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,64,32,8,128,1,float16,fp8,0,0.014593599736690522
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,64,32,8,128,1,fp8,fp8,0,0.014590400457382201
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,64,32,32,128,1,float16,fp8,0,0.010871999710798264
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,64,32,32,128,1,fp8,fp8,0,0.011383999884128571
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,64,32,4,128,1,fp8,fp8,0,0.0592415988445282
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,64,32,1,128,1,float16,float16,0,0.0104592002928257
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,64,32,1,128,1,float16,fp8,0,0.01045600026845932
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,64,32,1,128,1,fp8,fp8,0,0.010475199669599533
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,64,32,2,128,1,float16,float16,0,0.010435199737548828
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,64,32,2,128,1,float16,fp8,0,0.01048479974269867
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,64,32,2,128,1,fp8,fp8,0,0.01051200032234192
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,64,32,4,128,1,float16,float16,0,0.010462400317192078
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,64,32,4,128,1,float16,fp8,0,0.01048320010304451
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,64,32,8,128,1,float16,float16,0,0.011023999750614166
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,64,32,8,128,1,float16,fp8,0,0.010480000078678131
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,64,32,8,128,1,fp8,fp8,0,0.010487999767065048
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,64,32,2,128,1,fp8,fp8,0,0.03300159871578216
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,64,32,32,128,1,float16,float16,0,0.010454399883747101
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,64,32,32,128,1,float16,fp8,0,0.010427200049161912
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,64,32,32,128,1,fp8,fp8,0,0.010515200346708298
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,64,32,8,128,1,fp8,fp8,0,0.03299199938774109
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,64,32,1,128,1,float16,float16,0,0.010422399640083313
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,64,32,1,128,1,float16,fp8,0,0.010396800190210342
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,64,32,1,128,1,fp8,fp8,0,0.01037919968366623
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,64,32,2,128,1,float16,float16,0,0.010487999767065048
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,64,32,2,128,1,float16,fp8,0,0.01045600026845932
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,64,32,1,128,1,float16,fp8,0,0.020667199790477753
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,64,32,4,128,1,float16,float16,0,0.010398399829864503
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,64,32,4,128,1,float16,fp8,0,0.010422399640083313
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,64,32,8,128,1,float16,float16,0,0.010468800365924836
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,64,32,8,128,1,float16,fp8,0,0.0104032002389431
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,64,32,8,128,1,fp8,fp8,0,0.010428799688816071
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,64,32,32,128,1,float16,float16,0,0.010492800176143647
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,64,32,32,128,1,float16,fp8,0,0.01043199971318245
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,64,32,32,128,1,fp8,fp8,0,0.010360000282526016
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,64,32,1,128,1,float16,float16,0,0.01040000021457672
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,64,32,1,128,1,float16,fp8,0,0.01043360009789467
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,64,32,1,128,1,fp8,fp8,0,0.010470400005578995
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,64,32,2,128,1,float16,float16,0,0.010425599664449692
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,64,32,2,128,1,float16,fp8,0,0.010473600029945374
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,64,32,2,128,1,fp8,fp8,0,0.010440000146627427
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,64,32,8,128,1,fp8,fp8,0,0.1101904034614563
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,64,32,4,128,1,float16,fp8,0,0.010447999835014344
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,64,32,4,128,1,float16,float16,0,0.010465600341558457
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,64,32,4,128,1,fp8,fp8,0,0.010494399815797806
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,64,32,8,128,1,float16,fp8,0,0.010422399640083313
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,64,32,8,128,1,fp8,fp8,0,0.010460799932479859
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,64,32,8,128,1,float16,float16,0,0.01040479987859726
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,32,32,1,128,1,float16,fp8,0,0.18428319692611694
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,32,32,1,128,1,fp8,fp8,0,0.18380000591278076
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,32,32,2,128,1,float16,float16,0,0.1897663950920105
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,32,32,2,128,1,float16,fp8,0,0.18469280004501343
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,32,32,2,128,1,fp8,fp8,0,0.18485759496688842
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,32,32,4,128,1,float16,fp8,0,0.18438080549240113
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,32,32,4,128,1,fp8,fp8,0,0.18538719415664673
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,32,32,8,128,1,float16,float16,0,0.19299999475479127
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,32,32,8,128,1,float16,fp8,0,0.18512799739837646
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,32,32,8,128,1,fp8,fp8,0,0.18468639850616456
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,64,32,4,128,1,fp8,fp8,0,0.010487999767065048
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,32,32,32,128,1,float16,float16,0,0.11600320339202881
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,32,32,32,128,1,float16,fp8,0,0.11109919548034668
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,32,32,32,128,1,fp8,fp8,0,0.11064319610595703
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,32,32,1,128,1,float16,float16,0,0.09967359900474548
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,32,32,1,128,1,float16,fp8,0,0.09637920260429382
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,32,32,1,128,1,fp8,fp8,0,0.09671040177345276
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,32,32,2,128,1,float16,float16,0,0.09939680099487305
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,32,32,2,128,1,float16,fp8,0,0.09651839733123779
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,32,32,2,128,1,fp8,fp8,0,0.09703199863433838
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,32,32,4,128,1,float16,float16,0,0.10172480344772339
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,32,32,4,128,1,float16,fp8,0,0.09667040109634399
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,32,32,4,128,1,fp8,fp8,0,0.09692320227622986
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,32,32,8,128,1,float16,float16,0,0.1045680046081543
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,64,32,2,128,1,fp8,fp8,0,0.01045600026845932
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,32,32,8,128,1,float16,fp8,0,0.09648479819297791
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,32,32,8,128,1,fp8,fp8,0,0.09689279794692993
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,32,32,32,128,1,float16,float16,0,0.06425759792327881
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,32,32,32,128,1,float16,fp8,0,0.05881440043449402
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,64,32,4,128,1,fp8,fp8,0,0.010395199805498124
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,32,32,32,128,1,fp8,fp8,0,0.059012800455093384
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,32,32,1,128,1,float16,float16,0,0.05481759905815124
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,32,32,1,128,1,float16,fp8,0,0.05320960283279419
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,32,32,1,128,1,fp8,fp8,0,0.05246400237083435
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,32,32,2,128,1,float16,float16,0,0.05544480085372925
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,32,32,2,128,1,float16,fp8,0,0.05269439816474915
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,32,32,2,128,1,fp8,fp8,0,0.05317280292510986
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,32,32,4,128,1,float16,float16,0,0.05569120049476624
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,32,32,4,128,1,float16,fp8,0,0.05249599814414978
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,32,32,4,128,1,fp8,fp8,0,0.05236480236053467
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,32,32,8,128,1,float16,float16,0,0.056974399089813235
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,32,32,8,128,1,float16,fp8,0,0.05310879945755005
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,32,32,8,128,1,fp8,fp8,0,0.05288800001144409
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,32,32,32,128,1,float16,float16,0,0.03443840146064758
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,32,32,32,128,1,float16,fp8,0,0.033055999875068666
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,32,32,1,128,1,float16,float16,0,0.03229919970035553
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,32,32,32,128,1,fp8,fp8,0,0.032971200346946714
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,32,32,1,128,1,float16,fp8,0,0.030660799145698546
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,32,32,1,128,1,fp8,fp8,0,0.030675199627876282
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,32,32,2,128,1,float16,fp8,0,0.03062080144882202
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,32,32,4,128,1,float16,fp8,0,0.030582401156425475
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,32,32,4,128,1,float16,float16,0,0.03252480030059814
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,32,32,4,128,1,fp8,fp8,0,0.030640000104904176
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,32,32,8,128,1,float16,float16,0,0.03306719958782196
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,32,32,8,128,1,float16,fp8,0,0.030649599432945252
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,32,32,1,128,1,float16,float16,0,0.18844319581985475
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,32,32,32,128,1,float16,float16,0,0.021398399770259858
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,32,32,32,128,1,float16,fp8,0,0.020720000565052032
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,32,32,32,128,1,fp8,fp8,0,0.020739200711250304
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,32,32,1,128,1,float16,float16,0,0.02062560021877289
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,32,32,1,128,1,float16,fp8,0,0.020068800449371337
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,32,32,1,128,1,fp8,fp8,0,0.019449600577354433
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,64,32,32,128,1,float16,float16,0,0.012441600114107132
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,32,32,2,128,1,float16,fp8,0,0.019676800072193145
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,32,32,2,128,1,fp8,fp8,0,0.020633600652217865
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,32,32,4,128,1,float16,float16,0,0.19019999504089355
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,32,32,4,128,1,float16,float16,0,0.020715199410915375
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,32,32,4,128,1,float16,fp8,0,0.020505599677562714
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,32,32,4,128,1,fp8,fp8,0,0.019465599954128266
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,32,32,8,128,1,float16,float16,0,0.02072480022907257
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,32,32,8,128,1,float16,fp8,0,0.020524799823760986
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,32,32,8,128,1,fp8,fp8,0,0.019735999405384064
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,32,32,32,128,1,fp8,fp8,0,0.014571200311183929
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,32,32,1,128,1,float16,float16,0,0.014424000680446625
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,32,32,1,128,1,float16,fp8,0,0.013363200426101684
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,32,32,1,128,1,fp8,fp8,0,0.013348799943923951
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,32,32,2,128,1,float16,float16,0,0.014550399780273438
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,32,32,2,128,1,float16,fp8,0,0.01451520025730133
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,32,32,2,128,1,fp8,fp8,0,0.014507199823856353
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,32,32,4,128,1,float16,float16,0,0.014548799395561219
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,32,32,4,128,1,float16,fp8,0,0.014422400295734406
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,32,32,4,128,1,fp8,fp8,0,0.012723200023174286
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,32,32,8,128,1,float16,float16,0,0.014542399346828461
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,32,32,8,128,1,float16,fp8,0,0.01311040073633194
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,32,32,8,128,1,fp8,fp8,0,0.013128000497817992
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,32,32,32,128,1,float16,float16,0,0.011664000153541566
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,32,32,32,128,1,float16,fp8,0,0.010441599786281586
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,32,32,32,128,1,fp8,fp8,0,0.010449600219726563
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,32,32,1,128,1,float16,float16,0,0.010473600029945374
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,32,32,2,128,1,float16,float16,0,0.0323199987411499
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,32,32,2,128,1,fp8,fp8,0,0.030958399176597595
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,32,32,1,128,1,fp8,fp8,0,0.010467199981212616
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,32,32,2,128,1,float16,float16,0,0.010444799810647965
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,32,32,2,128,1,float16,fp8,0,0.01048479974269867
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,32,32,2,128,1,fp8,fp8,0,0.010465600341558457
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,32,32,4,128,1,float16,float16,0,0.010492800176143647
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,32,32,4,128,1,float16,fp8,0,0.010497599840164185
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,32,32,8,128,1,fp8,fp8,0,0.030929601192474364
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,32,32,8,128,1,float16,float16,0,0.010475199669599533
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,32,32,8,128,1,fp8,fp8,0,0.010420800000429154
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,32,32,32,128,1,float16,float16,0,0.010465600341558457
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,32,32,32,128,1,float16,fp8,0,0.010428799688816071
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,32,32,32,128,1,fp8,fp8,0,0.010462400317192078
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,32,32,1,128,1,float16,float16,0,0.010428799688816071
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,32,32,1,128,1,float16,fp8,0,0.010446400195360184
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,32,32,2,128,1,float16,float16,0,0.020751999318599702
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,32,32,1,128,1,fp8,fp8,0,0.010409600287675857
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,32,32,2,128,1,float16,float16,0,0.010496000200510025
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,32,32,2,128,1,float16,fp8,0,0.010465600341558457
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,32,32,2,128,1,fp8,fp8,0,0.010419200360774993
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,32,32,4,128,1,float16,fp8,0,0.010452800244092942
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,32,32,4,128,1,fp8,fp8,0,0.010387200117111205
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,32,32,8,128,1,float16,float16,0,0.010411199927330018
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,32,32,8,128,1,float16,fp8,0,0.010419200360774993
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,32,32,8,128,1,fp8,fp8,0,0.010471999645233154
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,32,32,32,128,1,float16,float16,0,0.010375999659299851
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,32,32,32,128,1,float16,fp8,0,0.01446239948272705
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,32,32,32,128,1,float16,fp8,0,0.010470400005578995
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,32,32,32,128,1,fp8,fp8,0,0.009891200065612792
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,32,32,1,128,1,float16,float16,0,0.010440000146627427
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,32,32,1,128,1,float16,fp8,0,0.01040479987859726
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,32,32,1,128,1,fp8,fp8,0,0.008961600065231324
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,32,32,2,128,1,float16,float16,0,0.010380800068378448
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,32,32,2,128,1,float16,fp8,0,0.010409600287675857
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,32,32,2,128,1,fp8,fp8,0,0.008974400162696839
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,32,32,4,128,1,float16,float16,0,0.010355199873447418
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,32,32,4,128,1,fp8,fp8,0,0.010435199737548828
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,32,32,8,128,1,float16,float16,0,0.010372799634933472
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,32,32,8,128,1,float16,fp8,0,0.010398399829864503
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,32,32,8,128,1,fp8,fp8,0,0.010320000350475311
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,16,32,1,128,1,float16,float16,0,0.18345119953155517
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,16,32,1,128,1,float16,fp8,0,0.1747663974761963
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,16,32,1,128,1,fp8,fp8,0,0.17395999431610107
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,16,32,2,128,1,float16,float16,0,0.18249919414520263
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,16,32,2,128,1,float16,fp8,0,0.1742959976196289
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,16,32,2,128,1,fp8,fp8,0,0.17388479709625243
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,32,32,4,128,1,fp8,fp8,0,0.010407999902963639
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,32,32,8,128,1,float16,fp8,0,0.010507199913263321
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,16,32,4,128,1,float16,fp8,0,0.17279520034790039
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,16,32,4,128,1,fp8,fp8,0,0.17274399995803832
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,32,32,4,128,1,float16,float16,0,0.010532800108194351
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,16,32,8,128,1,float16,fp8,0,0.17358880043029784
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,16,32,8,128,1,float16,float16,0,0.18510559797286988
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,16,32,32,128,1,float16,float16,0,0.10737440586090088
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,16,32,32,128,1,float16,fp8,0,0.09803360104560851
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,16,32,8,128,1,fp8,fp8,0,0.17276320457458497
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,32,32,32,128,1,float16,float16,0,0.014579200744628906
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,16,32,32,128,1,fp8,fp8,0,0.09887199997901916
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,16,32,1,128,1,float16,float16,0,0.09667360186576843
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,16,32,1,128,1,float16,fp8,0,0.09232479929924012
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,16,32,1,128,1,fp8,fp8,0,0.09218559861183166
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,16,32,2,128,1,float16,float16,0,0.09789760112762451
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,16,32,2,128,1,float16,fp8,0,0.09209280014038086
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,16,32,2,128,1,fp8,fp8,0,0.09188479781150818
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,32,32,4,128,1,float16,fp8,0,0.00941760018467903
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,16,32,4,128,1,float16,float16,0,0.09814239740371704
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,16,32,4,128,1,float16,fp8,0,0.09166240096092224
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,16,32,4,128,1,fp8,fp8,0,0.09200959801673889
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,16,32,8,128,1,float16,float16,0,0.09844639897346497
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,16,32,32,128,1,float16,float16,0,0.05727519989013672
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,16,32,8,128,1,fp8,fp8,0,0.09206240177154541
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,16,32,32,128,1,fp8,fp8,0,0.05421919822692871
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,16,32,1,128,1,float16,float16,0,0.054996800422668454
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,16,32,1,128,1,float16,fp8,0,0.051262402534484865
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,16,32,1,128,1,fp8,fp8,0,0.0511135995388031
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,16,32,2,128,1,float16,float16,0,0.05518239736557007
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,16,32,2,128,1,fp8,fp8,0,0.05137280225753784
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,16,32,2,128,1,float16,fp8,0,0.051102399826049805
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,16,32,4,128,1,float16,float16,0,0.055244797468185426
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,16,32,4,128,1,float16,fp8,0,0.05123360157012939
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,16,32,8,128,1,float16,float16,0,0.055323201417922976
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,16,32,4,128,1,fp8,fp8,0,0.05144000053405762
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,16,32,8,128,1,fp8,fp8,0,0.05097119808197022
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,16,32,32,128,1,float16,float16,0,0.032839998602867126
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,16,32,32,128,1,float16,fp8,0,0.03091840147972107
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,16,32,32,128,1,fp8,fp8,0,0.03094559907913208
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,16,32,1,128,1,float16,float16,0,0.030953601002693176
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,16,32,1,128,1,float16,fp8,0,0.028870400786399842
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,16,32,1,128,1,fp8,fp8,0,0.028923198580741882
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,16,32,2,128,1,float16,float16,0,0.031062400341033934
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,16,32,2,128,1,float16,fp8,0,0.02929919958114624
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,16,32,4,128,1,float16,float16,0,0.03195039927959442
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,32,32,1,128,1,float16,fp8,0,0.010451199859380722
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,16,32,4,128,1,float16,fp8,0,0.02922079861164093
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,16,32,4,128,1,fp8,fp8,0,0.029104000329971312
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,16,32,8,128,1,float16,float16,0,0.03249439895153046
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,16,32,8,128,1,float16,fp8,0,0.02909280061721802
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,16,32,8,128,1,fp8,fp8,0,0.029304000735282897
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,16,32,32,128,1,float16,float16,0,0.02069759964942932
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,16,32,32,128,1,float16,fp8,0,0.020718400180339814
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,16,32,32,128,1,fp8,fp8,0,0.020695999264717102
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,16,32,4,128,1,float16,float16,0,0.18402719497680664
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,16,32,1,128,1,float16,float16,0,0.020614400506019592
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,16,32,1,128,1,float16,fp8,0,0.01865600049495697
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,16,32,1,128,1,fp8,fp8,0,0.01870719939470291
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,16,32,2,128,1,fp8,fp8,0,0.018603199720382692
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,16,32,4,128,1,float16,float16,0,0.020313599705696107
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,16,32,2,128,1,float16,fp8,0,0.01863519996404648
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,16,32,4,128,1,fp8,fp8,0,0.01903360038995743
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,16,32,8,128,1,float16,fp8,0,0.018980799615383147
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,16,32,8,128,1,fp8,fp8,0,0.01919520050287247
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,16,32,32,128,1,float16,float16,0,0.014507199823856353
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,16,32,32,128,1,float16,fp8,0,0.013910399377346038
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,16,32,32,128,1,fp8,fp8,0,0.013972799479961395
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,16,32,1,128,1,float16,float16,0,0.013166399300098419
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,16,32,1,128,1,float16,fp8,0,0.012513600289821625
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,16,32,8,128,1,float16,fp8,0,0.09122880101203919
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,16,32,1,128,1,fp8,fp8,0,0.01252480000257492
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,16,32,32,128,1,float16,fp8,0,0.053985601663589476
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,16,32,2,128,1,float16,float16,0,0.012915199995040894
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,16,32,2,128,1,float16,fp8,0,0.01255359947681427
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,16,32,2,128,1,fp8,fp8,0,0.01356160044670105
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,16,32,4,128,1,float16,float16,0,0.01300159990787506
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,16,32,4,128,1,fp8,fp8,0,0.012535999715328216
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,16,32,4,128,1,float16,fp8,0,0.012521600723266602
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,16,32,8,128,1,float16,float16,0,0.014315199851989747
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,16,32,8,128,1,float16,fp8,0,0.012495999783277511
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,16,32,8,128,1,fp8,fp8,0,0.01255359947681427
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16,32,32,128,1,float16,float16,0,0.010486400127410889
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16,32,1,128,1,float16,float16,0,0.0104592002928257
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,16,32,8,128,1,float16,fp8,0,0.05124800205230713
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16,32,1,128,1,float16,fp8,0,0.010831999778747558
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16,32,1,128,1,fp8,fp8,0,0.01048479974269867
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16,32,2,128,1,float16,float16,0,0.010447999835014344
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16,32,2,128,1,float16,fp8,0,0.010390400141477584
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16,32,2,128,1,fp8,fp8,0,0.01040479987859726
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16,32,4,128,1,float16,float16,0,0.010417599976062775
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16,32,4,128,1,float16,fp8,0,0.0104032002389431
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16,32,4,128,1,fp8,fp8,0,0.010497599840164185
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16,32,8,128,1,float16,float16,0,0.010416000336408614
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16,32,8,128,1,float16,fp8,0,0.010503999888896942
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,16,32,2,128,1,fp8,fp8,0,0.028915199637413024
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16,32,8,128,1,fp8,fp8,0,0.010492800176143647
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16,32,32,128,1,float16,float16,0,0.01043199971318245
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16,32,32,128,1,fp8,fp8,0,0.010473600029945374
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16,32,1,128,1,float16,float16,0,0.010449600219726563
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16,32,1,128,1,float16,fp8,0,0.010465600341558457
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16,32,1,128,1,fp8,fp8,0,0.01048159971833229
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16,32,2,128,1,float16,float16,0,0.010411199927330018
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16,32,2,128,1,float16,fp8,0,0.010475199669599533
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,16,32,2,128,1,float16,float16,0,0.020465600490570068
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16,32,2,128,1,fp8,fp8,0,0.010068800300359726
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16,32,4,128,1,float16,float16,0,0.010351999849081039
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16,32,4,128,1,float16,fp8,0,0.010127999633550645
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16,32,4,128,1,fp8,fp8,0,0.010345599800348281
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16,32,8,128,1,float16,float16,0,0.010382399708032609
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,16,32,4,128,1,float16,fp8,0,0.01860959976911545
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16,32,8,128,1,float16,fp8,0,0.00987199991941452
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16,32,32,128,1,float16,fp8,0,0.009012799710035324
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16,32,32,128,1,float16,float16,0,0.010387200117111205
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16,32,8,128,1,fp8,fp8,0,0.010382399708032609
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16,32,32,128,1,fp8,fp8,0,0.009372799843549728
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16,32,1,128,1,float16,float16,0,0.009352000057697296
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16,32,1,128,1,float16,fp8,0,0.010358399897813796
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16,32,1,128,1,fp8,fp8,0,0.008462399989366532
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16,32,2,128,1,float16,float16,0,0.009030400216579438
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16,32,2,128,1,float16,fp8,0,0.009647999703884125
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16,32,2,128,1,fp8,fp8,0,0.008542399853467941
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16,32,4,128,1,float16,float16,0,0.008849599957466125
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16,32,4,128,1,float16,fp8,0,0.010047999769449234
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16,32,4,128,1,fp8,fp8,0,0.010304000228643417
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16,32,8,128,1,float16,float16,0,0.010377600044012069
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16,32,8,128,1,float16,fp8,0,0.010463999956846238
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16,32,8,128,1,fp8,fp8,0,0.010361599922180175
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,1,32,1,128,1,float16,fp8,0,0.1681488037109375
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,1,32,1,128,1,float16,float16,0,0.18013759851455688
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,1,32,1,128,1,fp8,fp8,0,0.16744159460067748
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,1,32,2,128,1,fp8,fp8,0,0.16768640279769897
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,1,32,4,128,1,float16,float16,0,0.1794927954673767
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16,32,32,128,1,fp8,fp8,0,0.010422399640083313
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,1,32,4,128,1,float16,fp8,0,0.1673375964164734
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,1,32,4,128,1,fp8,fp8,0,0.16775519847869874
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,1,32,8,128,1,float16,float16,0,0.1796015977859497
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,1,32,8,128,1,float16,fp8,0,0.1674239993095398
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,1,32,8,128,1,fp8,fp8,0,0.16761280298233033
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,1,32,32,128,1,float16,float16,0,0.09648320078849792
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16,32,32,128,1,float16,fp8,0,0.010441599786281586
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,1,32,32,128,1,fp8,fp8,0,0.08947359919548034
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,1,32,1,128,1,float16,float16,0,0.09531840085983276
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,1,32,1,128,1,float16,fp8,0,0.08904479742050171
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,1,32,1,128,1,fp8,fp8,0,0.089547199010849
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,1,32,2,128,1,float16,float16,0,0.0959984004497528
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,1,32,2,128,1,fp8,fp8,0,0.08914880156517029
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,1,32,4,128,1,float16,float16,0,0.09527999758720399
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,1,32,4,128,1,float16,fp8,0,0.08913919925689698
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,1,32,4,128,1,fp8,fp8,0,0.08894559741020203
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,1,32,8,128,1,float16,float16,0,0.09525279998779297
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,1,32,8,128,1,float16,fp8,0,0.0891759991645813
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,1,32,8,128,1,fp8,fp8,0,0.08879520297050476
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1,32,32,128,1,float16,float16,0,0.05378239750862122
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1,32,32,128,1,float16,fp8,0,0.04949280023574829
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1,32,32,128,1,fp8,fp8,0,0.04939680099487305
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1,32,1,128,1,float16,float16,0,0.0536191999912262
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,1,32,2,128,1,float16,float16,0,0.17977759838104249
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1,32,1,128,1,float16,fp8,0,0.049327999353408813
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1,32,1,128,1,fp8,fp8,0,0.04929920136928558
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1,32,2,128,1,float16,float16,0,0.053947198390960696
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1,32,2,128,1,float16,fp8,0,0.04952960014343262
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1,32,2,128,1,fp8,fp8,0,0.04945439994335175
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1,32,4,128,1,float16,float16,0,0.05401759743690491
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1,32,4,128,1,fp8,fp8,0,0.049563199281692505
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16,32,32,128,1,float16,fp8,0,0.010416000336408614
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1,32,8,128,1,float16,float16,0,0.05379679799079895
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1,32,8,128,1,float16,fp8,0,0.049456000328063965
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1,32,32,128,1,float16,float16,0,0.031060799956321716
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1,32,8,128,1,fp8,fp8,0,0.049188798666000365
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1,32,32,128,1,float16,fp8,0,0.028884801268577575
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1,32,32,128,1,fp8,fp8,0,0.028886398673057555
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1,32,1,128,1,float16,float16,0,0.030876800417900085
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1,32,1,128,1,float16,fp8,0,0.02887200117111206
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1,32,1,128,1,fp8,fp8,0,0.02892799973487854
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1,32,2,128,1,float16,fp8,0,0.02887679934501648
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1,32,2,128,1,fp8,fp8,0,0.028883200883865357
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1,32,4,128,1,float16,float16,0,0.030939200520515443
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1,32,4,128,1,float16,fp8,0,0.028918400406837463
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,1,32,32,128,1,float16,fp8,0,0.08974400162696838
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1,32,8,128,1,float16,float16,0,0.030963200330734252
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1,32,4,128,1,fp8,fp8,0,0.028918400406837463
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1,32,8,128,1,float16,fp8,0,0.028934401273727418
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1,32,32,128,1,float16,float16,0,0.02064319998025894
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1,32,8,128,1,fp8,fp8,0,0.028908801078796387
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1,32,32,128,1,fp8,fp8,0,0.0186271995306015
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1,32,1,128,1,float16,float16,0,0.01956319957971573
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1,32,1,128,1,float16,fp8,0,0.01867839992046356
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1,32,1,128,1,fp8,fp8,0,0.01865759938955307
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,1,32,2,128,1,float16,fp8,0,0.08943359851837158
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1,32,2,128,1,float16,float16,0,0.01868959963321686
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1,32,2,128,1,float16,fp8,0,0.018662400543689728
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1,32,2,128,1,fp8,fp8,0,0.018772800266742707
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1,32,4,128,1,float16,fp8,0,0.01870879977941513
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1,32,4,128,1,fp8,fp8,0,0.01868959963321686
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1,32,8,128,1,float16,float16,0,0.018662400543689728
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1,32,8,128,1,float16,fp8,0,0.018649600446224213
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1,32,8,128,1,fp8,fp8,0,0.01865919977426529
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,16,32,8,128,1,float16,float16,0,0.020659199357032774
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1,32,32,128,1,float16,float16,0,0.014579200744628906
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1,32,32,128,1,float16,fp8,0,0.01252799928188324
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1,32,32,128,1,fp8,fp8,0,0.013523200154304504
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1,32,1,128,1,float16,float16,0,0.014478400349617004
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1,32,1,128,1,float16,fp8,0,0.01263200044631958
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1,32,1,128,1,fp8,fp8,0,0.01266240030527115
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1,32,2,128,1,float16,float16,0,0.014590400457382201
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1,32,2,128,1,float16,fp8,0,0.012559999525547028
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1,32,2,128,1,fp8,fp8,0,0.01372160017490387
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1,32,4,128,1,float16,float16,0,0.014377599954605103
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1,32,4,128,1,float16,fp8,0,0.013361600041389466
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1,32,4,128,1,fp8,fp8,0,0.012772800028324127
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1,32,8,128,1,float16,float16,0,0.014571200311183929
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1,32,8,128,1,float16,fp8,0,0.01292479932308197
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1,32,8,128,1,fp8,fp8,0,0.013033600151538849
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1,32,32,128,1,float16,float16,0,0.011910399794578553
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1,32,32,128,1,float16,fp8,0,0.010440000146627427
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1,32,32,128,1,fp8,fp8,0,0.010521599650382995
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1,32,1,128,1,float16,float16,0,0.010468800365924836
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1,32,1,128,1,float16,fp8,0,0.010406400263309478
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1,32,1,128,1,fp8,fp8,0,0.010478399693965912
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1,32,2,128,1,float16,float16,0,0.010447999835014344
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1,32,2,128,1,float16,fp8,0,0.010463999956846238
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1,32,2,128,1,fp8,fp8,0,0.010460799932479859
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1,32,4,128,1,float16,float16,0,0.01048320010304451
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1,32,4,128,1,float16,fp8,0,0.04949440062046051
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1,32,4,128,1,float16,fp8,0,0.010494399815797806
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1,32,8,128,1,float16,float16,0,0.010395199805498124
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1,32,8,128,1,fp8,fp8,0,0.010391999781131745
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1,32,32,128,1,float16,float16,0,0.010489600151777268
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1,32,32,128,1,float16,fp8,0,0.010360000282526016
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1,32,2,128,1,float16,float16,0,0.030907198786735535
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1,32,32,128,1,fp8,fp8,0,0.010382399708032609
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1,32,1,128,1,float16,float16,0,0.010364799946546554
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1,32,1,128,1,fp8,fp8,0,0.010384000092744827
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1,32,2,128,1,float16,float16,0,0.010407999902963639
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1,32,2,128,1,float16,fp8,0,0.010395199805498124
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1,32,2,128,1,fp8,fp8,0,0.010391999781131745
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1,32,32,128,1,float16,fp8,0,0.01868640035390854
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1,32,4,128,1,float16,float16,0,0.010417599976062775
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1,32,4,128,1,float16,fp8,0,0.009028799831867218
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1,32,4,128,1,fp8,fp8,0,0.010411199927330018
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1,32,8,128,1,float16,float16,0,0.010465600341558457
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1,32,8,128,1,float16,fp8,0,0.010366400331258773
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1,32,8,128,1,fp8,fp8,0,0.010471999645233154
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1,32,32,128,1,float16,float16,0,0.010416000336408614
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1,32,32,128,1,float16,fp8,0,0.009876800328493118
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1,32,4,128,1,float16,float16,0,0.019649599492549897
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1,32,32,128,1,fp8,fp8,0,0.01043360009789467
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1,32,1,128,1,float16,float16,0,0.010390400141477584
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1,32,1,128,1,float16,fp8,0,0.00939679965376854
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1,32,1,128,1,fp8,fp8,0,0.009603200107812881
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1,32,2,128,1,float16,float16,0,0.008654399961233138
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1,32,2,128,1,float16,fp8,0,0.010044799745082855
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1,32,2,128,1,fp8,fp8,0,0.009201599657535553
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1,32,4,128,1,float16,fp8,0,0.010134399682283402
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1,32,8,128,1,float16,float16,0,0.009652800112962722
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1,32,4,128,1,fp8,fp8,0,0.00947519987821579
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1,32,8,128,1,float16,fp8,0,0.008924800157546996
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1,32,8,128,1,fp8,fp8,0,0.009679999947547913
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,1,32,2,128,1,float16,fp8,0,0.16858240365982055
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1,32,4,128,1,fp8,fp8,0,0.010497599840164185
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1,32,8,128,1,float16,fp8,0,0.010449600219726563
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1,32,1,128,1,float16,fp8,0,0.010390400141477584
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1,32,4,128,1,float16,float16,0,0.009563200175762177
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,16384,24,1,128,1,float16,fp8,0,8.564169311523438
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,16384,24,1,128,1,fp8,fp8,0,8.554649353027344
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,16384,24,2,128,1,float16,fp8,0,8.724263763427734
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,16384,24,2,128,1,fp8,fp8,0,8.505107116699218
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,16384,24,1,128,1,float16,float16,0,10.94175033569336
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,16384,24,4,128,1,float16,fp8,0,8.561656188964843
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,16384,24,4,128,1,float16,float16,0,10.993141174316406
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,16384,24,2,128,1,float16,float16,0,10.907012939453125
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,16384,24,4,128,1,fp8,fp8,0,8.837725067138672
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,16384,24,8,128,1,float16,fp8,0,8.819181060791015
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,16384,24,8,128,1,fp8,fp8,0,8.796084594726562
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16384,24,24,128,1,float16,fp8,0,4.414223861694336
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16384,24,24,128,1,fp8,fp8,0,4.492808151245117
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,16384,24,8,128,1,float16,float16,0,11.428883361816407
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16384,24,1,128,1,float16,float16,0,5.40776481628418
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16384,24,1,128,1,float16,fp8,0,4.458078384399414
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16384,24,1,128,1,fp8,fp8,0,4.492878341674805
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16384,24,2,128,1,float16,fp8,0,4.313768005371093
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16384,24,2,128,1,fp8,fp8,0,4.322737503051758
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16384,24,2,128,1,float16,float16,0,5.633383941650391
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16384,24,4,128,1,float16,fp8,0,4.4776161193847654
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16384,24,4,128,1,fp8,fp8,0,4.502569580078125
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16384,24,4,128,1,float16,float16,0,5.713652801513672
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16384,24,8,128,1,float16,fp8,0,4.3308063507080075
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16384,24,24,128,1,float16,fp8,0,2.283430480957031
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16384,24,8,128,1,fp8,fp8,0,4.385268783569336
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16384,24,24,128,1,fp8,fp8,0,2.3578128814697266
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16384,24,8,128,1,float16,float16,0,5.761891174316406
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16384,24,1,128,1,float16,float16,0,2.780691146850586
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16384,24,1,128,1,float16,fp8,0,2.235704040527344
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16384,24,1,128,1,fp8,fp8,0,2.2143856048583985
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16384,24,2,128,1,float16,fp8,0,2.214588737487793
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16384,24,2,128,1,float16,float16,0,2.7467983245849608
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16384,24,2,128,1,fp8,fp8,0,2.605508804321289
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16384,24,4,128,1,float16,float16,0,2.789200019836426
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16384,24,4,128,1,float16,fp8,0,2.287507247924805
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16384,24,4,128,1,fp8,fp8,0,2.2047632217407225
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16384,24,8,128,1,float16,fp8,0,2.22747688293457
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16384,24,8,128,1,float16,float16,0,2.916223907470703
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16384,24,8,128,1,fp8,fp8,0,2.4529840469360353
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16384,24,24,128,1,float16,fp8,0,1.1957856178283692
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16384,24,24,128,1,fp8,fp8,0,1.2319487571716308
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16384,24,1,128,1,float16,fp8,0,1.1797103881835938
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16384,24,1,128,1,float16,float16,0,1.4914015769958495
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16384,24,2,128,1,float16,float16,0,1.31003999710083
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16384,24,2,128,1,float16,fp8,0,1.1504464149475098
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16384,24,2,128,1,fp8,fp8,0,1.3996879577636718
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16384,24,4,128,1,float16,float16,0,1.3066191673278809
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16384,24,4,128,1,float16,fp8,0,1.3029520034790039
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16384,24,4,128,1,fp8,fp8,0,1.1435343742370605
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16384,24,8,128,1,float16,float16,0,1.3302495956420899
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16384,24,8,128,1,float16,fp8,0,1.1927760124206543
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16384,24,8,128,1,fp8,fp8,0,1.1958512306213378
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16384,24,1,128,1,fp8,fp8,0,1.1524239540100099
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,12288,24,1,128,1,fp8,fp8,0,4.994342422485351
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,12288,24,2,128,1,float16,fp8,0,4.943479919433594
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,12288,24,1,128,1,float16,float16,0,6.4452972412109375
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,12288,24,2,128,1,float16,float16,0,6.321814346313476
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,12288,24,1,128,1,float16,fp8,0,5.089572906494141
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,12288,24,2,128,1,fp8,fp8,0,4.898251342773437
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,12288,24,4,128,1,float16,fp8,0,5.187113571166992
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,12288,24,4,128,1,float16,float16,0,6.365155029296875
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,12288,24,4,128,1,fp8,fp8,0,5.228995132446289
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,12288,24,8,128,1,float16,float16,0,6.281180953979492
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,12288,24,8,128,1,float16,fp8,0,5.175203323364258
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,12288,24,24,128,1,float16,fp8,0,2.6564447402954103
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,12288,24,8,128,1,fp8,fp8,0,5.094251251220703
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,12288,24,1,128,1,float16,fp8,0,2.5490272521972654
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,12288,24,24,128,1,fp8,fp8,0,2.8825679779052735
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,12288,24,1,128,1,float16,float16,0,3.1823984146118165
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,12288,24,1,128,1,fp8,fp8,0,2.567255973815918
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,12288,24,2,128,1,float16,fp8,0,2.5070367813110352
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,12288,24,2,128,1,float16,float16,0,2.97825927734375
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,12288,24,2,128,1,fp8,fp8,0,3.032142448425293
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,12288,24,4,128,1,float16,float16,0,3.1994319915771485
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,12288,24,4,128,1,float16,fp8,0,2.4992544174194338
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,12288,24,4,128,1,fp8,fp8,0,2.8873104095458983
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,12288,24,8,128,1,float16,float16,0,3.1311391830444335
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,12288,24,8,128,1,float16,fp8,0,3.0716831207275392
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,12288,24,8,128,1,fp8,fp8,0,2.588217544555664
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,12288,24,24,128,1,float16,fp8,0,1.551201629638672
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,12288,24,24,128,1,fp8,fp8,0,1.3821120262145996
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,12288,24,1,128,1,float16,float16,0,1.464198398590088
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,12288,24,1,128,1,float16,fp8,0,1.3303327560424805
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,12288,24,1,128,1,fp8,fp8,0,1.2911952018737793
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,12288,24,2,128,1,float16,float16,0,1.6053695678710938
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,12288,24,2,128,1,float16,fp8,0,1.2849920272827149
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,12288,24,2,128,1,fp8,fp8,0,1.2850671768188477
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,12288,24,4,128,1,float16,fp8,0,1.2791104316711426
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,12288,24,4,128,1,fp8,fp8,0,1.2883440017700196
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,12288,24,4,128,1,float16,float16,0,1.6623743057250977
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,12288,24,8,128,1,float16,float16,0,1.4842703819274903
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,12288,24,8,128,1,fp8,fp8,0,1.587945556640625
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,12288,24,24,128,1,float16,fp8,0,0.7819456100463867
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,12288,24,24,128,1,fp8,fp8,0,0.7367424011230469
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,12288,24,1,128,1,float16,float16,0,0.7723023891448975
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,12288,24,8,128,1,float16,fp8,0,1.2849743843078614
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,12288,24,1,128,1,float16,fp8,0,0.6955376148223877
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,12288,24,1,128,1,fp8,fp8,0,0.6896207809448243
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,12288,24,2,128,1,float16,float16,0,0.7798448085784913
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,12288,24,2,128,1,float16,fp8,0,0.6898111820220947
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,12288,24,2,128,1,fp8,fp8,0,0.8146127700805664
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,12288,24,4,128,1,float16,float16,0,0.7787775993347168
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,12288,24,4,128,1,float16,fp8,0,0.7383615970611572
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,12288,24,4,128,1,fp8,fp8,0,0.7523248195648193
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,12288,24,8,128,1,float16,float16,0,0.7886735916137695
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,12288,24,8,128,1,float16,fp8,0,0.7319183826446534
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,12288,24,8,128,1,fp8,fp8,0,0.7116112232208252
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,10240,24,1,128,1,float16,fp8,0,3.528822326660156
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,10240,24,1,128,1,float16,float16,0,4.3384544372558596
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,10240,24,1,128,1,fp8,fp8,0,3.5178478240966795
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,10240,24,2,128,1,float16,float16,0,4.4258544921875
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,10240,24,2,128,1,float16,fp8,0,3.5672687530517577
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,10240,24,2,128,1,fp8,fp8,0,3.772092819213867
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,10240,24,4,128,1,float16,float16,0,4.590185546875
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,10240,24,4,128,1,float16,fp8,0,3.6324657440185546
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,10240,24,4,128,1,fp8,fp8,0,3.8825504302978517
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,10240,24,8,128,1,float16,float16,0,4.589689636230469
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,10240,24,8,128,1,float16,fp8,0,3.561663818359375
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,10240,24,8,128,1,fp8,fp8,0,3.877628707885742
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,10240,24,24,128,1,float16,fp8,0,1.8940559387207032
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,10240,24,24,128,1,fp8,fp8,0,1.9213903427124024
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,10240,24,1,128,1,float16,float16,0,2.0842880249023437
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,10240,24,1,128,1,float16,fp8,0,2.113894462585449
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,10240,24,1,128,1,fp8,fp8,0,1.808732795715332
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,10240,24,2,128,1,float16,float16,0,2.0427263259887694
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,10240,24,2,128,1,float16,fp8,0,2.042942428588867
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,10240,24,2,128,1,fp8,fp8,0,1.86560001373291
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,10240,24,4,128,1,float16,float16,0,2.143507194519043
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,10240,24,4,128,1,float16,fp8,0,2.1525888442993164
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,10240,24,4,128,1,fp8,fp8,0,1.765216064453125
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,10240,24,8,128,1,float16,float16,0,2.132491111755371
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,10240,24,8,128,1,float16,fp8,0,2.1193504333496094
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,10240,24,8,128,1,fp8,fp8,0,1.8050239562988282
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,10240,24,24,128,1,float16,float16,0,1.1265263557434082
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,10240,24,24,128,1,float16,fp8,0,1.0033280372619628
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,10240,24,24,128,1,float16,float16,0,2.239182472229004
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16384,24,24,128,1,float16,float16,0,1.3775983810424806
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,10240,24,24,128,1,fp8,fp8,0,1.0019519805908204
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16384,24,24,128,1,float16,float16,0,2.6923967361450196
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,12288,24,24,128,1,float16,float16,0,1.5534000396728516
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,12288,24,24,128,1,float16,float16,0,0.8116064071655273
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,10240,24,1,128,1,float16,float16,0,1.144859218597412
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,10240,24,1,128,1,float16,fp8,0,0.9406432151794434
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,12288,24,24,128,1,float16,float16,0,3.09771671295166
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16384,24,24,128,1,float16,float16,0,5.681985473632812
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,10240,24,2,128,1,fp8,fp8,0,0.9334272384643555
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,10240,24,2,128,1,float16,float16,0,1.1956704139709473
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,10240,24,2,128,1,float16,fp8,0,1.009883213043213
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,10240,24,4,128,1,float16,fp8,0,0.9523167610168457
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,10240,24,4,128,1,float16,float16,0,1.162320041656494
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,10240,24,4,128,1,fp8,fp8,0,1.1286928176879882
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,10240,24,24,128,1,float16,float16,0,0.601688003540039
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,10240,24,8,128,1,float16,fp8,0,0.9301471710205078
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,10240,24,24,128,1,float16,fp8,0,0.5439472198486328
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,10240,24,8,128,1,float16,float16,0,1.2398528099060058
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,10240,24,8,128,1,fp8,fp8,0,1.0747903823852538
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,10240,24,24,128,1,fp8,fp8,0,0.6349440097808838
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,10240,24,1,128,1,float16,fp8,0,0.5482272148132324
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,10240,24,1,128,1,float16,float16,0,0.5787504196166993
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,10240,24,1,128,1,fp8,fp8,0,0.5051839828491211
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,10240,24,2,128,1,float16,float16,0,0.5576367855072022
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,10240,24,2,128,1,fp8,fp8,0,0.5274127960205078
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,10240,24,2,128,1,float16,fp8,0,0.6251088142395019
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,10240,24,4,128,1,float16,float16,0,0.6240960121154785
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,10240,24,4,128,1,fp8,fp8,0,0.5272607803344727
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,10240,24,8,128,1,float16,float16,0,0.5624800205230713
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,10240,24,8,128,1,float16,fp8,0,0.5279727935791015
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,10240,24,8,128,1,fp8,fp8,0,0.5209023952484131
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,10240,24,1,128,1,fp8,fp8,0,0.9260399818420411
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,10240,24,4,128,1,float16,fp8,0,0.5312687873840332
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,8192,24,1,128,1,float16,fp8,0,4.645800018310547
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,8192,24,1,128,1,fp8,fp8,0,4.637348937988281
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,8192,24,2,128,1,float16,fp8,0,4.639971160888672
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,8192,24,2,128,1,fp8,fp8,0,4.599353790283203
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,8192,24,1,128,1,float16,float16,0,5.72163200378418
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,8192,24,2,128,1,float16,float16,0,5.744694519042969
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,8192,24,4,128,1,float16,float16,0,5.8046142578125
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,8192,24,4,128,1,float16,fp8,0,4.648788833618164
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,8192,24,24,128,1,float16,fp8,0,2.6577728271484373
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,8192,24,24,128,1,float16,float16,0,3.1755903244018553
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,8192,24,4,128,1,fp8,fp8,0,4.670108795166016
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,8192,24,24,128,1,fp8,fp8,0,2.65817928314209
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,8192,24,8,128,1,float16,fp8,0,4.671598434448242
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,8192,24,8,128,1,fp8,fp8,0,4.684441757202149
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,8192,24,8,128,1,float16,float16,0,5.96336784362793
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,8192,24,1,128,1,float16,fp8,0,2.3538192749023437
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,8192,24,1,128,1,float16,float16,0,2.722870445251465
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,8192,24,1,128,1,fp8,fp8,0,2.650716781616211
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,8192,24,2,128,1,float16,fp8,0,2.315118408203125
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,8192,24,2,128,1,fp8,fp8,0,2.4216623306274414
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,8192,24,4,128,1,float16,float16,0,2.775155258178711
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,8192,24,4,128,1,float16,fp8,0,2.857761573791504
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,8192,24,4,128,1,fp8,fp8,0,2.3903711318969725
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,8192,24,24,128,1,float16,float16,0,1.4722335815429688
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,8192,24,8,128,1,float16,fp8,0,2.3192655563354494
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,8192,24,8,128,1,float16,float16,0,2.978654479980469
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,8192,24,24,128,1,float16,fp8,0,1.5618224143981934
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,8192,24,8,128,1,fp8,fp8,0,2.3078895568847657
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,8192,24,24,128,1,fp8,fp8,0,1.2729968070983886
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,8192,24,1,128,1,float16,float16,0,1.330299186706543
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,8192,24,1,128,1,float16,fp8,0,1.4116047859191894
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,8192,24,1,128,1,fp8,fp8,0,1.2149328231811523
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,8192,24,2,128,1,float16,float16,0,1.45524320602417
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,8192,24,2,128,1,float16,fp8,0,1.2425264358520507
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,8192,24,2,128,1,fp8,fp8,0,1.2185471534729004
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,8192,24,2,128,1,float16,float16,0,2.8473472595214844
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,8192,24,4,128,1,float16,float16,0,1.3432527542114259
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,8192,24,4,128,1,float16,fp8,0,1.390272045135498
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,8192,24,24,128,1,float16,fp8,0,0.6624879837036133
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,8192,24,24,128,1,float16,float16,0,0.7603104114532471
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,8192,24,4,128,1,fp8,fp8,0,1.2826288223266602
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,8192,24,8,128,1,float16,float16,0,1.3834912300109863
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,8192,24,8,128,1,fp8,fp8,0,1.1969311714172364
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,8192,24,24,128,1,fp8,fp8,0,0.8063119888305664
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,8192,24,1,128,1,float16,float16,0,0.7260079860687256
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,8192,24,8,128,1,float16,fp8,0,1.4604975700378418
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,8192,24,1,128,1,float16,fp8,0,0.6259200096130371
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,8192,24,1,128,1,fp8,fp8,0,0.6309343814849854
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,8192,24,2,128,1,float16,float16,0,0.7008639812469483
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,8192,24,2,128,1,float16,fp8,0,0.771120023727417
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,8192,24,2,128,1,fp8,fp8,0,0.6321807861328125
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,8192,24,4,128,1,float16,fp8,0,0.6570032119750977
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,8192,24,4,128,1,float16,float16,0,0.7422736167907715
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,8192,24,4,128,1,fp8,fp8,0,0.6995696067810059
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,8192,24,8,128,1,float16,float16,0,0.7115407943725586
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,8192,24,8,128,1,fp8,fp8,0,0.64791841506958
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,8192,24,24,128,1,float16,float16,0,0.45597119331359864
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,8192,24,24,128,1,fp8,fp8,0,0.38030879497528075
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,8192,24,1,128,1,float16,float16,0,0.3857840061187744
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,8192,24,1,128,1,fp8,fp8,0,0.3687328100204468
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,8192,24,2,128,1,float16,float16,0,0.3948976039886475
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,8192,24,2,128,1,float16,fp8,0,0.37052159309387206
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,8192,24,2,128,1,fp8,fp8,0,0.35708320140838623
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,8192,24,4,128,1,float16,float16,0,0.3810015916824341
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,8192,24,4,128,1,float16,fp8,0,0.3705440044403076
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,8192,24,4,128,1,fp8,fp8,0,0.3705280065536499
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,8192,24,8,128,1,float16,float16,0,0.38632960319519044
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,8192,24,8,128,1,float16,fp8,0,0.3562351942062378
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,8192,24,8,128,1,fp8,fp8,0,0.36350560188293457
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,8192,24,8,128,1,float16,fp8,0,0.6704783916473389
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,8192,24,24,128,1,float16,fp8,0,0.3735199928283691
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,6144,24,1,128,1,float16,fp8,0,2.6827951431274415
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,8192,24,1,128,1,float16,fp8,0,0.3745759963989258
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,6144,24,1,128,1,fp8,fp8,0,2.72751522064209
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,6144,24,1,128,1,float16,float16,0,3.2739871978759765
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,6144,24,2,128,1,float16,float16,0,3.3222625732421873
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,6144,24,2,128,1,float16,fp8,0,2.7060623168945312
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,6144,24,2,128,1,fp8,fp8,0,2.6838848114013674
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,6144,24,4,128,1,float16,fp8,0,2.70837287902832
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,6144,24,4,128,1,fp8,fp8,0,2.7643711090087892
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,6144,24,4,128,1,float16,float16,0,3.3512863159179687
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,6144,24,8,128,1,float16,float16,0,3.496956634521484
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,6144,24,8,128,1,float16,fp8,0,3.1593536376953124
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,6144,24,24,128,1,float16,fp8,0,1.488263988494873
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,6144,24,24,128,1,float16,float16,0,1.7670448303222657
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,6144,24,24,128,1,fp8,fp8,0,1.5094896316528321
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,6144,24,1,128,1,float16,float16,0,1.5616304397583007
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,6144,24,8,128,1,fp8,fp8,0,2.7047887802124024
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,6144,24,1,128,1,float16,fp8,0,1.6351024627685546
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,6144,24,2,128,1,float16,fp8,0,1.3744272232055663
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,6144,24,2,128,1,float16,float16,0,1.5664159774780273
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,6144,24,1,128,1,fp8,fp8,0,1.7759664535522461
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,6144,24,2,128,1,fp8,fp8,0,1.555408000946045
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,6144,24,4,128,1,fp8,fp8,0,1.3829520225524903
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,6144,24,4,128,1,float16,float16,0,1.571735954284668
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,6144,24,4,128,1,float16,fp8,0,1.7301008224487304
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,6144,24,8,128,1,float16,float16,0,1.5903008460998536
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,6144,24,24,128,1,float16,fp8,0,0.8015904426574707
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,6144,24,24,128,1,fp8,fp8,0,0.7695424079895019
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,6144,24,8,128,1,float16,fp8,0,1.3711039543151855
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,6144,24,1,128,1,float16,float16,0,0.9162447929382325
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,6144,24,1,128,1,float16,fp8,0,0.8081680297851562
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,6144,24,8,128,1,fp8,fp8,0,1.6052864074707032
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,6144,24,1,128,1,fp8,fp8,0,0.8369152069091796
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,6144,24,2,128,1,float16,float16,0,0.7951344013214111
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,6144,24,2,128,1,float16,fp8,0,0.7145328044891357
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,6144,24,2,128,1,fp8,fp8,0,0.7131216049194335
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,6144,24,4,128,1,float16,fp8,0,0.7243040084838868
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,6144,24,4,128,1,float16,float16,0,0.8579504013061523
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,6144,24,4,128,1,fp8,fp8,0,0.777784013748169
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,6144,24,24,128,1,float16,float16,0,0.45632319450378417
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,6144,24,8,128,1,float16,float16,0,0.8374128341674805
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,6144,24,24,128,1,float16,fp8,0,0.41861758232116697
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,6144,24,8,128,1,fp8,fp8,0,0.7148047924041748
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,6144,24,24,128,1,fp8,fp8,0,0.47405281066894533
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,6144,24,1,128,1,float16,float16,0,0.41855998039245607
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,6144,24,1,128,1,float16,fp8,0,0.3855776071548462
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,6144,24,1,128,1,fp8,fp8,0,0.45056638717651365
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,6144,24,2,128,1,float16,float16,0,0.4149199962615967
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,6144,24,2,128,1,float16,fp8,0,0.3861632108688354
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,6144,24,2,128,1,fp8,fp8,0,0.43363680839538576
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,6144,24,4,128,1,float16,float16,0,0.42384958267211914
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,6144,24,4,128,1,float16,fp8,0,0.385481595993042
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,6144,24,4,128,1,fp8,fp8,0,0.41795997619628905
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,6144,24,8,128,1,float16,fp8,0,0.3863168001174927
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,6144,24,24,128,1,float16,float16,0,0.25684161186218263
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,6144,24,8,128,1,fp8,fp8,0,0.40668959617614747
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,6144,24,24,128,1,float16,fp8,0,0.23673760890960693
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,6144,24,1,128,1,float16,float16,0,0.22872960567474365
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,6144,24,24,128,1,fp8,fp8,0,0.25418720245361326
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,6144,24,1,128,1,float16,fp8,0,0.22547519207000732
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,6144,24,2,128,1,float16,float16,0,0.2310352087020874
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,6144,24,1,128,1,fp8,fp8,0,0.23221120834350586
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,6144,24,2,128,1,float16,fp8,0,0.22086560726165771
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,6144,24,2,128,1,fp8,fp8,0,0.23796160221099855
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,6144,24,4,128,1,float16,float16,0,0.2336479902267456
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,6144,24,4,128,1,float16,fp8,0,0.22143518924713135
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,6144,24,24,128,1,float16,float16,0,0.8915488243103027
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,6144,24,8,128,1,float16,float16,0,0.2382512092590332
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,6144,24,8,128,1,float16,fp8,0,0.2284543991088867
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,6144,24,8,128,1,fp8,fp8,0,0.22201919555664062
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,6144,24,8,128,1,float16,fp8,0,0.7709936141967774
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,4096,24,1,128,1,float16,fp8,0,2.612179183959961
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,6144,24,8,128,1,float16,float16,0,0.4264688014984131
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,4096,24,2,128,1,float16,fp8,0,2.6122575759887696
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,4096,24,1,128,1,float16,float16,0,3.033830451965332
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,4096,24,2,128,1,float16,float16,0,3.07159366607666
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,6144,24,4,128,1,fp8,fp8,0,0.23260319232940674
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,4096,24,2,128,1,fp8,fp8,0,2.60819034576416
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,4096,24,4,128,1,float16,fp8,0,2.6089263916015626
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,4096,24,4,128,1,fp8,fp8,0,2.6407007217407226
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,4096,24,4,128,1,float16,float16,0,3.0885791778564453
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,4096,24,24,128,1,float16,float16,0,1.7548255920410156
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,4096,24,8,128,1,float16,fp8,0,2.840292739868164
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,4096,24,8,128,1,fp8,fp8,0,2.6237632751464846
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,4096,24,8,128,1,float16,float16,0,3.3185630798339845
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,4096,24,1,128,1,fp8,fp8,0,2.60739688873291
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,4096,24,24,128,1,float16,fp8,0,1.4814751625061036
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,4096,24,24,128,1,fp8,fp8,0,1.4738032341003418
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,4096,24,1,128,1,float16,float16,0,1.606991958618164
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,4096,24,1,128,1,fp8,fp8,0,1.3461855888366698
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,4096,24,1,128,1,float16,fp8,0,1.479097557067871
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,4096,24,2,128,1,float16,float16,0,1.4489359855651855
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,4096,24,2,128,1,float16,fp8,0,1.6443536758422852
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,4096,24,4,128,1,float16,float16,0,1.5009039878845214
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,4096,24,4,128,1,fp8,fp8,0,1.3223648071289062
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,4096,24,4,128,1,float16,fp8,0,1.6420576095581054
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,4096,24,8,128,1,float16,float16,0,1.5272543907165528
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,4096,24,8,128,1,fp8,fp8,0,1.3665776252746582
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,4096,24,24,128,1,float16,float16,0,0.9443920135498047
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,4096,24,24,128,1,float16,fp8,0,0.7534063816070556
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,4096,24,24,128,1,fp8,fp8,0,0.8331071853637695
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,4096,24,1,128,1,float16,float16,0,0.7808127880096436
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,4096,24,1,128,1,float16,fp8,0,0.7022575855255127
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,4096,24,1,128,1,fp8,fp8,0,0.8192815780639648
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,4096,24,2,128,1,float16,float16,0,0.7347983837127685
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,4096,24,2,128,1,float16,fp8,0,0.6921199798583985
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,4096,24,2,128,1,fp8,fp8,0,0.8091919898986817
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,4096,24,4,128,1,float16,float16,0,0.7431583881378174
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,4096,24,4,128,1,float16,fp8,0,0.7125520229339599
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,4096,24,2,128,1,fp8,fp8,0,1.3247440338134766
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,4096,24,4,128,1,fp8,fp8,0,0.6817999839782715
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,4096,24,8,128,1,float16,float16,0,0.771124792098999
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,4096,24,24,128,1,float16,fp8,0,0.3982656002044678
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,4096,24,24,128,1,float16,float16,0,0.44721760749816897
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,4096,24,24,128,1,fp8,fp8,0,0.39859039783477784
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,4096,24,8,128,1,float16,fp8,0,0.7334479808807373
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,4096,24,1,128,1,float16,float16,0,0.38178238868713377
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,4096,24,1,128,1,float16,fp8,0,0.36198561191558837
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,4096,24,2,128,1,float16,float16,0,0.3844624042510986
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,4096,24,1,128,1,fp8,fp8,0,0.3677263975143433
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,4096,24,2,128,1,fp8,fp8,0,0.4099760055541992
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,4096,24,4,128,1,float16,float16,0,0.38483519554138185
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,4096,24,4,128,1,float16,fp8,0,0.36789441108703613
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,4096,24,4,128,1,fp8,fp8,0,0.3665071964263916
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,4096,24,8,128,1,float16,float16,0,0.40758399963378905
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,4096,24,24,128,1,float16,float16,0,0.23809440135955812
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,4096,24,8,128,1,float16,fp8,0,0.36348960399627683
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,4096,24,8,128,1,fp8,fp8,0,0.3675647974014282
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,4096,24,24,128,1,float16,fp8,0,0.22041759490966797
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,4096,24,24,128,1,fp8,fp8,0,0.2352031946182251
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,4096,24,1,128,1,float16,float16,0,0.2079535961151123
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,4096,24,1,128,1,float16,fp8,0,0.20150880813598632
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,4096,24,1,128,1,fp8,fp8,0,0.2027008056640625
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,4096,24,2,128,1,float16,float16,0,0.208734393119812
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,4096,24,2,128,1,float16,fp8,0,0.21718080043792726
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,4096,24,2,128,1,fp8,fp8,0,0.2019279956817627
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,4096,24,4,128,1,float16,fp8,0,0.20308480262756348
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,4096,24,4,128,1,fp8,fp8,0,0.20251679420471191
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,4096,24,8,128,1,float16,float16,0,0.21742079257965088
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,4096,24,8,128,1,fp8,fp8,0,0.2021536111831665
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,4096,24,24,128,1,float16,float16,0,0.13874080181121826
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,4096,24,24,128,1,float16,fp8,0,0.13091039657592773
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,4096,24,24,128,1,fp8,fp8,0,0.12911839485168458
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,4096,24,1,128,1,float16,float16,0,0.1252351999282837
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,4096,24,1,128,1,fp8,fp8,0,0.12063679695129395
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,4096,24,2,128,1,float16,float16,0,0.12523839473724366
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,4096,24,2,128,1,float16,fp8,0,0.12087680101394653
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,4096,24,2,128,1,fp8,fp8,0,0.12114880084991456
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,4096,24,4,128,1,float16,float16,0,0.12634880542755128
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,4096,24,4,128,1,float16,fp8,0,0.12077280282974243
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,4096,24,4,128,1,fp8,fp8,0,0.121014404296875
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,4096,24,8,128,1,float16,float16,0,0.1282256007194519
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,4096,24,8,128,1,float16,fp8,0,0.12153439521789551
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,4096,24,8,128,1,fp8,fp8,0,0.6864848136901855
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,4096,24,8,128,1,fp8,fp8,0,0.12125600576400757
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,4096,24,2,128,1,float16,fp8,0,0.37703518867492675
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,4096,24,8,128,1,float16,fp8,0,1.4791855812072754
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,3072,24,1,128,1,float16,fp8,0,1.5994879722595214
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,3072,24,1,128,1,float16,float16,0,1.76299991607666
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,3072,24,1,128,1,fp8,fp8,0,1.6016544342041015
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,4096,24,4,128,1,float16,float16,0,0.21189119815826415
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,4096,24,8,128,1,float16,fp8,0,0.21751039028167723
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,3072,24,2,128,1,float16,float16,0,1.7393775939941407
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,4096,24,1,128,1,float16,fp8,0,0.12095199823379517
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,3072,24,2,128,1,float16,fp8,0,1.6020799636840821
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,3072,24,2,128,1,fp8,fp8,0,1.600611114501953
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,3072,24,4,128,1,float16,fp8,0,1.6008880615234375
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,3072,24,4,128,1,fp8,fp8,0,1.6032096862792968
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,3072,24,4,128,1,float16,float16,0,1.9252431869506836
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,3072,24,24,128,1,float16,float16,0,1.05349760055542
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,3072,24,8,128,1,float16,float16,0,1.8333200454711913
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,3072,24,8,128,1,float16,fp8,0,1.7821088790893556
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,3072,24,8,128,1,fp8,fp8,0,1.6037792205810546
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,3072,24,24,128,1,float16,fp8,0,0.9296256065368652
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,3072,24,1,128,1,float16,float16,0,0.8606255531311036
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,3072,24,1,128,1,float16,fp8,0,0.973755168914795
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,3072,24,1,128,1,fp8,fp8,0,0.9733936309814453
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,3072,24,2,128,1,float16,fp8,0,0.8485136032104492
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,3072,24,2,128,1,float16,float16,0,0.9922127723693848
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,3072,24,2,128,1,fp8,fp8,0,0.8176719665527343
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,3072,24,4,128,1,float16,float16,0,0.8806511878967285
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,3072,24,4,128,1,float16,fp8,0,0.8189279556274414
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,3072,24,4,128,1,fp8,fp8,0,0.8801775932312011
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,3072,24,24,128,1,float16,float16,0,0.5352303981781006
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,3072,24,24,128,1,float16,fp8,0,0.47945442199707033
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,3072,24,8,128,1,float16,fp8,0,0.8250991821289062
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,3072,24,24,128,1,fp8,fp8,0,0.5011136054992675
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,3072,24,8,128,1,fp8,fp8,0,0.9149344444274903
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,3072,24,1,128,1,float16,float16,0,0.4460912227630615
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,3072,24,1,128,1,float16,fp8,0,0.42560319900512694
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,3072,24,1,128,1,fp8,fp8,0,0.4776591777801514
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,3072,24,2,128,1,float16,float16,0,0.44732961654663084
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,3072,24,2,128,1,float16,fp8,0,0.4366799831390381
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,3072,24,2,128,1,fp8,fp8,0,0.43175678253173827
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,3072,24,4,128,1,float16,float16,0,0.4544879913330078
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,3072,24,4,128,1,float16,fp8,0,0.47702879905700685
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,3072,24,4,128,1,fp8,fp8,0,0.4291056156158447
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,3072,24,8,128,1,float16,float16,0,0.4684288024902344
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,3072,24,8,128,1,float16,fp8,0,0.42905597686767577
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,3072,24,8,128,1,fp8,fp8,0,0.4269455909729004
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,3072,24,24,128,1,float16,float16,0,0.29863040447235106
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,3072,24,24,128,1,float16,fp8,0,0.25847198963165285
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,3072,24,24,128,1,fp8,fp8,0,0.26318399906158446
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,3072,24,1,128,1,float16,float16,0,0.23564960956573486
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,3072,24,1,128,1,float16,fp8,0,0.23062078952789306
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,3072,24,1,128,1,fp8,fp8,0,0.23299360275268555
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,3072,24,2,128,1,float16,fp8,0,0.2341599941253662
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,3072,24,24,128,1,fp8,fp8,0,0.931180763244629
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,3072,24,4,128,1,fp8,fp8,0,0.23095040321350097
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,3072,24,4,128,1,float16,fp8,0,0.23197600841522217
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,3072,24,8,128,1,fp8,fp8,0,0.23204638957977294
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,3072,24,8,128,1,float16,float16,0,0.249182391166687
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,3072,24,8,128,1,float16,fp8,0,0.23097119331359864
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,3072,24,24,128,1,float16,float16,0,0.15684159994125366
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,3072,24,24,128,1,fp8,fp8,0,0.14683680534362792
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,3072,24,1,128,1,float16,float16,0,0.13547040224075318
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,3072,24,1,128,1,float16,fp8,0,0.13192479610443114
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,3072,24,1,128,1,fp8,fp8,0,0.13256959915161132
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,3072,24,8,128,1,float16,float16,0,0.9658143997192383
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,3072,24,2,128,1,float16,float16,0,0.1344704031944275
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,3072,24,2,128,1,float16,fp8,0,0.13230400085449218
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,3072,24,2,128,1,fp8,fp8,0,0.1322175979614258
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,3072,24,4,128,1,float16,float16,0,0.13713120222091674
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,3072,24,4,128,1,float16,fp8,0,0.13124639987945558
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,3072,24,4,128,1,fp8,fp8,0,0.1314703941345215
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,3072,24,8,128,1,float16,float16,0,0.14158719778060913
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,3072,24,8,128,1,float16,fp8,0,0.13167840242385864
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,3072,24,8,128,1,fp8,fp8,0,0.13239840269088746
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,3072,24,24,128,1,float16,fp8,0,0.08962720036506652
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,3072,24,24,128,1,fp8,fp8,0,0.08975359797477722
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,3072,24,1,128,1,float16,float16,0,0.08620640039443969
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,3072,24,1,128,1,float16,fp8,0,0.08340319991111755
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,3072,24,1,128,1,fp8,fp8,0,0.08309280276298522
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,3072,24,2,128,1,float16,float16,0,0.08632640242576599
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,3072,24,2,128,1,float16,fp8,0,0.0826192021369934
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,3072,24,2,128,1,fp8,fp8,0,0.08300319910049439
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,3072,24,4,128,1,float16,float16,0,0.08677759766578674
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,3072,24,4,128,1,float16,fp8,0,0.08298559784889221
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,3072,24,4,128,1,fp8,fp8,0,0.08317599892616272
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,3072,24,8,128,1,float16,float16,0,0.08799039721488952
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,3072,24,8,128,1,float16,fp8,0,0.08326879739761353
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,3072,24,8,128,1,fp8,fp8,0,0.08318399786949157
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,3072,24,2,128,1,float16,float16,0,0.23552799224853516
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,3072,24,2,128,1,fp8,fp8,0,0.23215041160583497
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,3072,24,4,128,1,float16,float16,0,0.2393807888031006
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,3072,24,24,128,1,float16,fp8,0,0.14653279781341552
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,2048,24,1,128,1,float16,float16,0,1.678548812866211
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,2048,24,1,128,1,float16,fp8,0,1.6406288146972656
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,2048,24,1,128,1,fp8,fp8,0,1.642425537109375
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,2048,24,2,128,1,float16,float16,0,1.6962064743041991
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,2048,24,2,128,1,float16,fp8,0,1.6408784866333008
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,2048,24,2,128,1,fp8,fp8,0,1.6430736541748048
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,3072,24,24,128,1,float16,float16,0,0.09708319902420044
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,2048,24,4,128,1,float16,float16,0,1.760211181640625
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,2048,24,24,128,1,float16,fp8,0,0.9750991821289062
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,2048,24,4,128,1,fp8,fp8,0,1.6398160934448243
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,2048,24,4,128,1,float16,fp8,0,1.792095947265625
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,2048,24,8,128,1,float16,float16,0,1.8462959289550782
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,2048,24,8,128,1,float16,fp8,0,1.8190895080566407
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,2048,24,8,128,1,fp8,fp8,0,1.6405792236328125
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,2048,24,24,128,1,fp8,fp8,0,1.0965392112731933
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,2048,24,1,128,1,float16,float16,0,0.8450639724731446
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,2048,24,1,128,1,float16,fp8,0,0.8337743759155274
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,2048,24,1,128,1,fp8,fp8,0,0.8527615547180176
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,2048,24,2,128,1,float16,fp8,0,0.8514960289001465
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,2048,24,2,128,1,float16,float16,0,0.922704029083252
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,2048,24,2,128,1,fp8,fp8,0,0.8440511703491211
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,2048,24,4,128,1,float16,float16,0,0.8892399787902832
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,2048,24,4,128,1,float16,fp8,0,0.845577621459961
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,2048,24,4,128,1,fp8,fp8,0,0.8334367752075196
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,2048,24,8,128,1,float16,float16,0,0.9170000076293945
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,2048,24,24,128,1,float16,float16,0,0.556612777709961
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,2048,24,8,128,1,float16,fp8,0,0.8393664360046387
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,2048,24,24,128,1,float16,fp8,0,0.5205376148223877
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,2048,24,1,128,1,float16,float16,0,0.43444480895996096
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,2048,24,24,128,1,fp8,fp8,0,0.517249584197998
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,2048,24,1,128,1,float16,fp8,0,0.44582719802856446
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,2048,24,2,128,1,float16,float16,0,0.4345104217529297
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,2048,24,2,128,1,float16,fp8,0,0.4489295959472656
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,2048,24,2,128,1,fp8,fp8,0,0.4402783870697021
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,2048,24,4,128,1,float16,float16,0,0.44279842376708983
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,2048,24,4,128,1,float16,fp8,0,0.4392223834991455
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,2048,24,4,128,1,fp8,fp8,0,0.4297039985656738
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,2048,24,8,128,1,float16,float16,0,0.47252960205078126
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,2048,24,8,128,1,float16,fp8,0,0.43890881538391113
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,2048,24,8,128,1,fp8,fp8,0,0.4307663917541504
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,2048,24,24,128,1,float16,float16,0,1.1042960166931153
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,2048,24,24,128,1,float16,float16,0,0.2875504016876221
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,2048,24,24,128,1,float16,fp8,0,0.264736008644104
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,2048,24,24,128,1,fp8,fp8,0,0.2713184118270874
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,2048,24,1,128,1,float16,fp8,0,0.2289263963699341
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,2048,24,2,128,1,float16,float16,0,0.22740480899810792
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,2048,24,2,128,1,float16,fp8,0,0.23383839130401612
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,2048,24,2,128,1,fp8,fp8,0,0.2287424087524414
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,2048,24,4,128,1,float16,float16,0,0.23239679336547853
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,2048,24,4,128,1,fp8,fp8,0,0.22882719039916993
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,2048,24,8,128,1,float16,float16,0,0.242792010307312
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,2048,24,8,128,1,float16,fp8,0,0.22954399585723878
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,2048,24,8,128,1,fp8,fp8,0,0.22916479110717775
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,2048,24,24,128,1,float16,float16,0,0.15525120496749878
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,2048,24,24,128,1,float16,fp8,0,0.14659839868545532
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,2048,24,24,128,1,fp8,fp8,0,0.14658399820327758
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,2048,24,1,128,1,float16,float16,0,0.12760159969329835
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,2048,24,1,128,1,float16,fp8,0,0.12669119834899903
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,2048,24,1,128,1,fp8,fp8,0,0.1269935965538025
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,2048,24,2,128,1,float16,float16,0,0.1283136010169983
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,2048,24,2,128,1,float16,fp8,0,0.126692795753479
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,2048,24,2,128,1,fp8,fp8,0,0.1276144027709961
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,2048,24,8,128,1,fp8,fp8,0,0.9416815757751464
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,2048,24,4,128,1,float16,float16,0,0.13054879903793334
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,2048,24,4,128,1,float16,fp8,0,0.12748320102691652
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,2048,24,4,128,1,fp8,fp8,0,0.12697279453277588
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,2048,24,1,128,1,fp8,fp8,0,0.42892799377441404
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,2048,24,8,128,1,float16,fp8,0,0.12782080173492433
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,2048,24,24,128,1,float16,float16,0,0.09131199717521668
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,2048,24,8,128,1,fp8,fp8,0,0.1285040020942688
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,2048,24,24,128,1,float16,fp8,0,0.08492640256881714
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,2048,24,24,128,1,fp8,fp8,0,0.08503360152244568
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,2048,24,1,128,1,float16,float16,0,0.07796639800071717
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,2048,24,1,128,1,float16,fp8,0,0.07590399980545044
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,2048,24,1,128,1,fp8,fp8,0,0.0762000024318695
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,2048,24,2,128,1,float16,float16,0,0.07790240049362182
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,2048,24,2,128,1,float16,fp8,0,0.07645440101623535
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,2048,24,2,128,1,fp8,fp8,0,0.07623680233955384
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,2048,24,4,128,1,float16,float16,0,0.0785808026790619
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,2048,24,4,128,1,float16,fp8,0,0.07631840109825135
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,2048,24,4,128,1,fp8,fp8,0,0.07623680233955384
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,2048,24,8,128,1,float16,float16,0,0.08102560043334961
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,2048,24,8,128,1,fp8,fp8,0,0.0763584017753601
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,2048,24,24,128,1,float16,float16,0,0.05629439949989319
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,2048,24,1,128,1,float16,float16,0,0.2287839889526367
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,2048,24,24,128,1,float16,fp8,0,0.05556640028953552
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,2048,24,24,128,1,fp8,fp8,0,0.055486398935317996
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,2048,24,1,128,1,fp8,fp8,0,0.22823519706726075
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,2048,24,1,128,1,float16,float16,0,0.052356797456741336
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,2048,24,1,128,1,float16,fp8,0,0.05134559869766235
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,2048,24,1,128,1,fp8,fp8,0,0.051342397928237915
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,2048,24,2,128,1,float16,float16,0,0.05232639908790589
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,2048,24,2,128,1,float16,fp8,0,0.05132799744606018
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,2048,24,2,128,1,fp8,fp8,0,0.05115200281143188
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,2048,24,4,128,1,float16,fp8,0,0.05118240118026733
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,2048,24,4,128,1,fp8,fp8,0,0.05103679895401001
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,2048,24,8,128,1,float16,float16,0,0.053609597682952884
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,2048,24,8,128,1,float16,fp8,0,0.05103039741516113
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,2048,24,8,128,1,fp8,fp8,0,0.05120800137519836
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,2048,24,4,128,1,float16,fp8,0,0.2309567928314209
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1536,24,1,128,1,float16,float16,0,1.0482879638671876
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1536,24,1,128,1,float16,fp8,0,1.0547679901123046
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1536,24,1,128,1,fp8,fp8,0,1.0532464027404784
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1536,24,2,128,1,float16,float16,0,1.065719985961914
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,2048,24,8,128,1,float16,float16,0,0.136518394947052
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1536,24,2,128,1,fp8,fp8,0,1.0541168212890626
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1536,24,4,128,1,float16,float16,0,1.0906831741333007
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1536,24,2,128,1,float16,fp8,0,1.088475227355957
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1536,24,4,128,1,float16,fp8,0,1.0693903923034669
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,2048,24,8,128,1,float16,fp8,0,0.07605599761009216
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1536,24,4,128,1,fp8,fp8,0,1.05076322555542
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1536,24,8,128,1,float16,float16,0,1.1453760147094727
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1536,24,24,128,1,float16,float16,0,0.7207695960998535
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1536,24,24,128,1,float16,fp8,0,0.6536367893218994
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,2048,24,4,128,1,float16,float16,0,0.053206402063369754
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1536,24,8,128,1,float16,fp8,0,1.0553855895996094
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1536,24,24,128,1,fp8,fp8,0,0.64334077835083
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1536,24,1,128,1,float16,float16,0,0.5267312049865722
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1536,24,8,128,1,fp8,fp8,0,1.2017503738403321
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1536,24,1,128,1,float16,fp8,0,0.5364528179168702
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1536,24,1,128,1,fp8,fp8,0,0.541315221786499
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1536,24,2,128,1,float16,fp8,0,0.5378335952758789
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1536,24,2,128,1,fp8,fp8,0,0.5380896091461181
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1536,24,4,128,1,float16,float16,0,0.5439583778381347
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1536,24,4,128,1,float16,fp8,0,0.536897611618042
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1536,24,4,128,1,fp8,fp8,0,0.5712800025939941
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1536,24,8,128,1,float16,float16,0,0.5822896003723145
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1536,24,8,128,1,float16,fp8,0,0.5382800102233887
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1536,24,24,128,1,float16,fp8,0,0.3346816062927246
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1536,24,24,128,1,float16,float16,0,0.36402239799499514
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1536,24,24,128,1,fp8,fp8,0,0.3333087921142578
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1536,24,1,128,1,float16,float16,0,0.2785248041152954
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1536,24,1,128,1,float16,fp8,0,0.28439199924468994
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1536,24,1,128,1,fp8,fp8,0,0.2803312063217163
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1536,24,2,128,1,float16,float16,0,0.2732431888580322
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1536,24,2,128,1,float16,fp8,0,0.281496000289917
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1536,24,2,128,1,fp8,fp8,0,0.27948000431060793
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1536,24,4,128,1,float16,float16,0,0.28613920211791993
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1536,24,4,128,1,float16,fp8,0,0.2838032007217407
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1536,24,8,128,1,float16,float16,0,0.2965104103088379
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1536,24,24,128,1,float16,float16,0,0.19403359889984131
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1536,24,8,128,1,fp8,fp8,0,0.2806031942367554
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1536,24,24,128,1,float16,fp8,0,0.18186399936676026
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1536,24,1,128,1,float16,float16,0,0.1505887985229492
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1536,24,1,128,1,float16,fp8,0,0.15357919931411743
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1536,24,1,128,1,fp8,fp8,0,0.15327680110931396
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1536,24,2,128,1,float16,float16,0,0.15140000581741334
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1536,24,2,128,1,float16,fp8,0,0.15362880229949952
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1536,24,2,128,1,fp8,fp8,0,0.15302720069885253
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1536,24,4,128,1,float16,float16,0,0.15452320575714112
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1536,24,4,128,1,fp8,fp8,0,0.1538624048233032
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1536,24,8,128,1,float16,float16,0,0.16197760105133058
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1536,24,8,128,1,float16,fp8,0,0.15484960079193116
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1536,24,8,128,1,fp8,fp8,0,0.1526975989341736
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1536,24,2,128,1,float16,float16,0,0.5389679908752442
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1536,24,24,128,1,float16,float16,0,0.1060096025466919
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1536,24,24,128,1,float16,fp8,0,0.1010975956916809
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1536,24,24,128,1,fp8,fp8,0,0.1015328049659729
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1536,24,1,128,1,float16,float16,0,0.08571199774742126
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1536,24,1,128,1,float16,fp8,0,0.08599839806556701
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1536,24,1,128,1,fp8,fp8,0,0.08574560284614563
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1536,24,2,128,1,float16,float16,0,0.08583679795265198
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1536,24,8,128,1,fp8,fp8,0,0.5383935928344726
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1536,24,2,128,1,float16,fp8,0,0.08605599999427796
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1536,24,2,128,1,fp8,fp8,0,0.08593119978904724
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1536,24,4,128,1,float16,float16,0,0.08969600200653076
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1536,24,4,128,1,float16,fp8,0,0.08604320287704467
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1536,24,4,128,1,fp8,fp8,0,0.08592640161514283
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1536,24,8,128,1,float16,fp8,0,0.0864687979221344
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1536,24,8,128,1,fp8,fp8,0,0.08614559769630432
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1536,24,24,128,1,float16,float16,0,0.0639360010623932
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1536,24,24,128,1,float16,fp8,0,0.059832000732421876
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1536,24,4,128,1,fp8,fp8,0,0.2804192066192627
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1536,24,24,128,1,fp8,fp8,0,0.05966399908065796
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1536,24,1,128,1,float16,float16,0,0.054092800617218016
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1536,24,8,128,1,float16,fp8,0,0.2810031890869141
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1536,24,1,128,1,float16,fp8,0,0.05380960106849671
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1536,24,2,128,1,float16,float16,0,0.05418400168418884
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1536,24,2,128,1,float16,fp8,0,0.05365920066833496
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1536,24,2,128,1,fp8,fp8,0,0.053908801078796385
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1536,24,24,128,1,fp8,fp8,0,0.1791200041770935
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1536,24,4,128,1,float16,fp8,0,0.05379840135574341
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1536,24,4,128,1,fp8,fp8,0,0.053527998924255374
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1536,24,8,128,1,float16,float16,0,0.05670080184936523
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1536,24,8,128,1,float16,fp8,0,0.053635197877883914
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1536,24,8,128,1,fp8,fp8,0,0.05339360237121582
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1536,24,24,128,1,float16,float16,0,0.035078400373458864
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1536,24,24,128,1,float16,fp8,0,0.03502880036830902
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1536,24,24,128,1,fp8,fp8,0,0.03497599959373474
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1536,24,1,128,1,float16,float16,0,0.03292160034179688
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1536,24,1,128,1,float16,fp8,0,0.032071998715400694
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1536,24,1,128,1,fp8,fp8,0,0.03261280059814453
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1536,24,2,128,1,float16,float16,0,0.03299199938774109
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1536,24,2,128,1,float16,fp8,0,0.0320576012134552
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1536,24,2,128,1,fp8,fp8,0,0.032411199808120725
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1536,24,4,128,1,float16,fp8,0,0.03233599960803986
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1536,24,4,128,1,fp8,fp8,0,0.03159359991550446
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1536,24,8,128,1,float16,float16,0,0.03473759889602661
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1536,24,8,128,1,float16,fp8,0,0.03203200101852417
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1536,24,8,128,1,fp8,fp8,0,0.033004799485206605
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1536,24,4,128,1,float16,fp8,0,0.15315040349960327
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,1024,24,1,128,1,float16,float16,0,1.0948320388793946
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,1024,24,1,128,1,float16,fp8,0,1.1554351806640626
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,1024,24,1,128,1,fp8,fp8,0,1.1510160446166993
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1536,24,8,128,1,float16,float16,0,0.09385759830474853
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,1024,24,2,128,1,float16,float16,0,1.0977952003479003
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,1024,24,2,128,1,float16,fp8,0,1.154201602935791
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1536,24,1,128,1,fp8,fp8,0,0.053478401899337766
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,1024,24,2,128,1,fp8,fp8,0,1.1541824340820312
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,1024,24,4,128,1,float16,float16,0,1.1410767555236816
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1536,24,4,128,1,float16,float16,0,0.05557760000228882
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1536,24,4,128,1,float16,float16,0,0.033024001121521
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,1024,24,4,128,1,float16,fp8,0,1.155403232574463
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,1024,24,4,128,1,fp8,fp8,0,1.158956813812256
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1024,24,24,128,1,float16,fp8,0,0.7262432098388671
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1024,24,24,128,1,float16,float16,0,0.8414768218994141
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,1024,24,8,128,1,float16,float16,0,1.2440863609313966
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,1024,24,8,128,1,float16,fp8,0,1.1528847694396973
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,1024,24,8,128,1,fp8,fp8,0,1.1548128128051758
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1024,24,1,128,1,float16,float16,0,0.5541183948516846
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1024,24,24,128,1,fp8,fp8,0,0.7282815933227539
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1024,24,1,128,1,float16,fp8,0,0.6003024101257324
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1024,24,1,128,1,fp8,fp8,0,0.5858191967010498
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1024,24,2,128,1,float16,float16,0,0.5851344108581543
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1024,24,2,128,1,fp8,fp8,0,0.5852960109710693
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1024,24,4,128,1,float16,fp8,0,0.5858287811279297
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1024,24,4,128,1,fp8,fp8,0,0.5861631870269776
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1024,24,8,128,1,float16,float16,0,0.6216288089752198
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1024,24,8,128,1,float16,fp8,0,0.5859439849853516
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1024,24,8,128,1,fp8,fp8,0,0.6083055973052979
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1024,24,24,128,1,float16,float16,0,0.40712637901306153
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1024,24,24,128,1,float16,fp8,0,0.37290880680084226
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1024,24,24,128,1,fp8,fp8,0,0.3726560115814209
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1024,24,1,128,1,float16,float16,0,0.2867487907409668
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1024,24,1,128,1,fp8,fp8,0,0.3158479928970337
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1024,24,2,128,1,float16,float16,0,0.2876575946807861
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1024,24,2,128,1,float16,fp8,0,0.3027440071105957
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1024,24,2,128,1,fp8,fp8,0,0.3026576042175293
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1024,24,4,128,1,float16,float16,0,0.2976927995681763
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1024,24,4,128,1,fp8,fp8,0,0.3029792070388794
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1024,24,8,128,1,float16,float16,0,0.31756319999694826
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1024,24,8,128,1,float16,fp8,0,0.3031440019607544
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1024,24,8,128,1,fp8,fp8,0,0.30859520435333254
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1024,24,24,128,1,float16,float16,0,0.21092159748077394
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1024,24,24,128,1,float16,fp8,0,0.19704960584640502
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1024,24,1,128,1,float16,float16,0,0.1538864016532898
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1024,24,24,128,1,fp8,fp8,0,0.20108320713043212
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1024,24,1,128,1,float16,fp8,0,0.16163519620895386
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1024,24,1,128,1,fp8,fp8,0,0.16138559579849243
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1024,24,2,128,1,float16,float16,0,0.15625280141830444
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1024,24,2,128,1,fp8,fp8,0,0.16114879846572877
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1024,24,4,128,1,float16,float16,0,0.1604575991630554
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1024,24,4,128,1,float16,fp8,0,0.16164640188217164
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1024,24,4,128,1,fp8,fp8,0,0.1629807949066162
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1024,24,8,128,1,float16,float16,0,0.16922240257263182
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1024,24,8,128,1,float16,fp8,0,0.16279200315475464
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1024,24,8,128,1,fp8,fp8,0,0.16223839521408082
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1024,24,24,128,1,float16,float16,0,0.1149407982826233
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1024,24,24,128,1,float16,fp8,0,0.10879039764404297
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1024,24,24,128,1,fp8,fp8,0,0.10854719877243042
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1024,24,4,128,1,float16,float16,0,0.5792128086090088
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1024,24,1,128,1,float16,fp8,0,0.08903040289878845
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1024,24,1,128,1,fp8,fp8,0,0.08881760239601136
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1024,24,2,128,1,float16,float16,0,0.0881775975227356
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1024,24,2,128,1,float16,fp8,0,0.08960160017013549
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1024,24,2,128,1,fp8,fp8,0,0.08925600051879883
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1024,24,4,128,1,float16,float16,0,0.09092320203781128
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1024,24,4,128,1,fp8,fp8,0,0.09001759886741638
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1024,24,8,128,1,float16,float16,0,0.09565119743347168
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1024,24,8,128,1,float16,fp8,0,0.09032639861106873
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1024,24,8,128,1,fp8,fp8,0,0.09050400257110595
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1024,24,24,128,1,float16,float16,0,0.06655679941177368
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1024,24,24,128,1,float16,fp8,0,0.06227200031280518
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1024,24,24,128,1,fp8,fp8,0,0.06242560148239136
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1024,24,1,128,1,float16,float16,0,0.05321440100669861
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1024,24,1,128,1,float16,fp8,0,0.05350559949874878
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1024,24,1,128,1,fp8,fp8,0,0.05353119969367981
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1024,24,2,128,1,float16,fp8,0,0.1616976022720337
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1024,24,2,128,1,float16,float16,0,0.0533519983291626
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1024,24,2,128,1,float16,fp8,0,0.05371040105819702
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1024,24,2,128,1,fp8,fp8,0,0.05353119969367981
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1024,24,4,128,1,float16,float16,0,0.05412639975547791
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1024,24,4,128,1,float16,fp8,0,0.053257602453231814
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1024,24,4,128,1,fp8,fp8,0,0.05389919877052307
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1024,24,8,128,1,float16,float16,0,0.056385600566864015
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1024,24,8,128,1,float16,fp8,0,0.0532368004322052
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1024,24,8,128,1,fp8,fp8,0,0.05386719703674316
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1024,24,24,128,1,float16,float16,0,0.04032639861106872
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1024,24,24,128,1,float16,fp8,0,0.04116159975528717
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1024,24,24,128,1,fp8,fp8,0,0.041142401099205014
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1024,24,1,128,1,float16,float16,0,0.037088000774383546
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1024,24,1,128,1,float16,fp8,0,0.03693279922008515
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1024,24,1,128,1,fp8,fp8,0,0.03698880076408386
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1024,24,2,128,1,float16,float16,0,0.03703359961509704
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1024,24,2,128,1,float16,fp8,0,0.03704639971256256
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1024,24,2,128,1,float16,fp8,0,0.6239007949829102
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1024,24,4,128,1,float16,float16,0,0.037905600666999814
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1024,24,8,128,1,float16,float16,0,0.0390720009803772
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1024,24,4,128,1,float16,fp8,0,0.03664799928665161
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1024,24,4,128,1,fp8,fp8,0,0.0369376003742218
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1024,24,8,128,1,float16,fp8,0,0.037099200487136844
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1024,24,24,128,1,float16,float16,0,0.026929599046707154
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1024,24,8,128,1,fp8,fp8,0,0.03696799874305725
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1024,24,24,128,1,float16,fp8,0,0.026919999718666078
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1024,24,24,128,1,fp8,fp8,0,0.026974400877952574
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1024,24,1,128,1,float16,float16,0,0.024784000217914583
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1024,24,1,128,1,float16,fp8,0,0.02481119930744171
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1024,24,1,128,1,fp8,fp8,0,0.024806399643421174
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1024,24,2,128,1,float16,float16,0,0.024803200364112855
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1024,24,4,128,1,float16,fp8,0,0.08933759927749634
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1024,24,2,128,1,float16,fp8,0,0.024831999838352204
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1024,24,2,128,1,fp8,fp8,0,0.02473759949207306
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1024,24,4,128,1,float16,float16,0,0.024852800369262695
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1024,24,8,128,1,float16,float16,0,0.026785600185394286
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1024,24,1,128,1,float16,fp8,0,0.3031023979187012
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1024,24,8,128,1,fp8,fp8,0,0.024721600115299225
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1024,24,4,128,1,float16,fp8,0,0.3161263942718506
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,512,24,1,128,1,float16,float16,0,0.8137375831604003
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,512,24,1,128,1,float16,fp8,0,0.9076543807983398
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,512,24,1,128,1,fp8,fp8,0,0.9051424026489258
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,512,24,2,128,1,float16,float16,0,0.8185135841369628
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,512,24,2,128,1,float16,fp8,0,0.9054672241210937
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,512,24,2,128,1,fp8,fp8,0,0.9037263870239258
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1024,24,1,128,1,float16,float16,0,0.08789119720458985
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,512,24,4,128,1,float16,float16,0,0.8868063926696778
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,512,24,4,128,1,float16,fp8,0,0.9047504425048828
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1024,24,4,128,1,fp8,fp8,0,0.024828800559043886
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1024,24,8,128,1,float16,fp8,0,0.0247856006026268
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,512,24,4,128,1,fp8,fp8,0,0.9034175872802734
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,512,24,8,128,1,float16,float16,0,0.9427215576171875
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,512,24,8,128,1,float16,fp8,0,0.9161184310913086
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,512,24,8,128,1,fp8,fp8,0,0.9043760299682617
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,512,24,24,128,1,float16,fp8,0,0.5986656188964844
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,512,24,24,128,1,float16,float16,0,0.6500256061553955
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,512,24,1,128,1,float16,float16,0,0.41547842025756837
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,512,24,24,128,1,fp8,fp8,0,0.5951344013214112
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,512,24,1,128,1,float16,fp8,0,0.4651343822479248
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,512,24,2,128,1,float16,float16,0,0.41716480255126953
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,512,24,1,128,1,fp8,fp8,0,0.4608751773834229
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,512,24,4,128,1,float16,float16,0,0.4379744052886963
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,512,24,2,128,1,fp8,fp8,0,0.46976480484008787
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,512,24,2,128,1,float16,fp8,0,0.46063838005065916
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,512,24,4,128,1,float16,fp8,0,0.4606368064880371
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,512,24,4,128,1,fp8,fp8,0,0.4608975887298584
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,512,24,8,128,1,float16,float16,0,0.4770927906036377
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,512,24,8,128,1,float16,fp8,0,0.45957441329956056
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,512,24,8,128,1,fp8,fp8,0,0.46442241668701173
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,512,24,24,128,1,float16,float16,0,0.33086719512939455
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,512,24,1,128,1,float16,float16,0,0.21830880641937256
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,512,24,24,128,1,fp8,fp8,0,0.3060960054397583
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,512,24,1,128,1,fp8,fp8,0,0.23854401111602783
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,512,24,2,128,1,float16,float16,0,0.21863360404968263
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1024,24,2,128,1,fp8,fp8,0,0.036817601323127745
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,512,24,2,128,1,float16,fp8,0,0.23818399906158447
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,512,24,2,128,1,fp8,fp8,0,0.23840000629425048
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,512,24,4,128,1,float16,float16,0,0.2277600049972534
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,512,24,4,128,1,float16,fp8,0,0.23863520622253417
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,512,24,4,128,1,fp8,fp8,0,0.23803200721740722
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,512,24,8,128,1,float16,float16,0,0.24702880382537842
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,512,24,8,128,1,float16,fp8,0,0.23839681148529052
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,512,24,24,128,1,float16,float16,0,0.17285920381546022
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,512,24,8,128,1,fp8,fp8,0,0.23909280300140381
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,512,24,24,128,1,float16,fp8,0,0.16188000440597533
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1024,24,4,128,1,float16,fp8,0,0.024843199551105498
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,512,24,1,128,1,float16,float16,0,0.11837120056152343
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,512,24,24,128,1,fp8,fp8,0,0.16154559850692748
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,512,24,1,128,1,float16,fp8,0,0.1279360055923462
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,512,24,1,128,1,fp8,fp8,0,0.12710560560226442
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,512,24,2,128,1,float16,float16,0,0.11775840520858764
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,512,24,2,128,1,fp8,fp8,0,0.12712960243225097
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,512,24,2,128,1,float16,fp8,0,0.1270959973335266
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,512,24,4,128,1,float16,float16,0,0.12398560047149658
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,512,24,4,128,1,float16,fp8,0,0.1268399953842163
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,512,24,4,128,1,fp8,fp8,0,0.12748479843139648
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,512,24,8,128,1,float16,float16,0,0.13296960592269896
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,512,24,24,128,1,float16,float16,0,0.09416639804840088
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,512,24,8,128,1,float16,fp8,0,0.12772799730300904
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,512,24,8,128,1,fp8,fp8,0,0.12830239534378052
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,512,24,24,128,1,float16,fp8,0,0.08982239961624146
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,512,24,24,128,1,fp8,fp8,0,0.08912000060081482
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,512,24,1,128,1,float16,float16,0,0.06699680089950562
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,512,24,1,128,1,fp8,fp8,0,0.07049599885940552
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,512,24,2,128,1,float16,fp8,0,0.07023360133171082
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,512,24,4,128,1,float16,float16,0,0.07062559723854064
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,512,24,4,128,1,float16,fp8,0,0.07064319849014282
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,512,24,4,128,1,fp8,fp8,0,0.0706160008907318
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,512,24,8,128,1,float16,float16,0,0.07504159808158875
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,512,24,8,128,1,float16,fp8,0,0.07183039784431458
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,512,24,8,128,1,fp8,fp8,0,0.07192320227622986
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,512,24,24,128,1,float16,float16,0,0.05420799851417542
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,512,24,24,128,1,float16,fp8,0,0.050964802503585815
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,512,24,24,128,1,fp8,fp8,0,0.05036640167236328
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,512,24,1,128,1,float16,float16,0,0.04085600078105926
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,512,24,1,128,1,float16,fp8,0,0.04115520119667053
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,512,24,1,128,1,fp8,fp8,0,0.041247999668121337
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,512,24,2,128,1,float16,float16,0,0.04029119908809662
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,512,24,2,128,1,float16,fp8,0,0.041284799575805664
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,512,24,2,128,1,fp8,fp8,0,0.041340801119804385
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,512,24,4,128,1,float16,float16,0,0.04121600091457367
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,512,24,4,128,1,float16,fp8,0,0.04172320067882538
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,512,24,4,128,1,fp8,fp8,0,0.041152000427246094
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,512,24,8,128,1,float16,float16,0,0.04427359998226166
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,512,24,8,128,1,float16,fp8,0,0.041176000237464906
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,512,24,1,128,1,float16,fp8,0,0.23944640159606934
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,512,24,8,128,1,fp8,fp8,0,0.04222719967365265
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,512,24,24,128,1,float16,float16,0,0.031230399012565614
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,512,24,24,128,1,float16,fp8,0,0.033022400736808774
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,512,24,24,128,1,fp8,fp8,0,0.03295519948005676
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,512,24,1,128,1,float16,float16,0,0.028004801273345946
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,512,24,1,128,1,float16,fp8,0,0.028702399134635924
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,512,24,1,128,1,fp8,fp8,0,0.028839999437332155
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,512,24,2,128,1,float16,float16,0,0.02839359939098358
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,512,24,2,128,1,float16,fp8,0,0.028836798667907716
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,512,24,2,128,1,fp8,fp8,0,0.028854399919509888
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,512,24,4,128,1,float16,float16,0,0.028891199827194215
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,512,24,4,128,1,fp8,fp8,0,0.02882080078125
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,512,24,4,128,1,float16,fp8,0,0.028870400786399842
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,512,24,8,128,1,float16,fp8,0,0.02889760136604309
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,512,24,8,128,1,fp8,fp8,0,0.028905600309371948
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,512,24,24,128,1,float16,float16,0,0.020734399557113647
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,512,24,24,128,1,float16,fp8,0,0.021107199788093566
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,512,24,24,128,1,fp8,fp8,0,0.02072480022907257
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,512,24,1,128,1,float16,float16,0,0.018680000305175783
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,512,24,1,128,1,float16,fp8,0,0.01870879977941513
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,512,24,1,128,1,fp8,fp8,0,0.018667200207710268
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,512,24,2,128,1,float16,float16,0,0.018681600689888
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,512,24,2,128,1,fp8,fp8,0,0.01865919977426529
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,512,24,2,128,1,float16,fp8,0,0.01894560009241104
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,512,24,4,128,1,float16,float16,0,0.018993599712848662
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,512,24,4,128,1,float16,fp8,0,0.018705600500106813
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,512,24,4,128,1,fp8,fp8,0,0.01929599940776825
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,512,24,8,128,1,float16,float16,0,0.02062239944934845
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,512,24,8,128,1,float16,fp8,0,0.018680000305175783
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,512,24,24,128,1,float16,float16,0,0.01902720034122467
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,512,24,8,128,1,fp8,fp8,0,0.019054399430751802
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,512,24,24,128,1,float16,fp8,0,0.018743999302387238
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,512,24,24,128,1,fp8,fp8,0,0.01868479996919632
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,512,24,1,128,1,float16,float16,0,0.018624000251293182
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,512,24,1,128,1,fp8,fp8,0,0.01857600063085556
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,512,24,2,128,1,float16,float16,0,0.018611200153827667
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,512,24,2,128,1,float16,fp8,0,0.018587200343608855
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,512,24,2,128,1,fp8,fp8,0,0.018636800348758698
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,512,24,4,128,1,float16,float16,0,0.018641600012779237
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,512,24,4,128,1,float16,fp8,0,0.018571199476718904
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,512,24,1,128,1,float16,fp8,0,0.06998080015182495
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,512,24,4,128,1,fp8,fp8,0,0.018598400056362152
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,512,24,2,128,1,fp8,fp8,0,0.06999840140342713
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,512,24,8,128,1,float16,fp8,0,0.018585599958896637
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,512,24,8,128,1,fp8,fp8,0,0.01867839992046356
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,256,24,1,128,1,float16,float16,0,0.3432528018951416
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,256,24,1,128,1,float16,fp8,0,0.3918447971343994
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,256,24,1,128,1,fp8,fp8,0,0.3935408115386963
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,256,24,2,128,1,float16,float16,0,0.34503200054168703
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,512,24,24,128,1,float16,fp8,0,0.3060336112976074
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,256,24,2,128,1,float16,fp8,0,0.392576003074646
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,256,24,2,128,1,fp8,fp8,0,0.39223039150238037
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,512,24,8,128,1,float16,float16,0,0.02967199981212616
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,256,24,4,128,1,float16,float16,0,0.3644144058227539
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,256,24,4,128,1,float16,fp8,0,0.39209120273590087
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,256,24,4,128,1,fp8,fp8,0,0.3923232078552246
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,256,24,8,128,1,float16,float16,0,0.4038112163543701
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,256,24,8,128,1,float16,fp8,0,0.39289920330047606
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,256,24,24,128,1,float16,float16,0,0.2905407905578613
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,256,24,8,128,1,fp8,fp8,0,0.39190239906311036
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,256,24,24,128,1,float16,fp8,0,0.269321608543396
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,256,24,1,128,1,float16,float16,0,0.17990560531616212
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,256,24,1,128,1,float16,fp8,0,0.20193760395050048
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,512,24,1,128,1,float16,fp8,0,0.018688000738620758
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,256,24,1,128,1,fp8,fp8,0,0.20167360305786133
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,256,24,2,128,1,float16,fp8,0,0.20211520195007324
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,512,24,2,128,1,float16,float16,0,0.06801599860191346
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,256,24,4,128,1,float16,float16,0,0.18889119625091552
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,512,24,8,128,1,float16,float16,0,0.018680000305175783
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,256,24,4,128,1,float16,fp8,0,0.2017888069152832
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,256,24,4,128,1,fp8,fp8,0,0.20237600803375244
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,256,24,8,128,1,float16,float16,0,0.208406400680542
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,256,24,24,128,1,float16,float16,0,0.1533951997756958
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,256,24,8,128,1,fp8,fp8,0,0.20241119861602783
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,256,24,24,128,1,float16,fp8,0,0.14358240365982056
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,256,24,24,128,1,fp8,fp8,0,0.143777596950531
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,256,24,1,128,1,float16,float16,0,0.09900959730148315
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,256,24,1,128,1,float16,fp8,0,0.10905760526657104
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,256,24,2,128,1,float16,float16,0,0.09935200214385986
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,256,24,2,128,1,float16,fp8,0,0.10856640338897705
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,256,24,2,128,1,fp8,fp8,0,0.10914080142974854
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,256,24,4,128,1,float16,float16,0,0.10388000011444092
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,256,24,4,128,1,fp8,fp8,0,0.10913759469985962
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,256,24,8,128,1,float16,float16,0,0.11271040439605713
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,256,24,8,128,1,float16,fp8,0,0.10946240425109863
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,256,24,8,128,1,fp8,fp8,0,0.10903199911117553
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,256,24,24,128,1,float16,float16,0,0.0849407970905304
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,256,24,24,128,1,float16,fp8,0,0.08014240264892578
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,256,24,24,128,1,fp8,fp8,0,0.08014240264892578
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,256,24,1,128,1,float16,float16,0,0.05764319896697998
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,256,24,1,128,1,float16,fp8,0,0.06089119911193848
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,256,24,1,128,1,fp8,fp8,0,0.06128799915313721
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,256,24,2,128,1,float16,float16,0,0.05779359936714172
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,256,24,2,128,1,float16,fp8,0,0.061459201574325564
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,256,24,2,128,1,fp8,fp8,0,0.06146399974822998
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,256,24,4,128,1,float16,float16,0,0.060267198085784915
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,256,24,4,128,1,float16,fp8,0,0.06146399974822998
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,256,24,4,128,1,fp8,fp8,0,0.06169760227203369
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,256,24,8,128,1,float16,float16,0,0.06536319851875305
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,256,24,8,128,1,float16,fp8,0,0.06187999844551086
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,256,24,8,128,1,fp8,fp8,0,0.06166560053825378
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,256,24,24,128,1,float16,float16,0,0.04740320146083832
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,256,24,24,128,1,float16,fp8,0,0.04325439929962158
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,256,24,24,128,1,fp8,fp8,0,0.043747198581695554
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,256,24,1,128,1,float16,float16,0,0.03293440043926239
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,256,24,1,128,1,float16,fp8,0,0.03507040143013
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,256,24,24,128,1,fp8,fp8,0,0.269702410697937
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,256,24,1,128,1,fp8,fp8,0,0.034964799880981445
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,256,24,2,128,1,float16,float16,0,0.03303839862346649
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,256,24,2,128,1,float16,fp8,0,0.035102400183677676
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,256,24,2,128,1,fp8,fp8,0,0.0350816011428833
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,256,24,4,128,1,float16,fp8,0,0.035068801045417784
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,256,24,2,128,1,float16,float16,0,0.18053760528564453
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,256,24,4,128,1,fp8,fp8,0,0.0350383996963501
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,256,24,2,128,1,fp8,fp8,0,0.20155680179595947
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,256,24,8,128,1,float16,float16,0,0.037041598558425905
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,256,24,8,128,1,float16,fp8,0,0.035068801045417784
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,256,24,24,128,1,float16,float16,0,0.026859200000762938
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,256,24,24,128,1,float16,fp8,0,0.02886880040168762
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,256,24,24,128,1,fp8,fp8,0,0.028974398970603943
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,256,24,1,128,1,float16,float16,0,0.02295520007610321
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,256,24,1,128,1,float16,fp8,0,0.024831999838352204
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,256,24,1,128,1,fp8,fp8,0,0.024801599979400634
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,256,24,2,128,1,float16,float16,0,0.023583999276161192
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,256,24,2,128,1,float16,fp8,0,0.02476159930229187
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,256,24,2,128,1,fp8,fp8,0,0.024859200417995452
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,256,24,4,128,1,float16,float16,0,0.024779200553894043
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,256,24,4,128,1,float16,fp8,0,0.024851199984550477
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,256,24,4,128,1,fp8,fp8,0,0.02483679950237274
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,256,24,8,128,1,float16,float16,0,0.024908800423145295
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,256,24,8,128,1,float16,fp8,0,0.02486560046672821
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,256,24,8,128,1,fp8,fp8,0,0.024809600412845613
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,256,24,24,128,1,fp8,fp8,0,0.018699200451374055
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,256,24,1,128,1,fp8,fp8,0,0.10898560285568237
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,256,24,1,128,1,float16,float16,0,0.01658560037612915
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,256,24,1,128,1,float16,fp8,0,0.016539199650287627
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,256,24,2,128,1,float16,float16,0,0.01656000018119812
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,256,24,1,128,1,fp8,fp8,0,0.01661120057106018
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,256,24,2,128,1,float16,fp8,0,0.01656640022993088
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,256,24,4,128,1,float16,fp8,0,0.10887999534606933
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,256,24,2,128,1,fp8,fp8,0,0.016627199947834015
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,256,24,4,128,1,float16,float16,0,0.016612799465656282
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,256,24,4,128,1,float16,fp8,0,0.016625599563121797
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,256,24,4,128,1,fp8,fp8,0,0.016607999801635742
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,256,24,8,128,1,float16,float16,0,0.016604800522327424
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,256,24,8,128,1,float16,fp8,0,0.016633599996566772
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,256,24,8,128,1,fp8,fp8,0,0.016631999611854555
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,256,24,24,128,1,float16,float16,0,0.01664479970932007
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,256,24,24,128,1,float16,fp8,0,0.016540800034999848
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,256,24,24,128,1,fp8,fp8,0,0.016601599752902985
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,256,24,1,128,1,float16,float16,0,0.014591999351978302
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,256,24,1,128,1,float16,fp8,0,0.014595200121402741
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,256,24,1,128,1,fp8,fp8,0,0.015227200090885162
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,256,24,2,128,1,float16,float16,0,0.014643199741840363
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,256,24,2,128,1,float16,fp8,0,0.014694400131702423
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,256,24,2,128,1,fp8,fp8,0,0.014753599464893342
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,256,24,4,128,1,float16,float16,0,0.014596800506114959
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,256,24,4,128,1,float16,fp8,0,0.015289600193500518
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,256,24,4,128,1,fp8,fp8,0,0.014585599303245544
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,256,24,8,128,1,float16,float16,0,0.015140800178050995
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,256,24,8,128,1,float16,fp8,0,0.014585599303245544
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,256,24,24,128,1,float16,float16,0,0.01512639969587326
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,256,24,24,128,1,float16,fp8,0,0.014596800506114959
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,256,24,24,128,1,fp8,fp8,0,0.01459999978542328
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,256,24,1,128,1,float16,float16,0,0.014480000734329224
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,256,24,1,128,1,float16,fp8,0,0.014587199687957764
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,256,24,1,128,1,fp8,fp8,0,0.0144896000623703
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,256,24,2,128,1,float16,float16,0,0.014580799639225006
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,256,24,2,128,1,float16,fp8,0,0.01462399959564209
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,256,24,2,128,1,fp8,fp8,0,0.014574399590492249
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,256,24,4,128,1,float16,float16,0,0.034318399429321286
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,256,24,4,128,1,float16,float16,0,0.014579200744628906
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,256,24,4,128,1,float16,fp8,0,0.014532800018787383
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,256,24,4,128,1,fp8,fp8,0,0.014528000354766845
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,256,24,8,128,1,fp8,fp8,0,0.035139200091362
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,256,24,8,128,1,float16,float16,0,0.014620800316333771
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,256,24,8,128,1,float16,fp8,0,0.014606399834156037
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,256,24,8,128,1,fp8,fp8,0,0.014508800208568573
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,256,24,8,128,1,float16,fp8,0,0.2034127950668335
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,128,24,1,128,1,float16,fp8,0,0.23614881038665772
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,128,24,1,128,1,fp8,fp8,0,0.23539519309997559
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,256,24,24,128,1,float16,float16,0,0.018638400733470915
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,256,24,24,128,1,float16,fp8,0,0.01865279972553253
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,128,24,2,128,1,float16,float16,0,0.2134943962097168
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,128,24,2,128,1,float16,fp8,0,0.23633279800415039
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,128,24,2,128,1,fp8,fp8,0,0.23532960414886475
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,128,24,4,128,1,float16,float16,0,0.22214078903198242
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,128,24,4,128,1,fp8,fp8,0,0.2362607955932617
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,128,24,4,128,1,float16,fp8,0,0.23514881134033203
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,128,24,8,128,1,float16,fp8,0,0.23494880199432372
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,128,24,8,128,1,fp8,fp8,0,0.23558399677276612
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,128,24,24,128,1,float16,float16,0,0.16541119813919067
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,128,24,24,128,1,float16,fp8,0,0.15709919929504396
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,128,24,24,128,1,fp8,fp8,0,0.15749119520187377
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,128,24,1,128,1,float16,fp8,0,0.12316639423370361
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,128,24,1,128,1,fp8,fp8,0,0.12250880002975464
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,128,24,2,128,1,float16,float16,0,0.11354880332946778
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,128,24,2,128,1,float16,fp8,0,0.1229375958442688
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,128,24,2,128,1,fp8,fp8,0,0.12280160188674927
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,128,24,4,128,1,float16,float16,0,0.11800800561904908
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,128,24,4,128,1,float16,fp8,0,0.12309919595718384
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,128,24,4,128,1,fp8,fp8,0,0.12310080528259278
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,128,24,8,128,1,float16,float16,0,0.12673599720001222
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,256,24,8,128,1,fp8,fp8,0,0.014552000164985656
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,128,24,8,128,1,float16,fp8,0,0.12318880558013916
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,128,24,8,128,1,fp8,fp8,0,0.12342560291290283
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,128,24,24,128,1,float16,fp8,0,0.08428320288658142
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,128,24,24,128,1,fp8,fp8,0,0.08563039898872375
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,128,24,1,128,1,float16,fp8,0,0.06591839790344238
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,128,24,1,128,1,fp8,fp8,0,0.06656799912452697
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,128,24,2,128,1,float16,float16,0,0.06307359933853149
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,128,24,2,128,1,float16,fp8,0,0.06644799709320068
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,128,24,2,128,1,fp8,fp8,0,0.06706399917602539
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,128,24,4,128,1,float16,float16,0,0.06651520133018493
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,128,24,4,128,1,float16,fp8,0,0.0675599992275238
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,128,24,4,128,1,fp8,fp8,0,0.0669983983039856
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,128,24,8,128,1,float16,float16,0,0.07016159892082215
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,128,24,8,128,1,float16,fp8,0,0.06789439916610718
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,128,24,8,128,1,fp8,fp8,0,0.0676800012588501
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,128,24,24,128,1,float16,float16,0,0.0494623988866806
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,128,24,24,128,1,float16,fp8,0,0.04749279916286468
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,128,24,24,128,1,fp8,fp8,0,0.047460800409317015
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,128,24,1,128,1,float16,float16,0,0.03711200058460236
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,128,24,1,128,1,float16,fp8,0,0.03914720118045807
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,128,24,1,128,1,fp8,fp8,0,0.03912000060081482
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,128,24,2,128,1,float16,float16,0,0.03711679875850678
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,128,24,2,128,1,float16,fp8,0,0.0390751987695694
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,128,24,1,128,1,float16,float16,0,0.21232960224151612
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,128,24,2,128,1,fp8,fp8,0,0.03911519944667816
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,128,24,4,128,1,float16,float16,0,0.03781920075416565
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,128,24,4,128,1,float16,fp8,0,0.039108800888061526
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,128,24,8,128,1,float16,float16,0,0.04086560010910034
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,128,24,8,128,1,float16,fp8,0,0.03914079964160919
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,128,24,24,128,1,float16,float16,0,0.026659199595451356
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,128,24,8,128,1,fp8,fp8,0,0.03914079964160919
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,128,24,24,128,1,float16,fp8,0,0.0281792014837265
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,128,24,24,128,1,fp8,fp8,0,0.02683840095996857
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,128,24,1,128,1,float16,float16,0,0.022759999334812164
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,128,24,1,128,1,float16,fp8,0,0.022737599909305573
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,128,24,1,128,1,fp8,fp8,0,0.02285760045051575
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,128,24,2,128,1,float16,float16,0,0.022735999524593355
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,128,24,2,128,1,fp8,fp8,0,0.02294880002737045
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,128,24,8,128,1,float16,float16,0,0.24123680591583252
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,128,24,4,128,1,float16,float16,0,0.02274399995803833
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,128,24,4,128,1,float16,fp8,0,0.022812800109386445
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,128,24,4,128,1,fp8,fp8,0,0.023825600743293762
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,128,24,8,128,1,float16,float16,0,0.024728000164031982
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,128,24,1,128,1,float16,float16,0,0.11260800361633301
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,128,24,8,128,1,fp8,fp8,0,0.023401600122451783
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,128,24,8,128,1,float16,fp8,0,0.024051199853420257
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,128,24,24,128,1,float16,float16,0,0.018688000738620758
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,128,24,24,128,1,float16,fp8,0,0.02051839977502823
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,128,24,24,128,1,fp8,fp8,0,0.020414400100708007
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,128,24,1,128,1,float16,float16,0,0.016620799899101257
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,128,24,1,128,1,float16,fp8,0,0.017785599827766417
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,128,24,1,128,1,fp8,fp8,0,0.017510400712490083
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,128,24,2,128,1,float16,float16,0,0.01658879965543747
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,128,24,2,128,1,float16,fp8,0,0.0176816001534462
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,128,24,4,128,1,float16,float16,0,0.016638399660587312
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,128,24,4,128,1,float16,fp8,0,0.017772799730300902
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,128,24,4,128,1,fp8,fp8,0,0.016705599427223206
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,128,24,8,128,1,float16,float16,0,0.017262400686740877
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,128,24,8,128,1,float16,fp8,0,0.01672479957342148
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,128,24,8,128,1,fp8,fp8,0,0.01727360039949417
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,128,24,24,128,1,float16,float16,0,0.012483199685811996
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,128,24,24,128,1,float16,fp8,0,0.012548799812793731
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,128,24,24,128,1,float16,float16,0,0.08756800293922425
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,128,24,24,128,1,fp8,fp8,0,0.012828800082206725
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,128,24,1,128,1,float16,float16,0,0.012425599992275238
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,128,24,1,128,1,float16,fp8,0,0.012468799948692322
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,128,24,1,128,1,fp8,fp8,0,0.012494400143623352
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,128,24,2,128,1,float16,fp8,0,0.012438400089740754
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,128,24,2,128,1,fp8,fp8,0,0.0125231996178627
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,128,24,4,128,1,float16,float16,0,0.012408000230789185
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,128,24,4,128,1,float16,fp8,0,0.01252480000257492
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,128,24,4,128,1,fp8,fp8,0,0.012467200309038163
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,128,24,8,128,1,float16,float16,0,0.01249760016798973
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,128,24,8,128,1,float16,fp8,0,0.012441600114107132
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,128,24,8,128,1,fp8,fp8,0,0.012518399953842163
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,128,24,24,128,1,float16,float16,0,0.012457600235939026
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,128,24,24,128,1,float16,fp8,0,0.012547199428081513
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,128,24,24,128,1,fp8,fp8,0,0.012449599802494049
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,128,24,1,128,1,float16,float16,0,0.012511999905109405
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,128,24,1,128,1,float16,fp8,0,0.011593600362539291
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,128,24,1,128,1,fp8,fp8,0,0.011115200072526931
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,128,24,2,128,1,float16,float16,0,0.011713600158691407
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,128,24,2,128,1,float16,fp8,0,0.011419200152158738
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,128,24,2,128,1,fp8,fp8,0,0.01244800016283989
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,128,24,4,128,1,float16,float16,0,0.010985600203275681
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,128,24,4,128,1,float16,fp8,0,0.012455999851226807
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,128,24,4,128,1,fp8,fp8,0,0.011371199786663056
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,128,24,8,128,1,float16,float16,0,0.012443199753761292
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,128,24,8,128,1,float16,fp8,0,0.011206399649381638
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,128,24,8,128,1,fp8,fp8,0,0.012398400157690049
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,128,24,24,128,1,float16,float16,0,0.012516799569129943
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,128,24,24,128,1,float16,fp8,0,0.01242239996790886
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,128,24,24,128,1,fp8,fp8,0,0.011681599915027619
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,128,24,1,128,1,float16,float16,0,0.01239679977297783
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,128,24,1,128,1,float16,fp8,0,0.010500799864530563
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,128,24,4,128,1,fp8,fp8,0,0.039211198687553406
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,128,24,2,128,1,float16,float16,0,0.011505600064992905
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,128,24,2,128,1,float16,fp8,0,0.011416000127792359
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,128,24,2,128,1,fp8,fp8,0,0.012100800126791
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,128,24,4,128,1,float16,float16,0,0.010545600205659866
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,128,24,4,128,1,float16,fp8,0,0.01223519966006279
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,128,24,4,128,1,fp8,fp8,0,0.01151840016245842
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,128,24,8,128,1,float16,float16,0,0.011025600135326385
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,128,24,8,128,1,float16,fp8,0,0.011617600172758102
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,128,24,2,128,1,float16,fp8,0,0.022832000255584718
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,128,24,8,128,1,fp8,fp8,0,0.010497599840164185
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,64,24,1,128,1,float16,float16,0,0.1669648051261902
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,64,24,1,128,1,float16,fp8,0,0.17548799514770508
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,64,24,1,128,1,fp8,fp8,0,0.1748800039291382
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,64,24,2,128,1,float16,float16,0,0.16778719425201416
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,64,24,2,128,1,float16,fp8,0,0.17531199455261232
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,64,24,2,128,1,fp8,fp8,0,0.1757007956504822
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,64,24,4,128,1,float16,float16,0,0.1717535972595215
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,64,24,4,128,1,float16,fp8,0,0.1752560019493103
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,64,24,4,128,1,fp8,fp8,0,0.17549279928207398
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,128,24,2,128,1,fp8,fp8,0,0.017723199725151063
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,64,24,8,128,1,float16,float16,0,0.1795904040336609
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,64,24,8,128,1,float16,fp8,0,0.17611680030822754
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,64,24,8,128,1,fp8,fp8,0,0.1750864028930664
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,64,24,24,128,1,fp8,fp8,0,0.11066399812698365
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,64,24,24,128,1,float16,fp8,0,0.10996320247650146
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,64,24,1,128,1,float16,float16,0,0.09013599753379822
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,64,24,1,128,1,float16,fp8,0,0.09209439754486085
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,64,24,1,128,1,fp8,fp8,0,0.0925167977809906
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,64,24,2,128,1,float16,float16,0,0.09087039828300476
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,64,24,2,128,1,float16,fp8,0,0.09183840155601501
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,128,24,1,128,1,float16,float16,0,0.06321280002593994
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,128,24,2,128,1,float16,float16,0,0.0124719999730587
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,64,24,2,128,1,fp8,fp8,0,0.09257919788360595
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,64,24,4,128,1,float16,fp8,0,0.09281920194625855
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,64,24,4,128,1,float16,float16,0,0.09292160272598267
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,64,24,4,128,1,fp8,fp8,0,0.09271199703216552
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,64,24,8,128,1,float16,float16,0,0.0970304012298584
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,64,24,8,128,1,fp8,fp8,0,0.09301919937133789
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,64,24,8,128,1,float16,fp8,0,0.09303519725799561
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,64,24,24,128,1,float16,fp8,0,0.05961920022964477
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,64,24,1,128,1,float16,float16,0,0.04943839907646179
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,64,24,24,128,1,fp8,fp8,0,0.05957279801368713
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,64,24,1,128,1,float16,fp8,0,0.0514735996723175
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,64,24,1,128,1,fp8,fp8,0,0.05143679976463318
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,64,24,2,128,1,fp8,fp8,0,0.05137760043144226
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,64,24,2,128,1,float16,fp8,0,0.05144799947738647
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,64,24,4,128,1,float16,float16,0,0.05029439926147461
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,64,24,4,128,1,float16,fp8,0,0.05145919919013977
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,64,24,8,128,1,float16,float16,0,0.053472000360488894
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,64,24,8,128,1,float16,fp8,0,0.051470398902893066
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,64,24,8,128,1,fp8,fp8,0,0.051239997148513794
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,64,24,24,128,1,float16,float16,0,0.0342848002910614
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,64,24,24,128,1,fp8,fp8,0,0.03510560095310211
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,64,24,24,128,1,float16,fp8,0,0.034995201230049136
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,64,24,1,128,1,float16,float16,0,0.031068798899650574
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,64,24,1,128,1,float16,fp8,0,0.0309935986995697
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,64,24,1,128,1,fp8,fp8,0,0.03096800148487091
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,64,24,2,128,1,float16,float16,0,0.030025601387023926
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,64,24,2,128,1,float16,fp8,0,0.030929601192474364
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,64,24,2,128,1,fp8,fp8,0,0.031004801392555237
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,64,24,4,128,1,float16,float16,0,0.030817601084709167
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,64,24,4,128,1,float16,fp8,0,0.030987200140953065
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,64,24,8,128,1,float16,float16,0,0.03162719905376434
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,64,24,8,128,1,float16,fp8,0,0.030963200330734252
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,64,24,8,128,1,fp8,fp8,0,0.030956798791885377
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,64,24,24,128,1,float16,float16,0,0.02069920003414154
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,64,24,24,128,1,float16,fp8,0,0.02131360024213791
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,64,24,24,128,1,fp8,fp8,0,0.02146400064229965
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,64,24,1,128,1,float16,float16,0,0.01871519982814789
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,64,24,1,128,1,float16,fp8,0,0.01863359957933426
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,64,24,1,128,1,fp8,fp8,0,0.018668800592422485
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,64,24,2,128,1,float16,float16,0,0.018670399487018586
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,64,24,24,128,1,float16,float16,0,0.11334559917449952
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,64,24,2,128,1,fp8,fp8,0,0.01926880031824112
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,64,24,4,128,1,float16,float16,0,0.01866399943828583
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,64,24,4,128,1,float16,fp8,0,0.018884800374507904
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,64,24,4,128,1,fp8,fp8,0,0.019351999461650848
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,64,24,8,128,1,float16,float16,0,0.020420800149440765
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,64,24,8,128,1,float16,fp8,0,0.019377599656581878
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,64,24,24,128,1,float16,float16,0,0.06268640160560608
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,64,24,8,128,1,fp8,fp8,0,0.01889919936656952
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,64,24,24,128,1,float16,float16,0,0.016579200327396394
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,64,24,2,128,1,float16,float16,0,0.049886399507522584
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,64,24,24,128,1,float16,fp8,0,0.016604800522327424
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,64,24,24,128,1,fp8,fp8,0,0.0165583997964859
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,64,24,1,128,1,float16,float16,0,0.014550399780273438
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,64,24,1,128,1,float16,fp8,0,0.01462240070104599
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,64,24,1,128,1,fp8,fp8,0,0.014563199877738953
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,64,24,2,128,1,float16,float16,0,0.014542399346828461
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,64,24,2,128,1,float16,fp8,0,0.014535999298095703
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,64,24,4,128,1,float16,float16,0,0.014803199470043183
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,64,24,2,128,1,fp8,fp8,0,0.014567999541759491
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,64,24,4,128,1,float16,fp8,0,0.014552000164985656
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,64,24,4,128,1,fp8,fp8,0,0.01456640064716339
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,64,24,8,128,1,float16,float16,0,0.014547200500965118
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,64,24,8,128,1,fp8,fp8,0,0.014577600359916686
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,64,24,24,128,1,float16,float16,0,0.01175680011510849
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,64,24,24,128,1,float16,fp8,0,0.010569600015878677
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,64,24,1,128,1,float16,float16,0,0.010475199669599533
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,64,24,24,128,1,fp8,fp8,0,0.011057599633932113
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,64,24,1,128,1,float16,fp8,0,0.010440000146627427
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,64,24,1,128,1,fp8,fp8,0,0.010467199981212616
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,64,24,2,128,1,float16,float16,0,0.010425599664449692
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,128,24,1,128,1,fp8,fp8,0,0.012438400089740754
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,64,24,2,128,1,float16,fp8,0,0.010470400005578995
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,64,24,2,128,1,fp8,fp8,0,0.010505600273609162
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,64,24,4,128,1,float16,float16,0,0.01048479974269867
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,64,24,4,128,1,fp8,fp8,0,0.030856001377105712
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,64,24,4,128,1,fp8,fp8,0,0.01048479974269867
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,64,24,8,128,1,float16,float16,0,0.010833600163459777
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,64,24,8,128,1,float16,fp8,0,0.010411199927330018
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,64,24,8,128,1,fp8,fp8,0,0.010444799810647965
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,64,24,24,128,1,float16,float16,0,0.010902400314807891
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,64,24,24,128,1,float16,fp8,0,0.010494399815797806
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,64,24,24,128,1,fp8,fp8,0,0.010496000200510025
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,64,24,1,128,1,float16,fp8,0,0.010388799756765366
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,64,24,1,128,1,fp8,fp8,0,0.01048320010304451
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,64,24,2,128,1,float16,float16,0,0.010446400195360184
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,64,24,2,128,1,float16,fp8,0,0.010395199805498124
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,64,24,4,128,1,float16,float16,0,0.01043199971318245
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,64,24,4,128,1,fp8,fp8,0,0.010436800122261048
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,64,24,4,128,1,float16,fp8,0,0.010444799810647965
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,64,24,8,128,1,float16,float16,0,0.010505600273609162
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,64,24,8,128,1,float16,fp8,0,0.010390400141477584
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,64,24,8,128,1,fp8,fp8,0,0.010436800122261048
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,64,24,24,128,1,float16,float16,0,0.010409600287675857
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,64,24,24,128,1,float16,fp8,0,0.010407999902963639
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,64,24,24,128,1,fp8,fp8,0,0.010414399951696397
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,64,24,1,128,1,float16,fp8,0,0.01040479987859726
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,64,24,1,128,1,fp8,fp8,0,0.010420800000429154
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,64,24,2,128,1,float16,float16,0,0.010454399883747101
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,64,24,2,128,1,float16,fp8,0,0.0190528005361557
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,64,24,2,128,1,float16,fp8,0,0.01040000021457672
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,64,24,2,128,1,fp8,fp8,0,0.010473600029945374
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,64,24,4,128,1,float16,float16,0,0.010361599922180175
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,64,24,4,128,1,float16,fp8,0,0.01048159971833229
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,64,24,4,128,1,fp8,fp8,0,0.010393600165843963
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,64,24,8,128,1,float16,float16,0,0.010462400317192078
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,64,24,8,128,1,float16,fp8,0,0.010375999659299851
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,64,24,8,128,1,fp8,fp8,0,0.010473600029945374
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,32,24,1,128,1,float16,float16,0,0.1466431975364685
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,32,24,1,128,1,float16,fp8,0,0.14725760221481324
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,32,24,1,128,1,fp8,fp8,0,0.14694880247116088
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,64,24,8,128,1,float16,fp8,0,0.014582400023937226
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,32,24,2,128,1,float16,float16,0,0.1462272047996521
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,32,24,2,128,1,float16,fp8,0,0.14593759775161744
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,32,24,2,128,1,fp8,fp8,0,0.1478800058364868
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,32,24,4,128,1,float16,float16,0,0.1470368027687073
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,64,24,4,128,1,float16,fp8,0,0.01048320010304451
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,32,24,4,128,1,float16,fp8,0,0.14766720533370972
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,32,24,4,128,1,fp8,fp8,0,0.14640480279922485
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,32,24,8,128,1,float16,float16,0,0.15163040161132812
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,32,24,8,128,1,float16,fp8,0,0.1480720043182373
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,64,24,1,128,1,float16,float16,0,0.010471999645233154
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,32,24,24,128,1,float16,fp8,0,0.0875216007232666
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,64,24,2,128,1,fp8,fp8,0,0.010486400127410889
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,32,24,24,128,1,fp8,fp8,0,0.08638079762458802
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,32,24,1,128,1,float16,float16,0,0.0782480001449585
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,32,24,1,128,1,float16,fp8,0,0.07737119793891907
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,32,24,1,128,1,fp8,fp8,0,0.07748960256576538
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,32,24,2,128,1,float16,float16,0,0.07846879959106445
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,32,24,2,128,1,float16,fp8,0,0.07791680097579956
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,32,24,2,128,1,fp8,fp8,0,0.07752479910850525
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,64,24,1,128,1,float16,float16,0,0.010444799810647965
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,32,24,4,128,1,float16,float16,0,0.0798479974269867
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,32,24,4,128,1,float16,fp8,0,0.07777919769287109
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,32,24,4,128,1,fp8,fp8,0,0.07787039875984192
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,32,24,8,128,1,float16,float16,0,0.08361279964447021
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,32,24,8,128,1,float16,fp8,0,0.07809280157089234
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,32,24,24,128,1,float16,float16,0,0.04793919920921326
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,32,24,8,128,1,fp8,fp8,0,0.07849119901657105
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,32,24,24,128,1,float16,fp8,0,0.04812000095844269
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,32,24,24,128,1,fp8,fp8,0,0.0480320006608963
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,32,24,1,128,1,float16,float16,0,0.04425280094146729
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,32,24,1,128,1,float16,fp8,0,0.044091200828552245
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,32,24,1,128,1,fp8,fp8,0,0.04392800033092499
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,32,24,2,128,1,float16,float16,0,0.04447839856147766
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,32,24,2,128,1,float16,fp8,0,0.04386720061302185
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,32,24,2,128,1,fp8,fp8,0,0.043403199315071105
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,32,24,4,128,1,float16,float16,0,0.045179200172424314
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,32,24,4,128,1,fp8,fp8,0,0.04333760142326355
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,32,24,8,128,1,float16,float16,0,0.04561600089073181
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,32,24,8,128,1,float16,fp8,0,0.043996798992156985
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,32,24,8,128,1,fp8,fp8,0,0.043377599120140074
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,32,24,24,128,1,float16,float16,0,0.028881600499153136
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,32,24,24,128,1,float16,fp8,0,0.028867200016975403
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,32,24,24,128,1,fp8,fp8,0,0.02884320020675659
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,32,24,1,128,1,float16,float16,0,0.026881599426269533
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,32,24,1,128,1,fp8,fp8,0,0.026872000098228453
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,32,24,1,128,1,float16,fp8,0,0.026932799816131593
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,32,24,2,128,1,float16,fp8,0,0.02675040066242218
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,32,24,2,128,1,float16,float16,0,0.02712000012397766
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,32,24,4,128,1,float16,float16,0,0.02688319981098175
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,32,24,4,128,1,float16,fp8,0,0.026790401339530943
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,32,24,4,128,1,fp8,fp8,0,0.026873600482940675
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,64,24,4,128,1,fp8,fp8,0,0.0514415979385376
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,32,24,8,128,1,float16,float16,0,0.02842240035533905
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,32,24,8,128,1,float16,fp8,0,0.02688960134983063
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,32,24,24,128,1,float16,float16,0,0.018615999817848207
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,32,24,8,128,1,fp8,fp8,0,0.02690880000591278
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,32,24,24,128,1,float16,fp8,0,0.018590399622917177
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,32,24,24,128,1,fp8,fp8,0,0.018555200099945067
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,32,24,1,128,1,fp8,fp8,0,0.016633599996566772
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,32,24,1,128,1,float16,fp8,0,0.01717119961977005
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,32,24,2,128,1,float16,float16,0,0.01664000004529953
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,32,24,2,128,1,float16,fp8,0,0.017136000096797943
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,32,24,4,128,1,float16,float16,0,0.0173567995429039
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,32,24,4,128,1,float16,fp8,0,0.01727039963006973
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,32,24,4,128,1,fp8,fp8,0,0.017140799760818483
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,32,24,8,128,1,float16,float16,0,0.01860159933567047
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,32,24,8,128,1,float16,fp8,0,0.017132799327373504
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,32,24,8,128,1,fp8,fp8,0,0.0166143998503685
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,32,24,24,128,1,float16,float16,0,0.014507199823856353
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,32,24,8,128,1,fp8,fp8,0,0.1476912021636963
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,32,24,24,128,1,float16,fp8,0,0.01456640064716339
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,32,24,1,128,1,float16,float16,0,0.013569599390029908
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,32,24,24,128,1,float16,float16,0,0.09013440012931824
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,32,24,1,128,1,fp8,fp8,0,0.014473600685596466
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,32,24,2,128,1,float16,fp8,0,0.014588800072669984
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,32,24,2,128,1,fp8,fp8,0,0.014486399292945863
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,32,24,4,128,1,float16,float16,0,0.013860799372196198
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,32,24,4,128,1,float16,fp8,0,0.014588800072669984
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,32,24,4,128,1,fp8,fp8,0,0.014552000164985656
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,32,24,8,128,1,float16,float16,0,0.012758399546146392
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,32,24,8,128,1,float16,fp8,0,0.013676799833774567
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,32,24,8,128,1,fp8,fp8,0,0.013670399785041809
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,32,24,24,128,1,float16,float16,0,0.010496000200510025
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,32,24,24,128,1,float16,fp8,0,0.010446400195360184
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,32,24,4,128,1,float16,fp8,0,0.04369440078735352
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,32,24,24,128,1,fp8,fp8,0,0.010463999956846238
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,32,24,1,128,1,float16,float16,0,0.010401599854230881
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,32,24,1,128,1,float16,fp8,0,0.010494399815797806
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,32,24,2,128,1,float16,float16,0,0.010476800054311753
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,32,24,1,128,1,fp8,fp8,0,0.01048159971833229
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,32,24,2,128,1,float16,fp8,0,0.010447999835014344
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,32,24,2,128,1,fp8,fp8,0,0.010491199791431427
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,32,24,2,128,1,fp8,fp8,0,0.0268528014421463
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,32,24,4,128,1,float16,fp8,0,0.010480000078678131
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,32,24,4,128,1,fp8,fp8,0,0.010476800054311753
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,32,24,8,128,1,float16,float16,0,0.010419200360774993
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,32,24,8,128,1,float16,fp8,0,0.010467199981212616
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,32,24,8,128,1,fp8,fp8,0,0.010462400317192078
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,32,24,24,128,1,float16,float16,0,0.010420800000429154
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,32,24,1,128,1,float16,float16,0,0.017455999553203583
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,32,24,24,128,1,float16,fp8,0,0.010438399761915207
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,32,24,24,128,1,fp8,fp8,0,0.01048159971833229
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,32,24,1,128,1,float16,float16,0,0.0103472001850605
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,32,24,1,128,1,float16,fp8,0,0.010473600029945374
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,32,24,1,128,1,fp8,fp8,0,0.010452800244092942
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,32,24,2,128,1,float16,float16,0,0.010363200306892395
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,32,24,2,128,1,fp8,fp8,0,0.01664319932460785
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,32,24,2,128,1,float16,fp8,0,0.010387200117111205
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,32,24,2,128,1,fp8,fp8,0,0.010377600044012069
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,32,24,4,128,1,float16,float16,0,0.010443200170993806
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,32,24,4,128,1,float16,fp8,0,0.010417599976062775
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,32,24,4,128,1,fp8,fp8,0,0.010334400087594986
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,32,24,8,128,1,float16,float16,0,0.01043360009789467
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,32,24,8,128,1,fp8,fp8,0,0.010372799634933472
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,32,24,24,128,1,float16,float16,0,0.010390400141477584
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,32,24,24,128,1,float16,fp8,0,0.01043199971318245
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,32,24,24,128,1,fp8,fp8,0,0.010447999835014344
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,32,24,1,128,1,float16,float16,0,0.008951999992132188
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,32,24,1,128,1,float16,fp8,0,0.010385599732398988
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,32,24,1,128,1,fp8,fp8,0,0.010388799756765366
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,32,24,2,128,1,float16,float16,0,0.010387200117111205
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,32,24,2,128,1,float16,fp8,0,0.010407999902963639
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,32,24,2,128,1,fp8,fp8,0,0.010377600044012069
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,32,24,24,128,1,fp8,fp8,0,0.014481599628925323
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,32,24,4,128,1,float16,float16,0,0.010467199981212616
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,32,24,4,128,1,float16,fp8,0,0.010395199805498124
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,32,24,4,128,1,fp8,fp8,0,0.010398399829864503
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,32,24,1,128,1,float16,fp8,0,0.013553600013256072
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,32,24,8,128,1,fp8,fp8,0,0.010367999970912933
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,32,24,2,128,1,float16,float16,0,0.01417279988527298
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,32,24,8,128,1,float16,fp8,0,0.010393600165843963
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,16,24,1,128,1,float16,float16,0,0.14056639671325682
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,16,24,1,128,1,float16,fp8,0,0.13416320085525513
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,16,24,2,128,1,float16,fp8,0,0.13447680473327636
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,16,24,2,128,1,fp8,fp8,0,0.13383359909057618
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,16,24,4,128,1,float16,float16,0,0.1411967992782593
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,16,24,4,128,1,float16,fp8,0,0.13417600393295287
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,16,24,4,128,1,fp8,fp8,0,0.13414080142974855
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,16,24,8,128,1,float16,float16,0,0.14424959421157837
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,16,24,8,128,1,float16,fp8,0,0.1344864010810852
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,16,24,8,128,1,fp8,fp8,0,0.1341856002807617
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,16,24,24,128,1,float16,fp8,0,0.07661759853363037
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,32,24,4,128,1,float16,float16,0,0.010468800365924836
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,16,24,1,128,1,float16,float16,0,0.07542240023612976
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,16,24,1,128,1,float16,fp8,0,0.07197759747505188
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,16,24,1,128,1,fp8,fp8,0,0.07180320024490357
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,16,24,2,128,1,float16,float16,0,0.07563520073890687
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,16,24,2,128,1,float16,fp8,0,0.07216320037841797
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,16,24,2,128,1,fp8,fp8,0,0.07187039852142334
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,16,24,4,128,1,float16,float16,0,0.07669439911842346
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,16,24,4,128,1,float16,fp8,0,0.07221440076828003
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,16,24,4,128,1,fp8,fp8,0,0.07222399711608887
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,16,24,8,128,1,float16,float16,0,0.07750080227851867
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,16,24,8,128,1,float16,fp8,0,0.07224159836769103
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,32,24,8,128,1,float16,float16,0,0.010731200128793717
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,16,24,8,128,1,fp8,fp8,0,0.07225279808044434
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,16,24,24,128,1,float16,fp8,0,0.043243199586868286
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,16,24,24,128,1,float16,float16,0,0.04583359956741333
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,16,24,1,128,1,fp8,fp8,0,0.13404799699783326
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,16,24,1,128,1,float16,float16,0,0.043647998571395875
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,16,24,1,128,1,fp8,fp8,0,0.04116959869861603
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,16,24,2,128,1,float16,float16,0,0.043566399812698366
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,16,24,2,128,1,float16,fp8,0,0.041201600432395936
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,16,24,2,128,1,fp8,fp8,0,0.04124319851398468
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,16,24,4,128,1,float16,float16,0,0.04397920072078705
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,16,24,4,128,1,float16,fp8,0,0.04124000072479248
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,16,24,4,128,1,fp8,fp8,0,0.041440001130104064
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,16,24,8,128,1,float16,fp8,0,0.04133920073509216
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,16,24,24,128,1,float16,float16,0,0.081004798412323
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,16,24,8,128,1,fp8,fp8,0,0.04143039882183075
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,16,24,24,128,1,float16,float16,0,0.027011200785636902
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,16,24,24,128,1,fp8,fp8,0,0.07641760110855103
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,16,24,24,128,1,float16,fp8,0,0.026800000667572023
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,16,24,24,128,1,fp8,fp8,0,0.02688480019569397
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,16,24,1,128,1,float16,float16,0,0.02680160105228424
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,16,24,1,128,1,float16,fp8,0,0.02478879988193512
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,16,24,1,128,1,fp8,fp8,0,0.02489279955625534
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,16,24,2,128,1,float16,float16,0,0.02664479911327362
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,16,24,2,128,1,float16,fp8,0,0.024846400320529937
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,16,24,4,128,1,float16,float16,0,0.026812800765037538
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,16,24,4,128,1,float16,fp8,0,0.02483839988708496
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,16,24,4,128,1,fp8,fp8,0,0.02481919974088669
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,16,24,8,128,1,float16,float16,0,0.026870399713516235
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,16,24,8,128,1,float16,fp8,0,0.024902400374412537
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,16,24,8,128,1,fp8,fp8,0,0.025348800420761108
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,16,24,24,128,1,float16,float16,0,0.018131199479103088
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,16,24,24,128,1,float16,fp8,0,0.016654400527477263
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,16,24,24,128,1,fp8,fp8,0,0.01656000018119812
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,16,24,1,128,1,float16,fp8,0,0.01653600037097931
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,16,24,1,128,1,float16,float16,0,0.016545599699020384
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,16,24,1,128,1,fp8,fp8,0,0.01661120057106018
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,16,24,2,128,1,float16,float16,0,0.016631999611854555
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,32,24,8,128,1,float16,fp8,0,0.010470400005578995
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,16,24,2,128,1,fp8,fp8,0,0.016574400663375854
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,16,24,2,128,1,float16,fp8,0,0.016548800468444824
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,16,24,4,128,1,float16,float16,0,0.016590400040149687
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,16,24,4,128,1,float16,fp8,0,0.016523200273513793
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,16,24,4,128,1,fp8,fp8,0,0.016606399416923524
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,16,24,8,128,1,float16,float16,0,0.0166143998503685
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,16,24,8,128,1,float16,fp8,0,0.01658560037612915
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,16,24,8,128,1,fp8,fp8,0,0.016599999368190767
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,16,24,24,128,1,float16,float16,0,0.013324800133705138
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,16,24,24,128,1,float16,fp8,0,0.012460800260305405
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,16,24,24,128,1,fp8,fp8,0,0.013303999602794648
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,16,24,1,128,1,float16,float16,0,0.012481600046157837
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,16,24,1,128,1,float16,fp8,0,0.012598399817943574
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,16,24,1,128,1,fp8,fp8,0,0.012532800436019897
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,16,24,2,128,1,float16,float16,0,0.012707200646400452
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,16,24,2,128,1,float16,fp8,0,0.012539200484752655
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,16,24,2,128,1,fp8,fp8,0,0.012753599882125854
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,16,24,4,128,1,float16,float16,0,0.012540799379348756
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,16,24,4,128,1,float16,fp8,0,0.012507200241088867
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,16,24,4,128,1,fp8,fp8,0,0.012468799948692322
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,16,24,8,128,1,float16,float16,0,0.01273919939994812
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,16,24,8,128,1,float16,fp8,0,0.01244800016283989
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,16,24,1,128,1,float16,fp8,0,0.041249600052833554
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,16,24,8,128,1,fp8,fp8,0,0.012678399682044983
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16,24,24,128,1,float16,float16,0,0.010414399951696397
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16,24,24,128,1,float16,fp8,0,0.010425599664449692
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16,24,24,128,1,fp8,fp8,0,0.010428799688816071
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16,24,1,128,1,float16,float16,0,0.010369600355625152
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16,24,1,128,1,float16,fp8,0,0.010487999767065048
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16,24,1,128,1,fp8,fp8,0,0.01043360009789467
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16,24,2,128,1,float16,float16,0,0.010454399883747101
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16,24,2,128,1,float16,fp8,0,0.01045759990811348
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16,24,4,128,1,float16,float16,0,0.010460799932479859
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16,24,4,128,1,float16,fp8,0,0.010478399693965912
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16,24,4,128,1,fp8,fp8,0,0.010407999902963639
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16,24,8,128,1,float16,float16,0,0.010500799864530563
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,16,24,8,128,1,float16,float16,0,0.044763201475143434
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16,24,8,128,1,fp8,fp8,0,0.010491199791431427
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16,24,24,128,1,float16,float16,0,0.010385599732398988
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16,24,24,128,1,float16,fp8,0,0.010465600341558457
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16,24,24,128,1,fp8,fp8,0,0.009515199810266495
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16,24,1,128,1,float16,fp8,0,0.008448000252246856
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16,24,1,128,1,fp8,fp8,0,0.008420799672603608
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16,24,2,128,1,float16,float16,0,0.010393600165843963
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16,24,2,128,1,float16,fp8,0,0.010291200131177902
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,16,24,2,128,1,fp8,fp8,0,0.02495039999485016
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16,24,2,128,1,fp8,fp8,0,0.01037919968366623
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16,24,4,128,1,float16,float16,0,0.010441599786281586
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16,24,4,128,1,fp8,fp8,0,0.010486400127410889
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16,24,8,128,1,float16,float16,0,0.010499200224876404
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16,24,8,128,1,float16,fp8,0,0.0105103999376297
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16,24,8,128,1,fp8,fp8,0,0.00921280011534691
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16,24,24,128,1,float16,float16,0,0.010468800365924836
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16,24,24,128,1,float16,fp8,0,0.009190399944782258
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16,24,24,128,1,fp8,fp8,0,0.008448000252246856
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16,24,1,128,1,float16,float16,0,0.010203199833631516
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16,24,1,128,1,float16,fp8,0,0.008448000252246856
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16,24,1,128,1,fp8,fp8,0,0.008454400300979614
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16,24,2,128,1,float16,float16,0,0.008564800024032593
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16,24,2,128,1,fp8,fp8,0,0.008475200086832047
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16,24,2,128,1,float16,fp8,0,0.008452799916267396
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16,24,4,128,1,float16,float16,0,0.008665599673986436
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16,24,4,128,1,float16,fp8,0,0.008408000320196151
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16,24,4,128,1,fp8,fp8,0,0.008462399989366532
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16,24,8,128,1,float16,float16,0,0.010462400317192078
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16,24,8,128,1,float16,fp8,0,0.010326399654150008
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16,24,8,128,1,fp8,fp8,0,0.010406400263309478
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,1,24,1,128,1,float16,fp8,0,0.12913600206375123
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,16,24,24,128,1,fp8,fp8,0,0.043270400166511534
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,1,24,1,128,1,fp8,fp8,0,0.12869919538497926
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,1,24,2,128,1,float16,float16,0,0.13677120208740234
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,1,24,2,128,1,float16,fp8,0,0.12871040105819703
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,1,24,2,128,1,fp8,fp8,0,0.128438401222229
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16,24,2,128,1,fp8,fp8,0,0.0104592002928257
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,1,24,4,128,1,float16,float16,0,0.1376736044883728
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,1,24,4,128,1,float16,fp8,0,0.12842080593109131
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,1,24,4,128,1,fp8,fp8,0,0.12953280210494994
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16,24,8,128,1,float16,fp8,0,0.010390400141477584
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,1,24,8,128,1,float16,fp8,0,0.12848960161209105
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,1,24,8,128,1,fp8,fp8,0,0.12869280576705933
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,1,24,24,128,1,float16,float16,0,0.07580959796905518
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,1,24,24,128,1,float16,fp8,0,0.06999679803848266
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16,24,1,128,1,float16,float16,0,0.010438399761915207
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,1,24,1,128,1,float16,float16,0,0.07480639815330506
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,1,24,1,128,1,fp8,fp8,0,0.06942880153656006
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,1,24,2,128,1,float16,float16,0,0.07477599978446961
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,1,24,2,128,1,float16,fp8,0,0.06963359713554382
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,1,24,2,128,1,fp8,fp8,0,0.06887999773025513
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,1,24,4,128,1,float16,fp8,0,0.0691375970840454
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16,24,4,128,1,float16,fp8,0,0.010391999781131745
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,1,24,4,128,1,fp8,fp8,0,0.06924319863319398
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,1,24,8,128,1,float16,float16,0,0.07521119713783264
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,1,24,8,128,1,float16,fp8,0,0.06959519982337951
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1,24,24,128,1,float16,float16,0,0.04312799870967865
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1,24,24,128,1,float16,fp8,0,0.03947199881076813
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1,24,24,128,1,fp8,fp8,0,0.03957119882106781
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1,24,1,128,1,float16,float16,0,0.04259839951992035
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1,24,1,128,1,float16,fp8,0,0.03917919993400574
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1,24,1,128,1,fp8,fp8,0,0.03918719887733459
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1,24,2,128,1,float16,float16,0,0.04241439998149872
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1,24,2,128,1,float16,fp8,0,0.039175999164581296
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,1,24,1,128,1,float16,float16,0,0.1386944055557251
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,16,24,2,128,1,float16,float16,0,0.1388479948043823
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1,24,2,128,1,fp8,fp8,0,0.0399071991443634
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1,24,4,128,1,float16,float16,0,0.043166399002075195
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1,24,4,128,1,float16,fp8,0,0.04009599983692169
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1,24,4,128,1,fp8,fp8,0,0.03949759900569916
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1,24,8,128,1,float16,float16,0,0.043182399868965146
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1,24,8,128,1,float16,fp8,0,0.039284801483154295
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1,24,8,128,1,fp8,fp8,0,0.03980799913406372
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1,24,24,128,1,float16,float16,0,0.026796799898147584
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1,24,24,128,1,float16,fp8,0,0.024719999730587007
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1,24,24,128,1,fp8,fp8,0,0.024796800315380098
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1,24,1,128,1,float16,float16,0,0.025736001133918763
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1,24,1,128,1,float16,fp8,0,0.024784000217914583
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,1,24,8,128,1,float16,float16,0,0.13769439458847046
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1,24,1,128,1,fp8,fp8,0,0.024748800694942473
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1,24,2,128,1,float16,fp8,0,0.024771200120449068
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1,24,2,128,1,fp8,fp8,0,0.024732799828052522
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1,24,4,128,1,float16,float16,0,0.02520799934864044
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1,24,4,128,1,float16,fp8,0,0.024718399345874786
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,1,24,24,128,1,fp8,fp8,0,0.07003520131111145
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,1,24,1,128,1,float16,fp8,0,0.06924319863319398
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1,24,4,128,1,fp8,fp8,0,0.02480800002813339
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1,24,8,128,1,float16,float16,0,0.026657599210739135
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1,24,8,128,1,float16,fp8,0,0.02481600046157837
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1,24,24,128,1,float16,float16,0,0.018636800348758698
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1,24,24,128,1,float16,fp8,0,0.01655520051717758
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1,24,1,128,1,float16,float16,0,0.01682559996843338
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1,24,1,128,1,float16,fp8,0,0.016547200083732606
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1,24,1,128,1,fp8,fp8,0,0.016607999801635742
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1,24,2,128,1,float16,float16,0,0.016627199947834015
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,1,24,4,128,1,float16,float16,0,0.07471519708633423
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1,24,2,128,1,float16,fp8,0,0.016616000235080718
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1,24,2,128,1,fp8,fp8,0,0.016547200083732606
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1,24,4,128,1,float16,float16,0,0.016564799845218657
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1,24,8,128,1,float16,float16,0,0.01659359931945801
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1,24,4,128,1,float16,fp8,0,0.01655520051717758
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1,24,4,128,1,fp8,fp8,0,0.016531200706958772
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1,24,8,128,1,float16,fp8,0,0.016539199650287627
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,1,24,8,128,1,fp8,fp8,0,0.069651198387146
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1,24,8,128,1,fp8,fp8,0,0.016599999368190767
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1,24,24,128,1,float16,float16,0,0.012724800407886505
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1,24,24,128,1,float16,fp8,0,0.012513600289821625
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1,24,1,128,1,float16,float16,0,0.012510399520397186
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1,24,1,128,1,float16,fp8,0,0.012449599802494049
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1,24,1,128,1,fp8,fp8,0,0.012492799758911132
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1,24,2,128,1,float16,float16,0,0.012529599666595458
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1,24,2,128,1,float16,fp8,0,0.012443199753761292
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1,24,2,128,1,fp8,fp8,0,0.012478400021791458
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1,24,4,128,1,float16,float16,0,0.012542399764060973
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1,24,4,128,1,float16,fp8,0,0.012535999715328216
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1,24,4,128,1,fp8,fp8,0,0.01249760016798973
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1,24,8,128,1,float16,float16,0,0.012806400656700134
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1,24,8,128,1,float16,fp8,0,0.012464000284671784
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1,24,8,128,1,fp8,fp8,0,0.0125231996178627
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1,24,24,128,1,float16,fp8,0,0.010449600219726563
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1,24,24,128,1,fp8,fp8,0,0.01045759990811348
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1,24,1,128,1,float16,float16,0,0.010473600029945374
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1,24,1,128,1,float16,fp8,0,0.010476800054311753
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1,24,1,128,1,fp8,fp8,0,0.010414399951696397
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1,24,2,128,1,float16,float16,0,0.010446400195360184
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1,24,2,128,1,float16,fp8,0,0.01079839989542961
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1,24,2,128,1,fp8,fp8,0,0.0104592002928257
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1,24,4,128,1,float16,float16,0,0.010387200117111205
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1,24,4,128,1,fp8,fp8,0,0.010384000092744827
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1,24,4,128,1,float16,fp8,0,0.010441599786281586
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1,24,8,128,1,float16,float16,0,0.010449600219726563
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1,24,8,128,1,float16,fp8,0,0.01043199971318245
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1,24,8,128,1,fp8,fp8,0,0.010366400331258773
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1,24,24,128,1,float16,fp8,0,0.008980800211429597
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1,24,24,128,1,fp8,fp8,0,0.008849599957466125
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1,24,1,128,1,float16,float16,0,0.010380800068378448
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1,24,1,128,1,float16,fp8,0,0.00944959968328476
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1,24,1,128,1,fp8,fp8,0,0.009092800319194794
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1,24,2,128,1,float16,float16,0,0.010417599976062775
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1,24,2,128,1,float16,float16,0,0.025766399502754212
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1,24,2,128,1,float16,fp8,0,0.0094991996884346
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1,24,2,128,1,fp8,fp8,0,0.009804800152778625
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1,24,4,128,1,float16,fp8,0,0.010311999917030334
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1,24,4,128,1,fp8,fp8,0,0.010369600355625152
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1,24,8,128,1,float16,float16,0,0.010427200049161912
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1,24,24,128,1,fp8,fp8,0,0.01660960018634796
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1,24,8,128,1,float16,fp8,0,0.010480000078678131
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1,24,8,128,1,fp8,fp8,0,0.00840959995985031
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1,24,24,128,1,float16,float16,0,0.010462400317192078
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1,24,24,128,1,float16,fp8,0,0.00862239971756935
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1,24,24,128,1,fp8,fp8,0,0.010412800312042236
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1,24,1,128,1,float16,float16,0,0.009513600170612336
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1,24,1,128,1,fp8,fp8,0,0.008489599823951722
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1,24,2,128,1,float16,float16,0,0.009438399970531464
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1,24,1,128,1,float16,fp8,0,0.008561599999666214
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1,24,2,128,1,float16,fp8,0,0.009571199864149093
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1,24,2,128,1,fp8,fp8,0,0.008843199908733368
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1,24,4,128,1,float16,float16,0,0.009884800016880035
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1,24,4,128,1,fp8,fp8,0,0.008393599838018417
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1,24,8,128,1,float16,float16,0,0.010215999931097031
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1,24,8,128,1,float16,fp8,0,0.009310399740934372
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1,24,24,128,1,fp8,fp8,0,0.012571200728416443
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1,24,24,128,1,float16,float16,0,0.010475199669599533
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1,24,24,128,1,float16,float16,0,0.0104032002389431
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1,24,4,128,1,float16,float16,0,0.010417599976062775
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1,24,8,128,1,fp8,fp8,0,0.024792000651359558
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1,24,4,128,1,float16,fp8,0,0.009785600006580353
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1,24,8,128,1,fp8,fp8,0,0.009403199702501298
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,16384,16,1,128,1,float16,fp8,0,5.738873672485352
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,16384,16,1,128,1,fp8,fp8,0,5.6234081268310545
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,16384,16,1,128,1,float16,float16,0,7.161780548095703
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,16384,16,2,128,1,float16,fp8,0,5.706889724731445
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,16384,16,2,128,1,fp8,fp8,0,5.710204696655273
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,16384,16,2,128,1,float16,float16,0,7.299406433105469
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,16384,16,4,128,1,float16,float16,0,7.329917144775391
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,16384,16,4,128,1,float16,fp8,0,5.803747177124023
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,16384,16,4,128,1,fp8,fp8,0,5.677735900878906
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,16384,16,8,128,1,float16,fp8,0,5.752766418457031
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,16384,16,8,128,1,float16,float16,0,7.279777526855469
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,16384,16,8,128,1,fp8,fp8,0,5.813273620605469
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16384,16,1,128,1,float16,fp8,0,2.876091194152832
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16384,16,1,128,1,float16,float16,0,3.5831424713134767
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16384,16,16,128,1,float16,fp8,0,2.8993951797485353
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16384,16,1,128,1,fp8,fp8,0,2.909929656982422
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16384,16,16,128,1,fp8,fp8,0,2.9376991271972654
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16384,16,2,128,1,float16,fp8,0,2.9137599945068358
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16384,16,2,128,1,fp8,fp8,0,2.953374481201172
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16384,16,2,128,1,float16,float16,0,3.7555118560791017
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16384,16,4,128,1,float16,fp8,0,2.941703987121582
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16384,16,4,128,1,fp8,fp8,0,2.8709295272827147
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16384,16,4,128,1,float16,float16,0,3.8640846252441405
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16384,16,16,128,1,float16,fp8,0,1.5594816207885742
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16384,16,8,128,1,float16,float16,0,3.6680225372314452
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16384,16,8,128,1,float16,fp8,0,2.9768943786621094
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16384,16,16,128,1,fp8,fp8,0,1.5424912452697754
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16384,16,8,128,1,fp8,fp8,0,2.9596704483032226
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16384,16,1,128,1,float16,float16,0,1.9519744873046876
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16384,16,1,128,1,float16,fp8,0,1.4991264343261719
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16384,16,1,128,1,fp8,fp8,0,1.8504623413085937
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16384,16,2,128,1,float16,float16,0,1.7632783889770507
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16384,16,2,128,1,float16,fp8,0,1.5005023956298829
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16384,16,2,128,1,fp8,fp8,0,1.5590335845947265
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16384,16,4,128,1,float16,fp8,0,1.4763296127319336
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16384,16,4,128,1,float16,float16,0,1.9195024490356445
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16384,16,4,128,1,fp8,fp8,0,1.4974767684936523
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16384,16,8,128,1,float16,fp8,0,1.5017312049865723
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16384,16,8,128,1,fp8,fp8,0,1.551806354522705
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16384,16,16,128,1,float16,fp8,0,1.0463775634765624
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16384,16,8,128,1,float16,float16,0,2.0327808380126955
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16384,16,16,128,1,fp8,fp8,0,0.8297568321228027
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16384,16,1,128,1,float16,float16,0,0.9071215629577637
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16384,16,1,128,1,float16,fp8,0,0.8295104026794433
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16384,16,1,128,1,fp8,fp8,0,0.8704192161560058
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16384,16,2,128,1,float16,fp8,0,0.799780797958374
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16384,16,2,128,1,float16,float16,0,1.0874544143676759
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16384,16,2,128,1,fp8,fp8,0,0.8480959892272949
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16384,16,4,128,1,float16,float16,0,0.9118096351623535
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16384,16,4,128,1,float16,fp8,0,0.8228351593017578
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16384,16,4,128,1,fp8,fp8,0,0.8001536369323731
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16384,16,8,128,1,float16,float16,0,1.0865008354187011
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16384,16,8,128,1,fp8,fp8,0,0.8106047630310058
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,12288,16,1,128,1,float16,fp8,0,3.340411376953125
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,12288,16,1,128,1,float16,float16,0,4.063471984863281
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,12288,16,1,128,1,fp8,fp8,0,3.4544704437255858
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16384,16,8,128,1,float16,fp8,0,0.8285903930664062
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,12288,16,2,128,1,float16,float16,0,4.1742401123046875
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,12288,16,2,128,1,float16,fp8,0,3.356796646118164
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,12288,16,4,128,1,float16,fp8,0,3.283980941772461
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,12288,16,2,128,1,fp8,fp8,0,3.645008087158203
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,12288,16,4,128,1,float16,float16,0,4.255177688598633
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,12288,16,4,128,1,fp8,fp8,0,3.6262096405029296
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,12288,16,16,128,1,float16,fp8,0,1.7507007598876954
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,12288,16,8,128,1,float16,fp8,0,3.326358413696289
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,12288,16,8,128,1,float16,float16,0,4.243012619018555
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,12288,16,8,128,1,fp8,fp8,0,3.3764926910400392
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,12288,16,16,128,1,fp8,fp8,0,1.7553056716918944
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,12288,16,1,128,1,float16,float16,0,1.9458431243896483
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,12288,16,1,128,1,fp8,fp8,0,1.726755142211914
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,12288,16,1,128,1,float16,fp8,0,2.0136064529418944
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,12288,16,2,128,1,float16,float16,0,1.909984016418457
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,12288,16,2,128,1,fp8,fp8,0,1.7159744262695313
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,12288,16,2,128,1,float16,fp8,0,2.0417728424072266
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,12288,16,4,128,1,float16,float16,0,2.030617523193359
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,12288,16,4,128,1,float16,fp8,0,1.9146896362304688
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,12288,16,4,128,1,fp8,fp8,0,1.7170656204223633
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,12288,16,8,128,1,float16,float16,0,1.9983264923095703
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,12288,16,8,128,1,float16,fp8,0,2.0036384582519533
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,12288,16,8,128,1,fp8,fp8,0,1.7497039794921876
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,12288,16,16,128,1,float16,fp8,0,1.0749279975891113
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,12288,16,16,128,1,fp8,fp8,0,1.0122447967529298
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,12288,16,1,128,1,float16,fp8,0,0.936961555480957
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,12288,16,1,128,1,fp8,fp8,0,0.8862959861755371
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,12288,16,2,128,1,float16,float16,0,0.9977024078369141
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,12288,16,2,128,1,float16,fp8,0,0.8889727592468262
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,12288,16,2,128,1,fp8,fp8,0,0.8950127601623535
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,12288,16,4,128,1,float16,float16,0,0.997873592376709
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,12288,16,4,128,1,float16,fp8,0,0.9243760108947754
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,12288,16,4,128,1,fp8,fp8,0,0.8784432411193848
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,12288,16,1,128,1,float16,float16,0,0.994422435760498
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,12288,16,8,128,1,float16,float16,0,1.027667236328125
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,12288,16,8,128,1,fp8,fp8,0,0.8785872459411621
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,12288,16,16,128,1,float16,fp8,0,0.5034080028533936
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,12288,16,16,128,1,fp8,fp8,0,0.5032495975494384
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,12288,16,1,128,1,float16,float16,0,0.5314320087432861
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,12288,16,8,128,1,float16,fp8,0,0.9508848190307617
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,12288,16,1,128,1,float16,fp8,0,0.4860623836517334
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,12288,16,1,128,1,fp8,fp8,0,0.4865888118743896
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,12288,16,2,128,1,float16,float16,0,0.532476806640625
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,12288,16,2,128,1,float16,fp8,0,0.48762078285217286
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,12288,16,2,128,1,fp8,fp8,0,0.4867087841033936
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,12288,16,4,128,1,float16,float16,0,0.5383423805236817
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,12288,16,4,128,1,float16,fp8,0,0.487886381149292
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,12288,16,4,128,1,fp8,fp8,0,0.4871520042419434
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,12288,16,8,128,1,float16,fp8,0,0.4885871887207031
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,12288,16,8,128,1,float16,float16,0,0.545689582824707
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,12288,16,8,128,1,fp8,fp8,0,0.48893280029296876
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,10240,16,1,128,1,float16,float16,0,2.763849639892578
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,10240,16,1,128,1,float16,fp8,0,2.312198448181152
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,10240,16,1,128,1,fp8,fp8,0,2.7255264282226563
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,10240,16,2,128,1,float16,float16,0,2.7682031631469726
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,10240,16,2,128,1,float16,fp8,0,2.361350440979004
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,10240,16,2,128,1,fp8,fp8,0,2.6776336669921874
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,10240,16,4,128,1,float16,float16,0,2.8450176239013674
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,10240,16,4,128,1,float16,fp8,0,2.331164741516113
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,10240,16,4,128,1,fp8,fp8,0,2.703432083129883
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,10240,16,8,128,1,float16,float16,0,2.9947536468505858
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,10240,16,8,128,1,float16,fp8,0,2.369824028015137
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,10240,16,8,128,1,fp8,fp8,0,2.788835144042969
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,10240,16,16,128,1,float16,fp8,0,1.2795056343078612
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,10240,16,16,128,1,fp8,fp8,0,1.2487711906433105
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,10240,16,1,128,1,float16,float16,0,1.3556063652038575
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,10240,16,1,128,1,float16,fp8,0,1.357686424255371
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,10240,16,1,128,1,fp8,fp8,0,1.266660785675049
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,10240,16,2,128,1,float16,float16,0,1.3635215759277344
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,10240,16,2,128,1,float16,fp8,0,1.4260479927062988
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,10240,16,2,128,1,fp8,fp8,0,1.261729621887207
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,10240,16,4,128,1,float16,float16,0,1.4045087814331054
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,10240,16,4,128,1,float16,fp8,0,1.432164764404297
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,10240,16,4,128,1,fp8,fp8,0,1.2691776275634765
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,10240,16,8,128,1,float16,float16,0,1.4127296447753905
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,10240,16,8,128,1,float16,fp8,0,1.51527681350708
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,10240,16,8,128,1,fp8,fp8,0,1.3215503692626953
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,12288,16,16,128,1,float16,float16,0,0.5659584045410156
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,10240,16,16,128,1,float16,fp8,0,0.6631904125213623
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,12288,16,16,128,1,float16,float16,0,2.060408020019531
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,10240,16,16,128,1,fp8,fp8,0,0.6632063865661622
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,10240,16,1,128,1,float16,float16,0,0.8264944076538085
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16384,16,16,128,1,float16,float16,0,3.6237823486328127
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,10240,16,1,128,1,float16,fp8,0,0.6397456169128418
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,10240,16,1,128,1,fp8,fp8,0,0.7360112190246582
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,10240,16,2,128,1,float16,float16,0,0.8554816246032715
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16384,16,16,128,1,float16,float16,0,0.943665599822998
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16384,16,16,128,1,float16,float16,0,1.7617647171020507
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,10240,16,2,128,1,fp8,fp8,0,0.7096735954284668
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,10240,16,4,128,1,float16,fp8,0,0.6388448238372803
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,10240,16,4,128,1,float16,float16,0,0.8103872299194336
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,10240,16,4,128,1,fp8,fp8,0,0.6382751941680909
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,10240,16,16,128,1,float16,float16,0,1.4500016212463378
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,10240,16,8,128,1,float16,float16,0,0.7393199920654296
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,10240,16,16,128,1,float16,float16,0,0.42470879554748536
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,10240,16,8,128,1,float16,fp8,0,0.6652463912963867
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,10240,16,16,128,1,float16,fp8,0,0.4349936008453369
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,12288,16,16,128,1,float16,float16,0,1.0518159866333008
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,10240,16,8,128,1,fp8,fp8,0,0.7167471885681153
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,10240,16,1,128,1,float16,float16,0,0.3916800022125244
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,10240,16,16,128,1,fp8,fp8,0,0.383785605430603
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,10240,16,1,128,1,fp8,fp8,0,0.38949921131134035
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,10240,16,2,128,1,float16,float16,0,0.3865855932235718
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,10240,16,2,128,1,float16,fp8,0,0.39374239444732667
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,10240,16,2,128,1,fp8,fp8,0,0.3753488063812256
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,10240,16,4,128,1,float16,float16,0,0.3993247985839844
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,10240,16,4,128,1,float16,fp8,0,0.3903503894805908
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,10240,16,16,128,1,float16,float16,0,0.7554128170013428
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,10240,16,8,128,1,float16,float16,0,0.42113118171691893
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,10240,16,8,128,1,float16,fp8,0,0.37669920921325684
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,10240,16,8,128,1,fp8,fp8,0,0.3800672054290771
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,10240,16,2,128,1,float16,fp8,0,0.6394239902496338
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,10240,16,1,128,1,float16,fp8,0,0.365064001083374
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,8192,16,1,128,1,float16,fp8,0,3.011911964416504
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,8192,16,1,128,1,fp8,fp8,0,3.074363136291504
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,8192,16,2,128,1,float16,fp8,0,3.0492496490478516
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,8192,16,1,128,1,float16,float16,0,3.678505706787109
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,10240,16,4,128,1,fp8,fp8,0,0.36025280952453614
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,8192,16,2,128,1,float16,float16,0,3.686667251586914
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,8192,16,2,128,1,fp8,fp8,0,3.011790466308594
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,8192,16,16,128,1,float16,fp8,0,1.6271295547485352
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,8192,16,16,128,1,float16,float16,0,2.0467903137207033
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,8192,16,4,128,1,float16,fp8,0,3.0921920776367187
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,8192,16,4,128,1,fp8,fp8,0,3.0728288650512696
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,8192,16,4,128,1,float16,float16,0,3.7577888488769533
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,8192,16,8,128,1,fp8,fp8,0,3.0577791213989256
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,8192,16,8,128,1,float16,fp8,0,3.201830291748047
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,8192,16,8,128,1,float16,float16,0,3.9127391815185546
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,8192,16,16,128,1,fp8,fp8,0,1.6394128799438477
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,8192,16,1,128,1,float16,fp8,0,1.6005472183227538
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,8192,16,1,128,1,float16,float16,0,1.8681007385253907
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,8192,16,1,128,1,fp8,fp8,0,1.5600576400756836
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,8192,16,2,128,1,float16,fp8,0,1.5911104202270507
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,8192,16,2,128,1,fp8,fp8,0,1.5846351623535155
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,8192,16,2,128,1,float16,float16,0,1.9565872192382812
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,8192,16,4,128,1,float16,fp8,0,1.5594736099243165
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,8192,16,4,128,1,float16,float16,0,1.9836912155151367
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,8192,16,4,128,1,fp8,fp8,0,1.5375071525573731
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,8192,16,8,128,1,float16,fp8,0,1.5660719871520996
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,8192,16,16,128,1,float16,float16,0,0.9757760047912598
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,8192,16,16,128,1,float16,fp8,0,1.0392959594726563
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,8192,16,8,128,1,float16,float16,0,1.9915008544921875
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,8192,16,16,128,1,fp8,fp8,0,0.859284782409668
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,8192,16,1,128,1,float16,fp8,0,0.8027168273925781
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,8192,16,1,128,1,float16,float16,0,1.0130224227905273
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,8192,16,1,128,1,fp8,fp8,0,0.9655424118041992
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,8192,16,2,128,1,float16,float16,0,0.9653247833251953
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,8192,16,2,128,1,fp8,fp8,0,0.8096431732177735
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,8192,16,2,128,1,float16,fp8,0,0.9914768218994141
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,8192,16,4,128,1,float16,float16,0,0.9034383773803711
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,8192,16,4,128,1,float16,fp8,0,0.9702896118164063
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,8192,16,4,128,1,fp8,fp8,0,0.8436127662658691
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,8192,16,8,128,1,float16,float16,0,0.9493807792663574
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,8192,16,8,128,1,float16,fp8,0,0.8087056159973145
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,8192,16,16,128,1,float16,float16,0,0.510103988647461
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,8192,16,16,128,1,float16,fp8,0,0.5057712078094483
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,8192,16,8,128,1,fp8,fp8,0,0.8869711875915527
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,8192,16,16,128,1,fp8,fp8,0,0.4728096008300781
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,8192,16,1,128,1,float16,fp8,0,0.43595519065856936
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,8192,16,1,128,1,fp8,fp8,0,0.4360943794250488
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,8192,16,1,128,1,float16,float16,0,0.5065872192382812
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,8192,16,2,128,1,float16,float16,0,0.4865231990814209
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,8192,16,2,128,1,float16,fp8,0,0.45976958274841306
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,8192,16,2,128,1,fp8,fp8,0,0.4530640125274658
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,8192,16,4,128,1,float16,float16,0,0.4800992012023926
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,8192,16,4,128,1,float16,fp8,0,0.48360638618469237
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,8192,16,4,128,1,fp8,fp8,0,0.5052112102508545
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,8192,16,8,128,1,float16,fp8,0,0.4500895977020264
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,8192,16,8,128,1,float16,float16,0,0.5010464191436768
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,8192,16,16,128,1,float16,float16,0,0.2855855941772461
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,8192,16,16,128,1,float16,fp8,0,0.28003840446472167
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,8192,16,8,128,1,fp8,fp8,0,0.4387343883514404
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,8192,16,1,128,1,float16,float16,0,0.26630239486694335
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,8192,16,1,128,1,float16,fp8,0,0.26496798992156984
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,8192,16,1,128,1,fp8,fp8,0,0.25367839336395265
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,8192,16,8,128,1,fp8,fp8,0,1.573417568206787
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,8192,16,2,128,1,float16,fp8,0,0.2537775993347168
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,8192,16,2,128,1,fp8,fp8,0,0.2561503887176514
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,8192,16,4,128,1,float16,float16,0,0.2858464002609253
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,8192,16,4,128,1,float16,fp8,0,0.25286240577697755
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,8192,16,4,128,1,fp8,fp8,0,0.2527872085571289
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,8192,16,8,128,1,float16,float16,0,0.275984001159668
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,8192,16,8,128,1,fp8,fp8,0,0.2674416065216064
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,8192,16,8,128,1,float16,fp8,0,0.25479199886322024
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,6144,16,1,128,1,float16,fp8,0,1.7842784881591798
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,6144,16,1,128,1,fp8,fp8,0,1.781305694580078
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,6144,16,1,128,1,float16,float16,0,2.0101776123046875
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,6144,16,2,128,1,float16,float16,0,2.0899023056030273
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,6144,16,2,128,1,float16,fp8,0,1.782094383239746
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,8192,16,16,128,1,fp8,fp8,0,0.299619197845459
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,6144,16,2,128,1,fp8,fp8,0,1.7844383239746093
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,8192,16,2,128,1,float16,float16,0,0.27835679054260254
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,6144,16,4,128,1,float16,fp8,0,1.7830575942993163
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,6144,16,4,128,1,float16,float16,0,2.0465408325195313
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,6144,16,4,128,1,fp8,fp8,0,2.133742332458496
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,6144,16,16,128,1,float16,float16,0,1.143113613128662
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,6144,16,8,128,1,float16,fp8,0,1.7830751419067383
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,6144,16,16,128,1,float16,fp8,0,0.9744607925415039
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,6144,16,8,128,1,float16,float16,0,2.4164960861206053
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,6144,16,8,128,1,fp8,fp8,0,1.7867631912231445
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,6144,16,16,128,1,fp8,fp8,0,0.9788800239562988
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,6144,16,1,128,1,float16,float16,0,1.1420639991760253
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,6144,16,1,128,1,float16,fp8,0,0.9702768325805664
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,6144,16,1,128,1,fp8,fp8,0,0.9370047569274902
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,6144,16,2,128,1,float16,fp8,0,0.9193103790283204
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,6144,16,2,128,1,float16,float16,0,1.0159407615661622
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,6144,16,4,128,1,float16,fp8,0,0.9196928024291993
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,6144,16,4,128,1,float16,float16,0,1.0471808433532714
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,6144,16,2,128,1,fp8,fp8,0,1.115839958190918
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,6144,16,16,128,1,float16,float16,0,0.6264287948608398
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,6144,16,8,128,1,float16,fp8,0,0.9638895988464355
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,6144,16,4,128,1,fp8,fp8,0,1.0818304061889648
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,6144,16,8,128,1,fp8,fp8,0,0.9206624031066895
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,6144,16,8,128,1,float16,float16,0,1.0761103630065918
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,6144,16,16,128,1,fp8,fp8,0,0.5155519962310791
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,6144,16,16,128,1,float16,fp8,0,0.5150063991546631
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,6144,16,1,128,1,float16,float16,0,0.5916255950927735
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,6144,16,1,128,1,float16,fp8,0,0.5707632064819336
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,6144,16,1,128,1,fp8,fp8,0,0.498140811920166
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,6144,16,2,128,1,float16,float16,0,0.5530528068542481
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,6144,16,2,128,1,fp8,fp8,0,0.5476992130279541
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,6144,16,4,128,1,float16,fp8,0,0.4868368148803711
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,6144,16,4,128,1,float16,float16,0,0.5616864204406739
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,6144,16,4,128,1,fp8,fp8,0,0.5346511840820313
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,6144,16,8,128,1,float16,float16,0,0.566318416595459
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,6144,16,16,128,1,float16,float16,0,0.3311471939086914
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,6144,16,8,128,1,float16,fp8,0,0.49358558654785156
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,6144,16,16,128,1,fp8,fp8,0,0.3098511934280396
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,6144,16,8,128,1,fp8,fp8,0,0.5416639804840088
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,6144,16,1,128,1,float16,float16,0,0.29436960220336916
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,6144,16,1,128,1,float16,fp8,0,0.28823840618133545
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,6144,16,1,128,1,fp8,fp8,0,0.29487359523773193
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,6144,16,2,128,1,float16,float16,0,0.2879728078842163
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,6144,16,2,128,1,float16,fp8,0,0.2919359922409058
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,6144,16,2,128,1,fp8,fp8,0,0.30109119415283203
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,6144,16,4,128,1,float16,float16,0,0.2938431978225708
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,6144,16,4,128,1,float16,fp8,0,0.2783184051513672
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,6144,16,4,128,1,fp8,fp8,0,0.28447999954223635
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,6144,16,8,128,1,float16,float16,0,0.30080161094665525
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,6144,16,8,128,1,float16,fp8,0,0.28994719982147216
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,6144,16,16,128,1,float16,float16,0,0.1827615976333618
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,6144,16,8,128,1,fp8,fp8,0,0.2872368097305298
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,6144,16,16,128,1,fp8,fp8,0,0.1721232056617737
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,6144,16,16,128,1,float16,fp8,0,0.17375999689102173
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,6144,16,1,128,1,float16,float16,0,0.17419999837875366
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,6144,16,1,128,1,float16,fp8,0,0.16324640512466432
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,6144,16,1,128,1,fp8,fp8,0,0.16756000518798828
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,6144,16,2,128,1,float16,fp8,0,0.16663999557495118
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,6144,16,2,128,1,fp8,fp8,0,0.1640768051147461
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,6144,16,4,128,1,float16,float16,0,0.17112319469451903
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,6144,16,4,128,1,float16,fp8,0,0.16509599685668946
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,6144,16,4,128,1,fp8,fp8,0,0.16695359945297242
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,6144,16,8,128,1,float16,fp8,0,0.1631983995437622
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,6144,16,8,128,1,fp8,fp8,0,0.1650912046432495
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,6144,16,2,128,1,float16,fp8,0,0.4877808094024658
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,4096,16,1,128,1,float16,float16,0,1.867416000366211
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,4096,16,1,128,1,float16,fp8,0,1.7185279846191406
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,4096,16,1,128,1,fp8,fp8,0,1.7425952911376954
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,6144,16,16,128,1,float16,fp8,0,0.286190390586853
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,6144,16,2,128,1,float16,float16,0,0.17019200325012207
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,4096,16,2,128,1,float16,float16,0,1.8858816146850585
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,6144,16,8,128,1,float16,float16,0,0.1757215976715088
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,4096,16,4,128,1,float16,fp8,0,1.7157440185546875
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,4096,16,2,128,1,float16,fp8,0,1.9618000030517577
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,4096,16,4,128,1,float16,float16,0,2.0112367630004884
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,4096,16,4,128,1,fp8,fp8,0,1.7197744369506835
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,4096,16,16,128,1,float16,fp8,0,0.9944623947143555
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,4096,16,16,128,1,float16,float16,0,1.1090304374694824
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,4096,16,16,128,1,fp8,fp8,0,1.2074959754943848
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,4096,16,8,128,1,float16,float16,0,2.0785215377807615
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,4096,16,8,128,1,float16,fp8,0,1.918716812133789
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,4096,16,8,128,1,fp8,fp8,0,1.7237600326538085
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,4096,16,1,128,1,float16,float16,0,0.956112003326416
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,4096,16,1,128,1,float16,fp8,0,0.891652774810791
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,4096,16,1,128,1,fp8,fp8,0,0.9559295654296875
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,4096,16,2,128,1,float16,float16,0,1.024187183380127
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,4096,16,2,128,1,fp8,fp8,0,0.9317999839782715
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,4096,16,2,128,1,float16,fp8,0,1.0450927734375
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,4096,16,4,128,1,float16,fp8,0,0.9073391914367676
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,4096,16,4,128,1,fp8,fp8,0,0.8901503562927247
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,4096,16,16,128,1,float16,float16,0,0.5800032138824462
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,4096,16,16,128,1,float16,fp8,0,0.5200384140014649
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,4096,16,2,128,1,fp8,fp8,0,1.716102409362793
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,4096,16,8,128,1,fp8,fp8,0,0.8932016372680665
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,4096,16,8,128,1,float16,fp8,0,1.0078191757202148
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,4096,16,16,128,1,fp8,fp8,0,0.4972544193267822
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,4096,16,1,128,1,float16,float16,0,0.49358720779418946
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,4096,16,1,128,1,float16,fp8,0,0.4690159797668457
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,4096,16,1,128,1,fp8,fp8,0,0.45870561599731446
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,4096,16,2,128,1,float16,float16,0,0.4890255928039551
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,4096,16,2,128,1,float16,fp8,0,0.49584479331970216
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,4096,16,2,128,1,fp8,fp8,0,0.45937280654907225
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,4096,16,4,128,1,float16,float16,0,0.5133471965789795
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,4096,16,4,128,1,float16,fp8,0,0.4696415901184082
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,4096,16,4,128,1,fp8,fp8,0,0.4597008228302002
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,4096,16,8,128,1,float16,float16,0,0.520030403137207
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,4096,16,16,128,1,float16,float16,0,0.3091599941253662
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,4096,16,8,128,1,float16,fp8,0,0.4757023811340332
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,4096,16,16,128,1,fp8,fp8,0,0.2698048114776611
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,4096,16,1,128,1,float16,float16,0,0.2596992015838623
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,4096,16,1,128,1,fp8,fp8,0,0.25846879482269286
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,4096,16,2,128,1,float16,float16,0,0.26001439094543455
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,4096,16,1,128,1,float16,fp8,0,0.2839423894882202
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,4096,16,2,128,1,float16,fp8,0,0.2563199996948242
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,4096,16,2,128,1,fp8,fp8,0,0.2532527923583984
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,4096,16,4,128,1,float16,fp8,0,0.25085599422454835
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,4096,16,4,128,1,fp8,fp8,0,0.25623838901519774
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,4096,16,8,128,1,float16,float16,0,0.27727200984954836
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,4096,16,8,128,1,float16,fp8,0,0.25090560913085935
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,4096,16,8,128,1,fp8,fp8,0,0.25648961067199705
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,4096,16,16,128,1,float16,float16,0,0.16770720481872559
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,4096,16,16,128,1,float16,fp8,0,0.15538400411605835
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,4096,16,16,128,1,fp8,fp8,0,0.15656960010528564
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,4096,16,1,128,1,float16,float16,0,0.14991040229797364
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,4096,16,1,128,1,float16,fp8,0,0.14506399631500244
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,4096,16,1,128,1,fp8,fp8,0,0.14613120555877684
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,4096,16,2,128,1,float16,float16,0,0.14962879419326783
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,4096,16,2,128,1,float16,fp8,0,0.14495840072631835
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,4096,16,2,128,1,fp8,fp8,0,0.14515039920806885
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,4096,16,4,128,1,float16,float16,0,0.15278079509735107
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,4096,16,4,128,1,float16,fp8,0,0.14497920274734497
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,4096,16,4,128,1,fp8,fp8,0,0.14489920139312745
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,4096,16,8,128,1,float16,float16,0,0.15911519527435303
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,4096,16,8,128,1,float16,fp8,0,0.14575519561767578
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,4096,16,8,128,1,fp8,fp8,0,0.14532320499420165
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,4096,16,16,128,1,float16,float16,0,0.10576159954071045
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,4096,16,16,128,1,float16,fp8,0,0.09772480130195618
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,4096,16,16,128,1,fp8,fp8,0,0.0982815980911255
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,4096,16,1,128,1,float16,float16,0,0.09729599952697754
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,4096,16,1,128,1,float16,fp8,0,0.09283199906349182
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,4096,16,2,128,1,float16,float16,0,0.09683679938316345
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,4096,16,2,128,1,float16,fp8,0,0.09300479888916016
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,4096,16,2,128,1,fp8,fp8,0,0.09333119988441467
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,4096,16,4,128,1,float16,float16,0,0.09770399928092957
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,4096,16,4,128,1,float16,fp8,0,0.09319679737091065
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,4096,16,4,128,1,fp8,fp8,0,0.09319999814033508
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,4096,16,8,128,1,fp8,fp8,0,0.46062560081481935
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,4096,16,8,128,1,float16,fp8,0,0.09357439875602722
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,4096,16,8,128,1,fp8,fp8,0,0.09362720251083374
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,4096,16,4,128,1,float16,float16,0,0.2728832006454468
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,3072,16,1,128,1,float16,float16,0,1.1376208305358886
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,4096,16,4,128,1,float16,float16,0,0.9859919548034668
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,3072,16,1,128,1,float16,fp8,0,1.0553071975708008
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,4096,16,8,128,1,float16,float16,0,1.024176025390625
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,3072,16,1,128,1,fp8,fp8,0,1.1060848236083984
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,3072,16,2,128,1,float16,float16,0,1.139350414276123
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,4096,16,1,128,1,fp8,fp8,0,0.09365119934082031
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,3072,16,2,128,1,float16,fp8,0,1.055788803100586
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,3072,16,2,128,1,fp8,fp8,0,1.0517840385437012
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,4096,16,8,128,1,float16,float16,0,0.10017600059509277
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,4096,16,16,128,1,float16,fp8,0,0.27519679069519043
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,3072,16,4,128,1,float16,float16,0,1.1738256454467773
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,3072,16,4,128,1,float16,fp8,0,1.1248208045959474
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,3072,16,4,128,1,fp8,fp8,0,1.0549872398376465
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,3072,16,8,128,1,float16,float16,0,1.2454912185668945
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,3072,16,16,128,1,float16,float16,0,0.687440013885498
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,3072,16,8,128,1,float16,fp8,0,1.0536656379699707
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,3072,16,8,128,1,fp8,fp8,0,1.0550928115844727
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,3072,16,1,128,1,float16,float16,0,0.5693759918212891
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,3072,16,16,128,1,fp8,fp8,0,0.6229423999786377
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,3072,16,1,128,1,fp8,fp8,0,0.5430816173553467
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,3072,16,2,128,1,float16,float16,0,0.5736480236053467
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,3072,16,1,128,1,float16,fp8,0,0.6184815883636474
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,3072,16,2,128,1,fp8,fp8,0,0.5436816215515137
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,3072,16,4,128,1,float16,float16,0,0.594539213180542
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,3072,16,4,128,1,float16,fp8,0,0.5434703826904297
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,3072,16,16,128,1,float16,float16,0,0.3552544116973877
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,3072,16,8,128,1,float16,float16,0,0.6279119968414306
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,3072,16,8,128,1,float16,fp8,0,0.5439871788024903
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,3072,16,16,128,1,float16,fp8,0,0.3160304069519043
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,3072,16,16,128,1,fp8,fp8,0,0.32238240242004396
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,3072,16,1,128,1,float16,float16,0,0.30314719676971436
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,3072,16,1,128,1,float16,fp8,0,0.2889391899108887
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,3072,16,1,128,1,fp8,fp8,0,0.28849918842315675
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,3072,16,2,128,1,float16,float16,0,0.30431039333343507
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,3072,16,2,128,1,float16,fp8,0,0.30616800785064696
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,3072,16,2,128,1,fp8,fp8,0,0.2886751890182495
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,3072,16,4,128,1,float16,float16,0,0.3056960105895996
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,3072,16,4,128,1,float16,fp8,0,0.2947999954223633
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,3072,16,4,128,1,fp8,fp8,0,0.30027520656585693
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,3072,16,8,128,1,float16,float16,0,0.3179248094558716
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,3072,16,16,128,1,float16,float16,0,0.19376319646835327
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,3072,16,8,128,1,float16,fp8,0,0.2897696018218994
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,3072,16,8,128,1,fp8,fp8,0,0.2947360038757324
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,3072,16,16,128,1,float16,fp8,0,0.17598719596862794
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,3072,16,16,128,1,fp8,fp8,0,0.1816159963607788
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,3072,16,1,128,1,float16,float16,0,0.16248639822006225
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,3072,16,1,128,1,float16,fp8,0,0.16312320232391359
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,3072,16,1,128,1,fp8,fp8,0,0.16030240058898926
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,3072,16,2,128,1,float16,float16,0,0.16673120260238647
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,3072,16,2,128,1,fp8,fp8,0,0.1631119966506958
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,3072,16,16,128,1,float16,fp8,0,0.6856063842773438
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,3072,16,4,128,1,float16,fp8,0,0.16234879493713378
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,3072,16,4,128,1,fp8,fp8,0,0.16349600553512572
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,3072,16,8,128,1,float16,float16,0,0.17574080228805541
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,3072,16,8,128,1,float16,fp8,0,0.16227200031280517
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,3072,16,8,128,1,fp8,fp8,0,0.1626512050628662
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,3072,16,16,128,1,float16,float16,0,0.110862398147583
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,3072,16,16,128,1,float16,fp8,0,0.10379680395126342
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,3072,16,16,128,1,fp8,fp8,0,0.103985595703125
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,3072,16,1,128,1,float16,float16,0,0.09802719950675964
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,3072,16,1,128,1,float16,fp8,0,0.09709280133247375
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,3072,16,1,128,1,fp8,fp8,0,0.09609599709510804
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,3072,16,2,128,1,float16,fp8,0,0.5432079792022705
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,3072,16,2,128,1,float16,fp8,0,0.09730719923973083
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,3072,16,2,128,1,fp8,fp8,0,0.09633439779281616
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,3072,16,4,128,1,float16,float16,0,0.1003648042678833
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,3072,16,4,128,1,float16,fp8,0,0.09727039933204651
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,3072,16,4,128,1,fp8,fp8,0,0.09639359712600708
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,3072,16,8,128,1,float16,float16,0,0.10474720001220703
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,3072,16,8,128,1,float16,fp8,0,0.0967743992805481
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,3072,16,16,128,1,float16,float16,0,0.06927840113639831
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,3072,16,8,128,1,fp8,fp8,0,0.09675040245056152
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,3072,16,16,128,1,float16,fp8,0,0.0677839994430542
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,3072,16,8,128,1,fp8,fp8,0,0.559116792678833
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,3072,16,16,128,1,fp8,fp8,0,0.06767039895057678
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,3072,16,1,128,1,float16,fp8,0,0.06381120085716248
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,3072,16,1,128,1,float16,float16,0,0.06633279919624328
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,3072,16,1,128,1,fp8,fp8,0,0.0638592004776001
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,3072,16,2,128,1,float16,float16,0,0.06624959707260132
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,3072,16,2,128,1,float16,fp8,0,0.0637935996055603
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,3072,16,2,128,1,fp8,fp8,0,0.06362879872322083
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,3072,16,4,128,1,float16,float16,0,0.06622239947319031
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,3072,16,4,128,1,float16,fp8,0,0.06381760239601135
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,3072,16,4,128,1,fp8,fp8,0,0.06388480067253113
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,3072,16,8,128,1,float16,float16,0,0.06803359985351562
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,3072,16,8,128,1,float16,fp8,0,0.06348479986190796
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,3072,16,8,128,1,fp8,fp8,0,0.06371679902076721
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,2048,16,1,128,1,float16,float16,0,1.1040736198425294
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,2048,16,1,128,1,float16,fp8,0,1.0687552452087403
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,2048,16,2,128,1,float16,float16,0,1.1142640113830566
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,3072,16,2,128,1,float16,fp8,0,0.159934401512146
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,3072,16,4,128,1,float16,float16,0,0.16837600469589234
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,2048,16,2,128,1,fp8,fp8,0,1.0692399978637694
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,2048,16,2,128,1,float16,fp8,0,1.168342399597168
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,3072,16,2,128,1,float16,float16,0,0.09770240187644959
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,2048,16,4,128,1,float16,float16,0,1.1807616233825684
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,2048,16,4,128,1,float16,fp8,0,1.070019245147705
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,2048,16,4,128,1,fp8,fp8,0,1.0670592308044433
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,3072,16,4,128,1,fp8,fp8,0,0.6028687953948975
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,2048,16,16,128,1,float16,fp8,0,0.6182256221771241
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,2048,16,16,128,1,float16,float16,0,0.7601952075958252
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,2048,16,8,128,1,float16,fp8,0,1.10316162109375
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,2048,16,16,128,1,fp8,fp8,0,0.6178847789764405
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,2048,16,1,128,1,float16,float16,0,0.5585264205932617
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,2048,16,1,128,1,float16,fp8,0,0.6118576049804687
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,2048,16,1,128,1,fp8,fp8,0,0.553764820098877
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,2048,16,2,128,1,float16,float16,0,0.5601984024047851
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,2048,16,2,128,1,float16,fp8,0,0.5467296123504639
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,2048,16,1,128,1,fp8,fp8,0,1.0688400268554688
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,2048,16,2,128,1,fp8,fp8,0,0.6118175983428955
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,2048,16,4,128,1,float16,float16,0,0.5853392124176026
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,2048,16,4,128,1,float16,fp8,0,0.5835936069488525
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,2048,16,4,128,1,fp8,fp8,0,0.5603087902069092
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,2048,16,8,128,1,float16,float16,0,0.6185823917388916
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,2048,16,16,128,1,float16,float16,0,0.367412805557251
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,2048,16,8,128,1,float16,fp8,0,0.5601344108581543
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,2048,16,8,128,1,fp8,fp8,0,0.5489503860473632
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,2048,16,16,128,1,float16,fp8,0,0.3276479959487915
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,2048,16,1,128,1,float16,float16,0,0.28803040981292727
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,2048,16,1,128,1,float16,fp8,0,0.29033598899841306
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,2048,16,1,128,1,fp8,fp8,0,0.28734400272369387
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,2048,16,2,128,1,float16,float16,0,0.28853280544281007
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,2048,16,2,128,1,float16,fp8,0,0.2911664009094238
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,2048,16,2,128,1,fp8,fp8,0,0.28919520378112795
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,2048,16,4,128,1,float16,float16,0,0.2992208003997803
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,2048,16,4,128,1,float16,fp8,0,0.2874720096588135
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,2048,16,4,128,1,fp8,fp8,0,0.2900815963745117
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,2048,16,16,128,1,float16,float16,0,0.18980319499969484
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,2048,16,8,128,1,fp8,fp8,0,0.2884848117828369
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,2048,16,8,128,1,float16,fp8,0,0.29006879329681395
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,2048,16,16,128,1,float16,fp8,0,0.17714719772338866
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,2048,16,16,128,1,fp8,fp8,0,0.17517919540405275
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,2048,16,1,128,1,float16,float16,0,0.15478240251541137
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,2048,16,1,128,1,float16,fp8,0,0.15780479907989503
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,2048,16,1,128,1,fp8,fp8,0,0.15745279788970948
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,2048,16,8,128,1,fp8,fp8,0,1.069480037689209
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,2048,16,8,128,1,float16,float16,0,1.2456735610961913
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,2048,16,2,128,1,float16,float16,0,0.1557136058807373
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,2048,16,2,128,1,float16,fp8,0,0.15660640001296997
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,2048,16,2,128,1,fp8,fp8,0,0.15858399868011475
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,2048,16,4,128,1,float16,float16,0,0.1603119969367981
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,2048,16,4,128,1,float16,fp8,0,0.15604000091552733
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,2048,16,8,128,1,float16,fp8,0,0.15788639783859254
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,2048,16,8,128,1,fp8,fp8,0,0.1573855996131897
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,2048,16,16,128,1,float16,float16,0,0.10781760215759277
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,2048,16,16,128,1,float16,fp8,0,0.10019840002059936
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,2048,16,16,128,1,fp8,fp8,0,0.10088959932327271
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,2048,16,1,128,1,float16,float16,0,0.09035519957542419
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,2048,16,1,128,1,float16,fp8,0,0.09049440026283265
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,2048,16,1,128,1,fp8,fp8,0,0.09006239771842957
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,2048,16,2,128,1,float16,float16,0,0.09049919843673707
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,2048,16,2,128,1,float16,fp8,0,0.09039520025253296
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,2048,16,2,128,1,fp8,fp8,0,0.09005439877510071
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,2048,16,4,128,1,float16,float16,0,0.09358239769935608
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,2048,16,4,128,1,float16,fp8,0,0.09068959951400757
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,2048,16,4,128,1,fp8,fp8,0,0.09045439958572388
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,2048,16,8,128,1,float16,float16,0,0.09896640181541443
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,2048,16,8,128,1,float16,fp8,0,0.09078720211982727
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,2048,16,8,128,1,fp8,fp8,0,0.0908191978931427
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,2048,16,16,128,1,float16,float16,0,0.06535199880599976
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,2048,16,16,128,1,float16,fp8,0,0.06140000224113464
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,2048,16,1,128,1,float16,float16,0,0.058771198987960814
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,2048,16,1,128,1,float16,fp8,0,0.057291197776794436
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,2048,16,1,128,1,fp8,fp8,0,0.05703520178794861
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,2048,16,2,128,1,float16,float16,0,0.05888640284538269
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,2048,16,2,128,1,float16,fp8,0,0.05676320195198059
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,2048,16,2,128,1,fp8,fp8,0,0.056852799654006955
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,2048,16,4,128,1,float16,float16,0,0.059716802835464475
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,2048,16,4,128,1,float16,fp8,0,0.05666239857673645
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,2048,16,4,128,1,fp8,fp8,0,0.05727199912071228
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,2048,16,8,128,1,float16,float16,0,0.06217920184135437
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,2048,16,8,128,1,float16,fp8,0,0.05692160129547119
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,2048,16,16,128,1,float16,float16,0,0.039233601093292235
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,2048,16,16,128,1,float16,fp8,0,0.038966399431228635
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,2048,16,16,128,1,fp8,fp8,0,0.03831200003623962
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,2048,16,1,128,1,float16,float16,0,0.03710399866104126
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,2048,16,1,128,1,float16,fp8,0,0.035945600271224974
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,2048,16,1,128,1,fp8,fp8,0,0.036801600456237794
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,2048,16,2,128,1,float16,float16,0,0.03701759874820709
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,2048,16,2,128,1,float16,fp8,0,0.03704319894313812
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,2048,16,16,128,1,fp8,fp8,0,0.3236815929412842
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,2048,16,4,128,1,float16,float16,0,0.037427198886871335
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,2048,16,4,128,1,float16,fp8,0,0.0370608001947403
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,2048,16,4,128,1,fp8,fp8,0,0.03660320043563843
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,2048,16,8,128,1,float16,float16,0,0.0390751987695694
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,2048,16,8,128,1,float16,fp8,0,0.036585599184036255
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,2048,16,8,128,1,fp8,fp8,0,0.03699679970741272
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,2048,16,8,128,1,float16,float16,0,0.31768798828125
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1536,16,1,128,1,float16,float16,0,0.6818895816802979
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1536,16,1,128,1,float16,fp8,0,0.6846672058105469
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1536,16,1,128,1,fp8,fp8,0,0.6826432228088379
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,2048,16,4,128,1,fp8,fp8,0,0.15603359937667846
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1536,16,2,128,1,float16,float16,0,0.6848591804504395
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1536,16,2,128,1,float16,fp8,0,0.6838560104370117
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1536,16,2,128,1,fp8,fp8,0,0.6827487945556641
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1536,16,4,128,1,float16,float16,0,0.7219632148742676
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,2048,16,16,128,1,fp8,fp8,0,0.061668801307678225
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1536,16,4,128,1,float16,fp8,0,0.7879168033599854
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1536,16,4,128,1,fp8,fp8,0,0.6839983940124512
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,2048,16,8,128,1,fp8,fp8,0,0.05751519799232483
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1536,16,8,128,1,float16,fp8,0,0.6865776062011719
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1536,16,8,128,1,fp8,fp8,0,0.6847887992858886
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1536,16,16,128,1,float16,float16,0,0.4572783946990967
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,2048,16,2,128,1,fp8,fp8,0,0.036259201169013974
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1536,16,16,128,1,fp8,fp8,0,0.4072847843170166
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1536,16,1,128,1,float16,fp8,0,0.3530639886856079
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1536,16,1,128,1,fp8,fp8,0,0.3527600049972534
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1536,16,2,128,1,float16,float16,0,0.35169119834899903
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1536,16,2,128,1,float16,fp8,0,0.3529632091522217
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1536,16,2,128,1,fp8,fp8,0,0.3537215948104858
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1536,16,4,128,1,float16,float16,0,0.36667039394378664
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1536,16,4,128,1,float16,fp8,0,0.3533071994781494
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1536,16,4,128,1,fp8,fp8,0,0.3542335987091064
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,2048,16,8,128,1,float16,float16,0,0.17063039541244507
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1536,16,8,128,1,float16,fp8,0,0.35398719310760496
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1536,16,8,128,1,fp8,fp8,0,0.3548352003097534
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1536,16,16,128,1,float16,fp8,0,0.21521921157836915
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1536,16,16,128,1,float16,float16,0,0.235481595993042
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1536,16,16,128,1,fp8,fp8,0,0.21576159000396727
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1536,16,1,128,1,float16,float16,0,0.18016480207443236
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1536,16,1,128,1,float16,fp8,0,0.18774080276489258
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1536,16,1,128,1,fp8,fp8,0,0.1881392002105713
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1536,16,2,128,1,float16,float16,0,0.18152960538864135
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1536,16,2,128,1,float16,fp8,0,0.18761119842529297
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1536,16,2,128,1,fp8,fp8,0,0.18797919750213624
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1536,16,4,128,1,float16,float16,0,0.18910720348358154
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1536,16,4,128,1,float16,fp8,0,0.18784960508346557
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1536,16,4,128,1,fp8,fp8,0,0.18816959857940674
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1536,16,8,128,1,float16,float16,0,0.2046207904815674
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1536,16,8,128,1,float16,fp8,0,0.1886080026626587
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1536,16,8,128,1,fp8,fp8,0,0.18879519701004027
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1536,16,16,128,1,float16,float16,0,0.12825920581817626
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1536,16,8,128,1,float16,float16,0,0.7762576103210449
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1536,16,16,128,1,float16,fp8,0,0.11919679641723632
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1536,16,16,128,1,fp8,fp8,0,0.11979360580444336
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1536,16,16,128,1,float16,fp8,0,0.40732321739196775
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1536,16,1,128,1,fp8,fp8,0,0.10403519868850708
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1536,16,2,128,1,float16,float16,0,0.10342559814453126
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1536,16,1,128,1,float16,float16,0,0.34936959743499757
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1536,16,2,128,1,fp8,fp8,0,0.1036255955696106
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1536,16,4,128,1,float16,float16,0,0.10742399692535401
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1536,16,2,128,1,float16,fp8,0,0.10478240251541138
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1536,16,4,128,1,float16,fp8,0,0.10425920486450195
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1536,16,4,128,1,fp8,fp8,0,0.10433599948883057
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1536,16,8,128,1,float16,float16,0,0.11496800184249878
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1536,16,8,128,1,float16,fp8,0,0.10540319681167602
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1536,16,8,128,1,fp8,fp8,0,0.10531680583953858
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1536,16,16,128,1,float16,float16,0,0.07444000244140625
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1536,16,16,128,1,float16,fp8,0,0.06902080178260803
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1536,16,16,128,1,fp8,fp8,0,0.06821280121803283
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1536,16,1,128,1,float16,float16,0,0.06167680025100708
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1536,16,1,128,1,float16,fp8,0,0.06187199950218201
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1536,16,2,128,1,float16,float16,0,0.061689597368240354
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1536,16,2,128,1,float16,fp8,0,0.06167200207710266
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1536,16,4,128,1,float16,float16,0,0.06333280205726624
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1536,16,4,128,1,float16,fp8,0,0.061654400825500486
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1536,16,4,128,1,fp8,fp8,0,0.061668801307678225
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1536,16,8,128,1,float16,float16,0,0.06803839802742004
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1536,16,8,128,1,float16,fp8,0,0.06196640133857727
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1536,16,8,128,1,fp8,fp8,0,0.0622767984867096
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1536,16,16,128,1,float16,float16,0,0.04508639872074127
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1536,16,16,128,1,float16,fp8,0,0.043278399109840396
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1536,16,16,128,1,fp8,fp8,0,0.04354400038719177
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1536,16,8,128,1,float16,float16,0,0.39689600467681885
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1536,16,1,128,1,float16,fp8,0,0.04092000126838684
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1536,16,1,128,1,fp8,fp8,0,0.04118880033493042
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1536,16,2,128,1,float16,float16,0,0.04132480025291443
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1536,16,2,128,1,fp8,fp8,0,0.04117920100688934
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1536,16,4,128,1,float16,float16,0,0.042510399222373964
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1536,16,4,128,1,float16,fp8,0,0.041196799278259276
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1536,16,4,128,1,fp8,fp8,0,0.041203200817108154
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1536,16,8,128,1,float16,float16,0,0.04325439929962158
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1536,16,8,128,1,float16,fp8,0,0.041177600622177124
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1536,16,8,128,1,fp8,fp8,0,0.041228801012039185
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1536,16,16,128,1,float16,float16,0,0.03190560042858124
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1536,16,16,128,1,float16,fp8,0,0.030928000807762146
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1536,16,16,128,1,fp8,fp8,0,0.030907198786735535
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1536,16,1,128,1,float16,float16,0,0.029084798693656922
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1536,16,1,128,1,float16,fp8,0,0.028883200883865357
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1536,16,1,128,1,fp8,fp8,0,0.028916800022125246
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1536,16,2,128,1,float16,float16,0,0.02967360019683838
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1536,16,2,128,1,float16,fp8,0,0.028910401463508605
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1536,16,2,128,1,fp8,fp8,0,0.028937599062919615
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1536,16,4,128,1,float16,float16,0,0.030425599217414855
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1536,16,4,128,1,float16,fp8,0,0.02892960011959076
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1536,16,4,128,1,fp8,fp8,0,0.02887519896030426
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1536,16,8,128,1,float16,float16,0,0.03088639974594116
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1536,16,8,128,1,float16,fp8,0,0.028896000981330872
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1536,16,8,128,1,fp8,fp8,0,0.028935998678207397
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1536,16,1,128,1,float16,float16,0,0.10294560194015503
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1536,16,1,128,1,float16,fp8,0,0.10347360372543335
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,1024,16,1,128,1,float16,float16,0,0.7099728107452392
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,1024,16,1,128,1,float16,fp8,0,0.743126392364502
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,1024,16,1,128,1,fp8,fp8,0,0.7413167953491211
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,1024,16,2,128,1,float16,float16,0,0.7109839916229248
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1536,16,1,128,1,fp8,fp8,0,0.062275201082229614
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1536,16,2,128,1,fp8,fp8,0,0.062134397029876706
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,1024,16,2,128,1,float16,fp8,0,0.7622432231903076
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,1024,16,2,128,1,fp8,fp8,0,0.7430384159088135
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1536,16,1,128,1,float16,float16,0,0.04121119976043701
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,1024,16,4,128,1,float16,float16,0,0.7496463775634765
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,1024,16,4,128,1,fp8,fp8,0,0.7396880149841308
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,1024,16,4,128,1,float16,fp8,0,0.7422671794891358
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1536,16,2,128,1,float16,fp8,0,0.04108000099658966
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,1024,16,8,128,1,float16,float16,0,0.8341199874877929
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,1024,16,8,128,1,float16,fp8,0,0.7446432113647461
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1024,16,16,128,1,float16,float16,0,0.5070752143859864
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1024,16,16,128,1,fp8,fp8,0,0.4519487857818604
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,1024,16,8,128,1,fp8,fp8,0,0.7444399833679199
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1024,16,1,128,1,float16,fp8,0,0.37875359058380126
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1024,16,1,128,1,fp8,fp8,0,0.379911994934082
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1024,16,2,128,1,float16,float16,0,0.35886080265045167
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1024,16,2,128,1,float16,fp8,0,0.38027520179748536
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1024,16,2,128,1,fp8,fp8,0,0.37997438907623293
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1024,16,4,128,1,float16,float16,0,0.37851839065551757
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1024,16,4,128,1,float16,fp8,0,0.38008320331573486
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1024,16,4,128,1,fp8,fp8,0,0.3800463914871216
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1024,16,8,128,1,float16,float16,0,0.4220287799835205
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1024,16,8,128,1,float16,fp8,0,0.38142080307006837
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1024,16,16,128,1,float16,float16,0,0.25932960510253905
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1024,16,8,128,1,fp8,fp8,0,0.38039679527282716
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1024,16,16,128,1,float16,fp8,0,0.23473598957061767
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1024,16,16,128,1,fp8,fp8,0,0.2389904022216797
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1024,16,1,128,1,float16,float16,0,0.18536319732666015
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1024,16,1,128,1,float16,fp8,0,0.19965920448303223
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1024,16,1,128,1,fp8,fp8,0,0.19852160215377807
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1024,16,2,128,1,float16,float16,0,0.18549439907073975
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1024,16,2,128,1,float16,fp8,0,0.20262560844421387
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1024,16,2,128,1,fp8,fp8,0,0.1986127972602844
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1024,16,4,128,1,float16,float16,0,0.19600319862365723
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1024,16,4,128,1,float16,fp8,0,0.19889600276947023
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1024,16,4,128,1,fp8,fp8,0,0.19990559816360473
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1024,16,8,128,1,float16,float16,0,0.2184448003768921
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1024,16,8,128,1,float16,fp8,0,0.1999135971069336
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1024,16,16,128,1,float16,float16,0,0.13706239461898803
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1024,16,8,128,1,fp8,fp8,0,0.20005440711975098
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1024,16,16,128,1,float16,fp8,0,0.12638880014419557
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1024,16,1,128,1,float16,float16,0,0.10313279628753662
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1024,16,16,128,1,fp8,fp8,0,0.12890080213546753
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1024,16,1,128,1,float16,fp8,0,0.11227359771728515
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1024,16,1,128,1,fp8,fp8,0,0.10760639905929566
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1024,16,2,128,1,float16,fp8,0,0.1084928035736084
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1024,16,2,128,1,fp8,fp8,0,0.1086959958076477
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1024,16,4,128,1,float16,float16,0,0.1095695972442627
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1024,16,4,128,1,float16,fp8,0,0.10843199491500854
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1024,16,4,128,1,fp8,fp8,0,0.10902400016784668
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1024,16,8,128,1,float16,float16,0,0.11779359579086304
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1024,16,8,128,1,float16,fp8,0,0.11024800539016724
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1024,16,8,128,1,fp8,fp8,0,0.10872479677200317
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1024,16,16,128,1,float16,float16,0,0.07677119970321655
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1024,16,16,128,1,float16,fp8,0,0.07196000218391418
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1024,16,1,128,1,float16,float16,0,0.06012639999389648
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1024,16,16,128,1,fp8,fp8,0,0.07266560196876526
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1024,16,1,128,1,fp8,fp8,0,0.061431998014450075
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1024,16,2,128,1,float16,float16,0,0.060046398639678956
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1024,16,2,128,1,float16,fp8,0,0.06193280220031738
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1024,16,2,128,1,fp8,fp8,0,0.061659198999404904
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1024,16,4,128,1,float16,float16,0,0.06291679739952087
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1024,16,4,128,1,float16,fp8,0,0.061883199214935306
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1024,16,4,128,1,fp8,fp8,0,0.06166239976882935
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1024,16,8,128,1,float16,float16,0,0.068476802110672
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1024,16,8,128,1,float16,fp8,0,0.06201440095901489
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1024,16,16,128,1,float16,float16,0,0.04559040069580078
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1024,16,8,128,1,fp8,fp8,0,0.0619871973991394
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1024,16,16,128,1,float16,fp8,0,0.04348160028457641
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1024,16,1,128,1,float16,float16,0,0.3560031890869141
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1024,16,16,128,1,fp8,fp8,0,0.043201598525047305
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1024,16,1,128,1,float16,float16,0,0.03900319933891296
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1024,16,1,128,1,float16,fp8,0,0.039233601093292235
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1024,16,1,128,1,fp8,fp8,0,0.039110401272773744
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1024,16,2,128,1,float16,float16,0,0.03917120099067688
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1024,16,2,128,1,fp8,fp8,0,0.039155200123786926
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1024,16,2,128,1,float16,fp8,0,0.03913759887218475
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1024,16,4,128,1,float16,fp8,0,0.038955199718475345
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1024,16,4,128,1,float16,float16,0,0.039854401350021364
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1024,16,4,128,1,fp8,fp8,0,0.039103999733924866
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1024,16,8,128,1,float16,float16,0,0.04165599942207336
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1024,16,8,128,1,float16,fp8,0,0.03904959857463837
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1024,16,8,128,1,fp8,fp8,0,0.03916319906711578
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1024,16,16,128,1,float16,float16,0,0.026819199323654175
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1024,16,16,128,1,float16,fp8,0,0.02682720124721527
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1024,16,16,128,1,fp8,fp8,0,0.02677919864654541
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1024,16,1,128,1,float16,float16,0,0.024828800559043886
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1024,16,1,128,1,fp8,fp8,0,0.024803200364112855
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1024,16,2,128,1,float16,float16,0,0.024743999540805816
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1024,16,2,128,1,float16,fp8,0,0.024822400510311128
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1024,16,2,128,1,fp8,fp8,0,0.024769599735736846
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1024,16,4,128,1,float16,float16,0,0.024830399453639983
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1024,16,4,128,1,fp8,fp8,0,0.02476159930229187
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1024,16,8,128,1,float16,float16,0,0.026811200380325317
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1024,16,8,128,1,float16,fp8,0,0.02475520074367523
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1024,16,8,128,1,fp8,fp8,0,0.024753600358963013
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1024,16,16,128,1,float16,float16,0,0.024748800694942473
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1024,16,16,128,1,float16,fp8,0,0.024619199335575104
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1024,16,1,128,1,float16,float16,0,0.022679999470710754
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1024,16,1,128,1,float16,fp8,0,0.022780799865722658
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1024,16,1,128,1,fp8,fp8,0,0.022697600722312927
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1024,16,2,128,1,float16,float16,0,0.02274879962205887
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1024,16,2,128,1,float16,fp8,0,0.022654399275779724
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1024,16,2,128,1,fp8,fp8,0,0.022681599855422972
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1024,16,4,128,1,float16,float16,0,0.022755199670791627
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1024,16,4,128,1,float16,fp8,0,0.022808000445365906
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1024,16,4,128,1,fp8,fp8,0,0.02265920042991638
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1024,16,8,128,1,float16,float16,0,0.024484799802303316
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1024,16,8,128,1,float16,fp8,0,0.022767999768257143
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1024,16,2,128,1,float16,float16,0,0.10349279642105103
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1024,16,8,128,1,fp8,fp8,0,0.02268799990415573
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1024,16,1,128,1,float16,fp8,0,0.06170079708099365
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,512,16,1,128,1,float16,fp8,0,0.5769167900085449
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1024,16,16,128,1,float16,fp8,0,0.4499536037445068
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,512,16,1,128,1,fp8,fp8,0,0.5777743816375732
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,512,16,2,128,1,float16,float16,0,0.5119760036468506
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1024,16,1,128,1,float16,fp8,0,0.02478879988193512
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,512,16,2,128,1,float16,fp8,0,0.5773280143737793
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,512,16,2,128,1,fp8,fp8,0,0.576639986038208
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,512,16,4,128,1,float16,float16,0,0.5545023918151856
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1024,16,16,128,1,fp8,fp8,0,0.024604800343513488
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,512,16,4,128,1,float16,fp8,0,0.5745567798614502
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,512,16,4,128,1,fp8,fp8,0,0.5755104064941406
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,512,16,8,128,1,float16,float16,0,0.6418960094451904
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,512,16,8,128,1,float16,fp8,0,0.5925024032592774
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,512,16,16,128,1,float16,float16,0,0.4074880123138428
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,512,16,8,128,1,fp8,fp8,0,0.5774096012115478
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,512,16,16,128,1,float16,fp8,0,0.3634144067764282
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,512,16,1,128,1,float16,fp8,0,0.29892959594726565
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,512,16,16,128,1,fp8,fp8,0,0.36389598846435545
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,512,16,1,128,1,fp8,fp8,0,0.2943968057632446
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,512,16,2,128,1,float16,float16,0,0.2610447883605957
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,512,16,2,128,1,float16,fp8,0,0.29494400024414064
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,512,16,2,128,1,fp8,fp8,0,0.29834558963775637
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,512,16,4,128,1,float16,float16,0,0.28154399394989016
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,512,16,4,128,1,float16,fp8,0,0.2943264007568359
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,512,16,8,128,1,float16,float16,0,0.3220752000808716
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,512,16,1,128,1,float16,float16,0,0.5023615837097168
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,512,16,8,128,1,float16,fp8,0,0.29792799949646
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,512,16,8,128,1,fp8,fp8,0,0.29543840885162354
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,512,16,16,128,1,float16,float16,0,0.20957279205322266
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,512,16,16,128,1,float16,fp8,0,0.18933600187301636
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,512,16,1,128,1,float16,float16,0,0.138428795337677
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,512,16,1,128,1,float16,fp8,0,0.15476000308990479
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,512,16,16,128,1,fp8,fp8,0,0.18954720497131347
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,512,16,1,128,1,fp8,fp8,0,0.15460959672927857
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,512,16,2,128,1,float16,float16,0,0.13868000507354736
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,512,16,2,128,1,float16,fp8,0,0.15430400371551514
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,512,16,2,128,1,fp8,fp8,0,0.15425920486450195
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,512,16,4,128,1,float16,float16,0,0.14760799407958985
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,512,16,4,128,1,float16,fp8,0,0.15476640462875366
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,512,16,4,128,1,fp8,fp8,0,0.15517120361328124
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,512,16,8,128,1,float16,float16,0,0.16730560064315797
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,512,16,8,128,1,fp8,fp8,0,0.1555999994277954
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,512,16,8,128,1,float16,fp8,0,0.15513279438018798
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,512,16,16,128,1,float16,fp8,0,0.10259840488433838
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,512,16,16,128,1,fp8,fp8,0,0.10236959457397461
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,512,16,1,128,1,float16,float16,0,0.07803519964218139
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,512,16,1,128,1,float16,fp8,0,0.08395199775695801
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,512,16,1,128,1,fp8,fp8,0,0.08430560231208802
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,512,16,2,128,1,float16,float16,0,0.07811840176582337
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,512,16,2,128,1,float16,fp8,0,0.08446239829063415
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,512,16,2,128,1,fp8,fp8,0,0.0843616008758545
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,512,16,4,128,1,float16,float16,0,0.08277279734611512
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,512,16,4,128,1,float16,fp8,0,0.08427199721336365
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,512,16,4,128,1,fp8,fp8,0,0.08453440070152282
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,512,16,8,128,1,float16,float16,0,0.09193919897079468
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1024,16,4,128,1,float16,fp8,0,0.024827200174331664
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,512,16,8,128,1,fp8,fp8,0,0.08479679822921753
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,512,16,16,128,1,float16,fp8,0,0.05756639838218689
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,512,16,1,128,1,float16,float16,0,0.04466559886932373
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,512,16,16,128,1,fp8,fp8,0,0.05756160020828247
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,512,16,1,128,1,float16,fp8,0,0.047363200783729555
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,512,16,1,128,1,fp8,fp8,0,0.0473471999168396
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,512,16,2,128,1,float16,float16,0,0.04530400037765503
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,512,16,2,128,1,float16,fp8,0,0.04735040068626404
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,512,16,2,128,1,fp8,fp8,0,0.04734559953212738
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,512,16,4,128,1,float16,float16,0,0.04748319983482361
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,512,16,4,128,1,fp8,fp8,0,0.04743840098381043
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,512,16,8,128,1,float16,float16,0,0.05348640084266663
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,512,16,8,128,1,float16,fp8,0,0.04722239971160889
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,512,16,8,128,1,fp8,fp8,0,0.04743840098381043
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,512,16,16,128,1,float16,fp8,0,0.03461439907550812
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,512,16,1,128,1,float16,float16,0,0.25925920009613035
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,512,16,16,128,1,fp8,fp8,0,0.03510720133781433
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,512,16,1,128,1,float16,float16,0,0.028918400406837463
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,512,16,1,128,1,float16,fp8,0,0.030372801423072814
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,512,16,1,128,1,fp8,fp8,0,0.028918400406837463
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,512,16,2,128,1,float16,float16,0,0.02895520031452179
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,512,16,2,128,1,float16,fp8,0,0.02902719974517822
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,512,16,2,128,1,fp8,fp8,0,0.029120001196861266
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,512,16,4,128,1,float16,float16,0,0.030432000756263733
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,512,16,4,128,1,float16,fp8,0,0.02898080050945282
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,512,16,4,128,1,fp8,fp8,0,0.03017280101776123
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,512,16,8,128,1,float16,float16,0,0.03175199925899506
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,512,16,4,128,1,fp8,fp8,0,0.2962831974029541
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,512,16,8,128,1,float16,fp8,0,0.030619201064109803
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,512,16,8,128,1,fp8,fp8,0,0.029334399104118346
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,512,16,16,128,1,float16,float16,0,0.020707200467586517
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,512,16,16,128,1,float16,fp8,0,0.02072799950838089
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,512,16,16,128,1,fp8,fp8,0,0.020739200711250304
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,512,16,1,128,1,float16,float16,0,0.018590399622917177
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,512,16,1,128,1,float16,fp8,0,0.018598400056362152
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,512,16,1,128,1,fp8,fp8,0,0.018691200017929076
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,512,16,2,128,1,float16,float16,0,0.018606400489807128
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,512,16,2,128,1,fp8,fp8,0,0.018620799481868743
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,512,16,4,128,1,float16,float16,0,0.018680000305175783
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,512,16,16,128,1,float16,float16,0,0.11145919561386108
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,512,16,4,128,1,float16,fp8,0,0.018667200207710268
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,512,16,4,128,1,fp8,fp8,0,0.01866080015897751
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,512,16,8,128,1,float16,float16,0,0.020399999618530274
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,512,16,8,128,1,float16,fp8,0,0.018611200153827667
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,512,16,8,128,1,fp8,fp8,0,0.018607999384403228
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,512,16,16,128,1,float16,float16,0,0.018571199476718904
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,512,16,16,128,1,float16,fp8,0,0.018624000251293182
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,512,16,16,128,1,fp8,fp8,0,0.0186256006360054
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,512,16,1,128,1,float16,float16,0,0.016543999314308167
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,512,16,1,128,1,float16,fp8,0,0.01658560037612915
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,512,16,16,128,1,float16,float16,0,0.06180319786071777
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,512,16,1,128,1,fp8,fp8,0,0.016540800034999848
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,512,16,2,128,1,float16,fp8,0,0.01687680035829544
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,512,16,2,128,1,fp8,fp8,0,0.0165583997964859
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,512,16,4,128,1,float16,float16,0,0.017159999907016756
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,512,16,4,128,1,float16,fp8,0,0.01663520038127899
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,512,16,4,128,1,fp8,fp8,0,0.016659200191497803
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,512,16,8,128,1,float16,float16,0,0.016663999855518342
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,512,16,8,128,1,float16,fp8,0,0.016735999286174773
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,512,16,8,128,1,fp8,fp8,0,0.016624000668525696
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,512,16,16,128,1,float16,float16,0,0.016622400283813475
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,512,16,4,128,1,float16,fp8,0,0.04737760126590729
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,512,16,16,128,1,fp8,fp8,0,0.016624000668525696
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,512,16,16,128,1,float16,fp8,0,0.016519999504089354
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,512,16,1,128,1,float16,fp8,0,0.01655520051717758
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,512,16,1,128,1,fp8,fp8,0,0.016497600078582763
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,512,16,2,128,1,float16,float16,0,0.016604800522327424
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,512,16,2,128,1,float16,fp8,0,0.01653759926557541
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,512,16,2,128,1,fp8,fp8,0,0.016601599752902985
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,512,16,4,128,1,float16,float16,0,0.01656640022993088
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,512,16,4,128,1,float16,fp8,0,0.016523200273513793
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,512,16,8,128,1,float16,float16,0,0.01656319946050644
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,512,16,8,128,1,float16,fp8,0,0.016545599699020384
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,512,16,8,128,1,fp8,fp8,0,0.016551999747753142
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,256,16,1,128,1,float16,float16,0,0.2072511911392212
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,256,16,1,128,1,float16,fp8,0,0.2487504005432129
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,256,16,1,128,1,fp8,fp8,0,0.2487504005432129
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,256,16,2,128,1,float16,float16,0,0.2098383903503418
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,256,16,2,128,1,float16,fp8,0,0.248688006401062
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,256,16,2,128,1,fp8,fp8,0,0.24844799041748047
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,512,16,8,128,1,float16,fp8,0,0.085207998752594
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,512,16,2,128,1,float16,float16,0,0.016631999611854555
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,256,16,4,128,1,float16,float16,0,0.22905759811401366
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,256,16,4,128,1,float16,fp8,0,0.24846720695495605
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,256,16,4,128,1,fp8,fp8,0,0.2470736026763916
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,256,16,8,128,1,float16,float16,0,0.27029759883880616
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,256,16,8,128,1,float16,fp8,0,0.24805281162261963
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,256,16,16,128,1,float16,float16,0,0.18153280019760132
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,256,16,8,128,1,fp8,fp8,0,0.24839038848876954
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,512,16,16,128,1,float16,float16,0,0.03524959981441498
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,256,16,16,128,1,float16,fp8,0,0.1634160041809082
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,256,16,1,128,1,float16,float16,0,0.1117136001586914
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,256,16,16,128,1,fp8,fp8,0,0.16269919872283936
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,256,16,1,128,1,float16,fp8,0,0.1293328046798706
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,256,16,2,128,1,float16,float16,0,0.11212480068206787
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,256,16,1,128,1,fp8,fp8,0,0.12901920080184937
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,256,16,2,128,1,float16,fp8,0,0.12915680408477784
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,512,16,4,128,1,fp8,fp8,0,0.016564799845218657
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,256,16,2,128,1,fp8,fp8,0,0.12899680137634278
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,256,16,4,128,1,float16,float16,0,0.12070879936218262
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,256,16,4,128,1,float16,fp8,0,0.12952799797058107
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,256,16,4,128,1,fp8,fp8,0,0.12931840419769286
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,256,16,8,128,1,float16,float16,0,0.14092799425125122
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,256,16,8,128,1,float16,fp8,0,0.13013919591903686
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,256,16,16,128,1,float16,fp8,0,0.08642719984054566
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,256,16,8,128,1,fp8,fp8,0,0.12940000295639037
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,256,16,16,128,1,fp8,fp8,0,0.08676959872245789
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,256,16,1,128,1,float16,fp8,0,0.06896799802780151
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,256,16,1,128,1,fp8,fp8,0,0.06780160069465638
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,256,16,2,128,1,float16,fp8,0,0.06854239702224732
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,256,16,4,128,1,float16,float16,0,0.066839998960495
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,256,16,2,128,1,fp8,fp8,0,0.06894559860229492
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,256,16,4,128,1,float16,fp8,0,0.06986879706382751
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,256,16,4,128,1,fp8,fp8,0,0.06986879706382751
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,256,16,8,128,1,float16,float16,0,0.07692319750785828
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,256,16,8,128,1,float16,fp8,0,0.0699887990951538
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,256,16,8,128,1,fp8,fp8,0,0.0698095977306366
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,256,16,16,128,1,float16,float16,0,0.05547680258750916
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,256,16,16,128,1,fp8,fp8,0,0.0497296005487442
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,256,16,1,128,1,float16,float16,0,0.03699679970741272
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,256,16,1,128,1,float16,fp8,0,0.03988640010356903
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,256,16,1,128,1,fp8,fp8,0,0.0398496001958847
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,256,16,2,128,1,float16,float16,0,0.037028801441192624
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,256,16,2,128,1,float16,fp8,0,0.03966239988803864
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,256,16,2,128,1,fp8,fp8,0,0.03930239975452423
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,512,16,2,128,1,float16,fp8,0,0.018638400733470915
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,256,16,4,128,1,float16,float16,0,0.039392000436782836
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,256,16,4,128,1,fp8,fp8,0,0.04035199880599975
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,256,16,8,128,1,float16,float16,0,0.04542079865932465
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,256,16,8,128,1,fp8,fp8,0,0.04112319946289063
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,256,16,8,128,1,float16,fp8,0,0.04070560038089752
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,256,16,16,128,1,float16,float16,0,0.03020319938659668
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,256,16,16,128,1,float16,fp8,0,0.028881600499153136
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,256,16,16,128,1,fp8,fp8,0,0.02881920039653778
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,256,16,1,128,1,float16,float16,0,0.02276480048894882
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,256,16,1,128,1,float16,fp8,0,0.024424000084400176
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,256,16,1,128,1,fp8,fp8,0,0.02425280064344406
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,256,16,2,128,1,float16,float16,0,0.02274720072746277
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,256,16,2,128,1,float16,fp8,0,0.024435199797153473
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,256,16,2,128,1,fp8,fp8,0,0.02451840043067932
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,256,16,4,128,1,float16,float16,0,0.02346239984035492
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,512,16,1,128,1,float16,float16,0,0.016675199568271636
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,256,16,4,128,1,float16,fp8,0,0.024817599356174468
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,256,16,4,128,1,fp8,fp8,0,0.024315199255943297
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,256,16,8,128,1,float16,float16,0,0.025705599784851076
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,256,16,8,128,1,float16,fp8,0,0.024817599356174468
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,256,16,8,128,1,fp8,fp8,0,0.024817599356174468
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,256,16,16,128,1,float16,float16,0,0.016601599752902985
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,256,16,16,128,1,float16,fp8,0,0.018688000738620758
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,256,16,16,128,1,fp8,fp8,0,0.0186271995306015
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,256,16,1,128,1,float16,float16,0,0.014553600549697876
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,256,16,1,128,1,float16,fp8,0,0.016551999747753142
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,256,16,1,128,1,fp8,fp8,0,0.016628800332546233
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,256,16,2,128,1,float16,float16,0,0.014550399780273438
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,256,16,1,128,1,float16,float16,0,0.06207519769668579
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,256,16,2,128,1,float16,fp8,0,0.01663520038127899
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,256,16,2,128,1,float16,float16,0,0.06222079992294312
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,256,16,4,128,1,float16,float16,0,0.014766399562358857
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,256,16,4,128,1,fp8,fp8,0,0.0165583997964859
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,256,16,4,128,1,float16,fp8,0,0.01658560037612915
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,256,16,8,128,1,float16,fp8,0,0.016582399606704712
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,256,16,8,128,1,fp8,fp8,0,0.016542400419712066
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,256,16,16,128,1,float16,fp8,0,0.014571200311183929
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,256,16,16,128,1,float16,fp8,0,0.051337599754333496
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,256,16,16,128,1,fp8,fp8,0,0.014553600549697876
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,256,16,1,128,1,float16,float16,0,0.014459200203418732
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,256,16,1,128,1,float16,fp8,0,0.01456640064716339
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,256,16,1,128,1,fp8,fp8,0,0.0144896000623703
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,256,16,2,128,1,float16,float16,0,0.014023999869823455
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,256,16,2,128,1,float16,fp8,0,0.014556799829006196
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,256,16,2,128,1,fp8,fp8,0,0.014547200500965118
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,256,16,4,128,1,float16,float16,0,0.014499199390411378
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,256,16,4,128,1,float16,fp8,0,0.014500799775123595
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,256,16,4,128,1,fp8,fp8,0,0.014475199580192565
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,256,16,8,128,1,float16,float16,0,0.014539200067520141
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,256,16,8,128,1,float16,fp8,0,0.014504000544548035
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,256,16,16,128,1,float16,float16,0,0.01446399986743927
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,256,16,16,128,1,float16,fp8,0,0.014497600495815277
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,256,16,16,128,1,fp8,fp8,0,0.014476799964904785
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,256,16,1,128,1,float16,float16,0,0.012503999471664428
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,256,16,1,128,1,float16,fp8,0,0.012545600533485413
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,256,16,1,128,1,fp8,fp8,0,0.013728000223636627
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,256,16,2,128,1,float16,float16,0,0.013214400410652161
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,256,16,2,128,1,float16,fp8,0,0.014510400593280792
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,256,16,2,128,1,fp8,fp8,0,0.014239999651908874
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,256,16,4,128,1,float16,float16,0,0.013663999736309052
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,256,16,4,128,1,float16,fp8,0,0.014192000031471252
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,256,16,4,128,1,fp8,fp8,0,0.014486399292945863
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,256,16,8,128,1,float16,float16,0,0.014476799964904785
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,256,16,8,128,1,float16,fp8,0,0.013020800054073333
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,256,16,8,128,1,fp8,fp8,0,0.012862400710582733
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,256,16,16,128,1,float16,float16,0,0.013624000549316406
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,256,16,16,128,1,float16,fp8,0,0.012652799487113953
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,256,16,16,128,1,fp8,fp8,0,0.013886399567127228
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,256,16,1,128,1,float16,float16,0,0.013041600584983826
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,256,16,16,128,1,float16,float16,0,0.09780480265617371
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,256,16,1,128,1,float16,fp8,0,0.012548799812793731
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,256,16,1,128,1,fp8,fp8,0,0.012494400143623352
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,256,16,2,128,1,fp8,fp8,0,0.016616000235080718
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,256,16,2,128,1,float16,float16,0,0.013030399382114411
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,256,16,2,128,1,float16,fp8,0,0.01255040019750595
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,256,16,8,128,1,float16,float16,0,0.01653279960155487
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,256,16,2,128,1,fp8,fp8,0,0.012516799569129943
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,256,16,8,128,1,float16,float16,0,0.012513600289821625
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,256,16,16,128,1,float16,float16,0,0.0146479994058609
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,256,16,8,128,1,float16,fp8,0,0.012507200241088867
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,256,16,8,128,1,fp8,fp8,0,0.012876799702644348
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,128,16,1,128,1,float16,fp8,0,0.15231039524078369
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,128,16,1,128,1,float16,float16,0,0.1351744055747986
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,128,16,1,128,1,fp8,fp8,0,0.15281599760055542
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,128,16,2,128,1,float16,float16,0,0.13618240356445313
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,256,16,4,128,1,float16,fp8,0,0.03946399986743927
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,256,16,8,128,1,fp8,fp8,0,0.01456640064716339
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,128,16,2,128,1,float16,fp8,0,0.15312000513076782
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,128,16,4,128,1,float16,float16,0,0.1445024013519287
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,128,16,4,128,1,float16,fp8,0,0.15226399898529053
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,128,16,4,128,1,fp8,fp8,0,0.15225600004196166
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,128,16,8,128,1,float16,fp8,0,0.15238399505615235
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,128,16,16,128,1,float16,fp8,0,0.09844319820404053
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,128,16,16,128,1,fp8,fp8,0,0.0981328010559082
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,128,16,1,128,1,float16,float16,0,0.07345439791679383
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,128,16,1,128,1,float16,fp8,0,0.08014879822731018
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,128,16,1,128,1,fp8,fp8,0,0.08019199967384338
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,128,16,2,128,1,float16,float16,0,0.07403039932250977
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,128,16,2,128,1,float16,fp8,0,0.08058400154113769
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,128,16,2,128,1,fp8,fp8,0,0.08055199980735779
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,128,16,4,128,1,float16,float16,0,0.07830719947814942
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,128,16,4,128,1,float16,fp8,0,0.08052480220794678
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,256,16,4,128,1,float16,float16,0,0.012539200484752655
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,256,16,4,128,1,float16,fp8,0,0.01257600039243698
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,128,16,4,128,1,fp8,fp8,0,0.08074560165405273
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,256,16,4,128,1,fp8,fp8,0,0.012531200051307678
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,128,16,8,128,1,float16,float16,0,0.08676159977912903
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,128,16,8,128,1,float16,fp8,0,0.08118720054626465
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,128,16,8,128,1,fp8,fp8,0,0.08040639758110046
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,128,16,16,128,1,float16,float16,0,0.05550720095634461
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,128,16,16,128,1,float16,fp8,0,0.05337439775466919
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,128,16,1,128,1,float16,float16,0,0.0393887996673584
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,128,16,1,128,1,float16,fp8,0,0.04316479861736298
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,128,16,1,128,1,fp8,fp8,0,0.043219199776649474
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,128,16,2,128,1,float16,float16,0,0.04101920127868652
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,128,16,2,128,1,float16,fp8,0,0.0432096004486084
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,128,16,2,128,1,fp8,fp8,0,0.043224000930786134
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,128,16,4,128,1,float16,float16,0,0.04323039948940277
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,128,16,4,128,1,float16,fp8,0,0.043249601125717164
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,128,16,4,128,1,fp8,fp8,0,0.043268799781799316
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,128,16,8,128,1,float16,float16,0,0.0485040009021759
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,128,16,8,128,1,float16,fp8,0,0.04326240122318268
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,128,16,2,128,1,fp8,fp8,0,0.1529055953025818
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,128,16,16,128,1,float16,float16,0,0.033011201024055484
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,128,16,16,128,1,float16,fp8,0,0.03094559907913208
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,128,16,16,128,1,fp8,fp8,0,0.03091199994087219
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,128,16,1,128,1,float16,float16,0,0.024868799746036528
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,128,16,8,128,1,float16,float16,0,0.1638208031654358
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,128,16,8,128,1,fp8,fp8,0,0.15258400440216063
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,128,16,1,128,1,fp8,fp8,0,0.026855999231338502
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,128,16,2,128,1,float16,float16,0,0.02483839988708496
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,128,16,2,128,1,float16,fp8,0,0.026848000288009644
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,128,16,2,128,1,fp8,fp8,0,0.026867198944091796
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,128,16,16,128,1,float16,float16,0,0.10504800081253052
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,128,16,4,128,1,float16,float16,0,0.026769599318504332
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,128,16,4,128,1,fp8,fp8,0,0.02682879865169525
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,128,16,8,128,1,float16,float16,0,0.028854399919509888
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,128,16,8,128,1,float16,fp8,0,0.026820799708366393
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,128,16,8,128,1,fp8,fp8,0,0.026812800765037538
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,128,16,16,128,1,float16,float16,0,0.018662400543689728
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,128,16,16,128,1,float16,fp8,0,0.018783999979496
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,128,16,16,128,1,fp8,fp8,0,0.0186831995844841
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,128,16,1,128,1,float16,float16,0,0.016590400040149687
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,128,16,1,128,1,float16,fp8,0,0.0166143998503685
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,128,16,2,128,1,float16,float16,0,0.016638399660587312
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,128,16,2,128,1,float16,fp8,0,0.01668799966573715
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,128,16,2,128,1,fp8,fp8,0,0.016663999855518342
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,128,16,4,128,1,float16,float16,0,0.016545599699020384
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,128,16,4,128,1,float16,fp8,0,0.01674560010433197
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,128,16,4,128,1,fp8,fp8,0,0.016582399606704712
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,128,16,8,128,1,float16,float16,0,0.01847680062055588
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,128,16,8,128,1,float16,fp8,0,0.01658399999141693
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,128,16,8,128,1,fp8,fp8,0,0.016612799465656282
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,128,16,16,128,1,float16,float16,0,0.012542399764060973
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,128,16,16,128,1,float16,fp8,0,0.012718400359153748
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,128,16,16,128,1,fp8,fp8,0,0.012825599312782288
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,128,16,1,128,1,float16,float16,0,0.012465599924325943
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,128,16,1,128,1,float16,fp8,0,0.012455999851226807
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,128,16,1,128,1,fp8,fp8,0,0.012532800436019897
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,128,16,2,128,1,float16,float16,0,0.01244639977812767
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,128,16,2,128,1,float16,fp8,0,0.012494400143623352
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,128,16,2,128,1,fp8,fp8,0,0.012432000041007996
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,128,16,4,128,1,float16,float16,0,0.012479999661445617
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,128,16,4,128,1,float16,fp8,0,0.012451200187206269
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,128,16,4,128,1,fp8,fp8,0,0.012508800625801087
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,128,16,8,128,1,float16,float16,0,0.012476799637079239
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,128,16,8,128,1,float16,fp8,0,0.012479999661445617
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,128,16,8,128,1,fp8,fp8,0,0.012444800138473511
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,128,16,16,128,1,float16,float16,0,0.01071999967098236
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,128,16,16,128,1,float16,fp8,0,0.010943999886512757
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,128,16,16,128,1,fp8,fp8,0,0.010955200344324113
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,128,16,1,128,1,float16,float16,0,0.010380800068378448
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,128,16,1,128,1,float16,fp8,0,0.01051200032234192
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,128,16,1,128,1,fp8,fp8,0,0.010491199791431427
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,128,16,2,128,1,float16,float16,0,0.01053600013256073
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,128,16,2,128,1,float16,fp8,0,0.010465600341558457
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,128,16,2,128,1,fp8,fp8,0,0.010489600151777268
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,128,16,4,128,1,float16,float16,0,0.010414399951696397
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,128,16,4,128,1,fp8,fp8,0,0.01050880029797554
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,128,16,4,128,1,float16,fp8,0,0.010492800176143647
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,128,16,8,128,1,float16,float16,0,0.010651200264692306
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,128,16,8,128,1,fp8,fp8,0,0.010505600273609162
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,128,16,8,128,1,fp8,fp8,0,0.043351998925209044
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,128,16,16,128,1,float16,float16,0,0.010502400249242783
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,128,16,16,128,1,float16,fp8,0,0.010526400059461594
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,128,16,1,128,1,float16,fp8,0,0.026872000098228453
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,128,16,1,128,1,float16,float16,0,0.010454399883747101
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,128,16,1,128,1,float16,fp8,0,0.01043199971318245
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,128,16,1,128,1,fp8,fp8,0,0.010487999767065048
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,128,16,2,128,1,float16,float16,0,0.010454399883747101
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,128,16,2,128,1,float16,fp8,0,0.010467199981212616
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,128,16,2,128,1,fp8,fp8,0,0.010441599786281586
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,128,16,4,128,1,float16,float16,0,0.010486400127410889
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,128,16,4,128,1,float16,fp8,0,0.010436800122261048
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,128,16,4,128,1,fp8,fp8,0,0.010401599854230881
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,128,16,8,128,1,float16,float16,0,0.010452800244092942
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,128,16,8,128,1,float16,fp8,0,0.010382399708032609
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,128,16,8,128,1,fp8,fp8,0,0.010443200170993806
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,128,16,16,128,1,float16,float16,0,0.010391999781131745
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,128,16,1,128,1,fp8,fp8,0,0.01669120043516159
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,128,16,16,128,1,float16,fp8,0,0.010468800365924836
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,128,16,16,128,1,fp8,fp8,0,0.010382399708032609
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,128,16,1,128,1,float16,float16,0,0.010452800244092942
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,128,16,1,128,1,float16,fp8,0,0.0104592002928257
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,128,16,1,128,1,fp8,fp8,0,0.010391999781131745
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,128,16,2,128,1,float16,float16,0,0.010444799810647965
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,128,16,2,128,1,float16,fp8,0,0.010476800054311753
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,128,16,2,128,1,fp8,fp8,0,0.0104032002389431
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,128,16,4,128,1,float16,float16,0,0.010447999835014344
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,128,16,4,128,1,float16,fp8,0,0.010449600219726563
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,128,16,8,128,1,float16,float16,0,0.01051200032234192
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,128,16,8,128,1,float16,fp8,0,0.010388799756765366
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,128,16,8,128,1,fp8,fp8,0,0.010470400005578995
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,64,16,1,128,1,float16,float16,0,0.11356480121612549
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,64,16,1,128,1,float16,fp8,0,0.1183616042137146
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,64,16,1,128,1,fp8,fp8,0,0.11871999502182007
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,64,16,2,128,1,float16,float16,0,0.113755202293396
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,64,16,2,128,1,float16,fp8,0,0.11904480457305908
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,64,16,2,128,1,fp8,fp8,0,0.11916320323944092
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,64,16,4,128,1,float16,float16,0,0.1188431978225708
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,64,16,4,128,1,float16,fp8,0,0.11918079853057861
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,64,16,4,128,1,fp8,fp8,0,0.11946239471435546
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,64,16,8,128,1,float16,float16,0,0.12720160484313964
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,128,16,16,128,1,fp8,fp8,0,0.0534608006477356
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,64,16,8,128,1,float16,fp8,0,0.11937600374221802
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,64,16,8,128,1,fp8,fp8,0,0.1197424054145813
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,64,16,16,128,1,float16,float16,0,0.07540159821510314
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,64,16,16,128,1,float16,fp8,0,0.07281919717788696
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,64,16,16,128,1,fp8,fp8,0,0.07350720167160034
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,64,16,1,128,1,float16,float16,0,0.06104000210762024
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,64,16,1,128,1,fp8,fp8,0,0.06331040263175965
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,128,16,8,128,1,float16,fp8,0,0.010480000078678131
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,64,16,2,128,1,float16,float16,0,0.06110240221023559
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,64,16,2,128,1,float16,fp8,0,0.06261759996414185
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,64,16,2,128,1,fp8,fp8,0,0.06270719766616821
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,64,16,4,128,1,float16,float16,0,0.06567839980125427
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,64,16,4,128,1,float16,fp8,0,0.06256639957427979
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,64,16,4,128,1,fp8,fp8,0,0.06311360001564026
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,64,16,8,128,1,float16,float16,0,0.0701471984386444
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,128,16,16,128,1,fp8,fp8,0,0.010476800054311753
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,64,16,8,128,1,float16,fp8,0,0.06308320164680481
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,64,16,16,128,1,float16,float16,0,0.042423999309539794
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,64,16,8,128,1,fp8,fp8,0,0.06329759955406189
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,64,16,16,128,1,fp8,fp8,0,0.039164799451828006
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,128,16,4,128,1,float16,fp8,0,0.02677600085735321
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,64,16,1,128,1,float16,float16,0,0.0350735992193222
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,64,16,1,128,1,float16,fp8,0,0.035068801045417784
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,64,16,1,128,1,fp8,fp8,0,0.03510560095310211
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,64,16,2,128,1,float16,float16,0,0.03504799902439117
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,64,16,2,128,1,float16,fp8,0,0.03514400124549866
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,64,16,2,128,1,fp8,fp8,0,0.03504799902439117
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,64,16,4,128,1,float16,float16,0,0.0366703987121582
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,64,16,4,128,1,float16,fp8,0,0.0350928008556366
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,64,16,4,128,1,fp8,fp8,0,0.03505760133266449
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,64,16,8,128,1,float16,float16,0,0.03843519985675812
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,64,16,8,128,1,fp8,fp8,0,0.03508960008621216
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,64,16,16,128,1,float16,float16,0,0.02479040026664734
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,64,16,16,128,1,float16,fp8,0,0.024875199794769286
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,64,16,1,128,1,float16,float16,0,0.022375999391078948
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,64,16,1,128,1,float16,fp8,0,0.02279520034790039
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,64,16,1,128,1,fp8,fp8,0,0.022763200104236603
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,64,16,2,128,1,float16,float16,0,0.022758400440216063
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,128,16,4,128,1,fp8,fp8,0,0.010444799810647965
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,64,16,2,128,1,float16,fp8,0,0.022732800245285033
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,64,16,4,128,1,float16,float16,0,0.022729599475860597
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,64,16,2,128,1,fp8,fp8,0,0.022755199670791627
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,64,16,4,128,1,float16,fp8,0,0.022784000635147093
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,64,16,4,128,1,fp8,fp8,0,0.02272160053253174
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,64,16,8,128,1,float16,fp8,0,0.022767999768257143
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,64,16,8,128,1,fp8,fp8,0,0.022731199860572815
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,64,16,8,128,1,float16,float16,0,0.022838400304317476
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,64,16,16,128,1,float16,float16,0,0.01661760061979294
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,64,16,16,128,1,float16,fp8,0,0.016627199947834015
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,64,16,16,128,1,fp8,fp8,0,0.016620799899101257
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,64,16,1,128,1,float16,float16,0,0.014528000354766845
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,64,16,1,128,1,float16,fp8,0,0.014560000598430633
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,64,16,1,128,1,fp8,fp8,0,0.014535999298095703
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,64,16,2,128,1,float16,fp8,0,0.014582400023937226
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,64,16,2,128,1,fp8,fp8,0,0.014552000164985656
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,64,16,4,128,1,float16,float16,0,0.014563199877738953
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,64,16,4,128,1,float16,fp8,0,0.014563199877738953
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,64,16,8,128,1,float16,float16,0,0.015065599977970124
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,64,16,8,128,1,float16,fp8,0,0.014539200067520141
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,64,16,8,128,1,fp8,fp8,0,0.014921599626541137
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,64,16,16,128,1,float16,float16,0,0.01252640038728714
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,64,16,16,128,1,float16,fp8,0,0.01244639977812767
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,64,16,16,128,1,fp8,fp8,0,0.012439999729394913
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,64,16,1,128,1,float16,float16,0,0.010465600341558457
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,64,16,1,128,1,float16,fp8,0,0.0632528007030487
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,64,16,1,128,1,fp8,fp8,0,0.01053759977221489
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,64,16,2,128,1,float16,float16,0,0.010473600029945374
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,64,16,2,128,1,float16,fp8,0,0.010468800365924836
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,64,16,4,128,1,float16,float16,0,0.010540799796581268
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,64,16,2,128,1,fp8,fp8,0,0.01043040007352829
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,64,16,4,128,1,float16,fp8,0,0.010487999767065048
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,64,16,4,128,1,fp8,fp8,0,0.010395199805498124
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,64,16,8,128,1,float16,float16,0,0.01101600006222725
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,64,16,8,128,1,float16,fp8,0,0.010491199791431427
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,64,16,16,128,1,float16,fp8,0,0.03918560147285462
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,64,16,16,128,1,float16,float16,0,0.010428799688816071
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,64,16,16,128,1,float16,fp8,0,0.010480000078678131
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,64,16,16,128,1,fp8,fp8,0,0.01040000021457672
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,64,16,1,128,1,float16,float16,0,0.01037440001964569
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,64,16,1,128,1,float16,fp8,0,0.010401599854230881
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,64,16,1,128,1,fp8,fp8,0,0.01040000021457672
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,64,16,2,128,1,float16,float16,0,0.010380800068378448
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,64,16,8,128,1,float16,fp8,0,0.035094401240348815
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,64,16,2,128,1,float16,fp8,0,0.010388799756765366
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,64,16,2,128,1,fp8,fp8,0,0.01040000021457672
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,64,16,4,128,1,float16,float16,0,0.010460799932479859
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,64,16,4,128,1,float16,fp8,0,0.010382399708032609
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,64,16,4,128,1,fp8,fp8,0,0.01050880029797554
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,64,16,8,128,1,float16,float16,0,0.010366400331258773
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,64,16,8,128,1,float16,fp8,0,0.010475199669599533
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,64,16,16,128,1,float16,float16,0,0.0104592002928257
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,64,16,16,128,1,float16,fp8,0,0.01048320010304451
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,64,16,16,128,1,fp8,fp8,0,0.010369600355625152
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,64,16,1,128,1,float16,float16,0,0.0104592002928257
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,64,16,1,128,1,float16,fp8,0,0.010372799634933472
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,64,16,1,128,1,fp8,fp8,0,0.010409600287675857
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,64,16,2,128,1,float16,float16,0,0.014579200744628906
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,64,16,2,128,1,float16,fp8,0,0.010462400317192078
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,64,16,4,128,1,fp8,fp8,0,0.014569599926471711
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,64,16,2,128,1,fp8,fp8,0,0.010489600151777268
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,64,16,4,128,1,float16,float16,0,0.01043360009789467
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,64,16,4,128,1,float16,fp8,0,0.01048159971833229
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,64,16,8,128,1,float16,float16,0,0.010463999956846238
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,64,16,8,128,1,float16,fp8,0,0.01040000021457672
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,64,16,8,128,1,fp8,fp8,0,0.010384000092744827
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,64,16,16,128,1,float16,float16,0,0.010467199981212616
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,64,16,16,128,1,float16,fp8,0,0.0104592002928257
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,64,16,16,128,1,fp8,fp8,0,0.010428799688816071
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,64,16,1,128,1,float16,fp8,0,0.010499200224876404
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,64,16,1,128,1,float16,float16,0,0.01045759990811348
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,64,16,1,128,1,float16,fp8,0,0.010391999781131745
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,64,16,1,128,1,fp8,fp8,0,0.0104592002928257
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,64,16,2,128,1,float16,float16,0,0.010446400195360184
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,64,16,2,128,1,fp8,fp8,0,0.010438399761915207
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,64,16,4,128,1,float16,float16,0,0.0104592002928257
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,64,16,4,128,1,float16,fp8,0,0.010372799634933472
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,64,16,8,128,1,fp8,fp8,0,0.010427200049161912
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,64,16,8,128,1,float16,float16,0,0.01043199971318245
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,64,16,8,128,1,float16,fp8,0,0.010364799946546554
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,64,16,8,128,1,fp8,fp8,0,0.010371199995279311
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,64,16,16,128,1,fp8,fp8,0,0.024740800261497498
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,32,16,1,128,1,float16,float16,0,0.10113760232925414
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,32,16,1,128,1,fp8,fp8,0,0.10077760219573975
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,64,16,8,128,1,fp8,fp8,0,0.010356800258159637
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,32,16,2,128,1,float16,fp8,0,0.10052000284194947
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,32,16,2,128,1,fp8,fp8,0,0.10100480318069457
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,32,16,4,128,1,float16,float16,0,0.10327839851379395
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,32,16,4,128,1,float16,fp8,0,0.10090559720993042
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,32,16,4,128,1,fp8,fp8,0,0.10135359764099121
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,32,16,8,128,1,float16,float16,0,0.10873440504074097
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,32,16,8,128,1,float16,fp8,0,0.10198080539703369
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,64,16,2,128,1,float16,float16,0,0.01045759990811348
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,32,16,16,128,1,float16,float16,0,0.06364480257034302
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,32,16,8,128,1,fp8,fp8,0,0.10166079998016357
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,32,16,16,128,1,float16,fp8,0,0.05950559973716736
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,32,16,16,128,1,fp8,fp8,0,0.05917919874191284
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,32,16,1,128,1,float16,float16,0,0.05586240291595459
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,64,16,4,128,1,fp8,fp8,0,0.010502400249242783
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,32,16,1,128,1,fp8,fp8,0,0.05486080050468445
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,32,16,2,128,1,float16,float16,0,0.05575039982795715
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,32,16,2,128,1,float16,fp8,0,0.055287998914718625
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,32,16,2,128,1,fp8,fp8,0,0.055048000812530515
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,32,16,4,128,1,float16,float16,0,0.05659040212631226
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,32,16,4,128,1,float16,fp8,0,0.054816001653671266
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,32,16,4,128,1,fp8,fp8,0,0.055216002464294436
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,32,16,8,128,1,float16,fp8,0,0.05511040091514587
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,32,16,8,128,1,fp8,fp8,0,0.05523999929428101
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,32,16,16,128,1,float16,float16,0,0.034995201230049136
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,32,16,16,128,1,float16,fp8,0,0.033180800080299375
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,32,16,16,128,1,fp8,fp8,0,0.03304319977760315
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,32,16,1,128,1,float16,float16,0,0.03280960023403168
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,32,16,1,128,1,float16,fp8,0,0.03108479976654053
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,32,16,1,128,1,fp8,fp8,0,0.031224000453948974
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,64,16,2,128,1,float16,fp8,0,0.010447999835014344
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,32,16,2,128,1,float16,fp8,0,0.031123200058937074
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,32,16,2,128,1,fp8,fp8,0,0.03115679919719696
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,32,16,4,128,1,float16,float16,0,0.032872000336647035
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,32,16,4,128,1,float16,fp8,0,0.030987200140953065
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,32,16,4,128,1,fp8,fp8,0,0.030943998694419862
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,32,16,8,128,1,float16,fp8,0,0.03096640110015869
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,64,16,4,128,1,fp8,fp8,0,0.010518400371074677
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,32,16,8,128,1,float16,float16,0,0.034329599142074584
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,32,16,8,128,1,fp8,fp8,0,0.03102560043334961
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,32,16,16,128,1,float16,float16,0,0.02186879962682724
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,32,16,16,128,1,float16,fp8,0,0.0207056000828743
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,32,16,16,128,1,fp8,fp8,0,0.020713600516319274
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,32,16,1,128,1,float16,float16,0,0.02069920003414154
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,32,16,1,128,1,float16,fp8,0,0.020694400370121
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,32,16,1,128,1,fp8,fp8,0,0.020662400126457214
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,32,16,2,128,1,float16,float16,0,0.020712000131607056
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,32,16,2,128,1,float16,fp8,0,0.02072480022907257
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,32,16,4,128,1,float16,float16,0,0.020787200331687926
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,32,16,4,128,1,float16,fp8,0,0.02069920003414154
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,32,16,1,128,1,float16,fp8,0,0.10075680017471314
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,32,16,8,128,1,float16,float16,0,0.020678399503231047
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,32,16,8,128,1,float16,fp8,0,0.020695999264717102
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,32,16,8,128,1,fp8,fp8,0,0.02072640061378479
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,32,16,2,128,1,float16,float16,0,0.10150400400161744
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,32,16,16,128,1,float16,float16,0,0.014507199823856353
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,32,16,16,128,1,float16,fp8,0,0.014558400213718414
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,32,16,16,128,1,fp8,fp8,0,0.01449279934167862
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,32,16,1,128,1,float16,fp8,0,0.014374400675296783
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,32,16,1,128,1,fp8,fp8,0,0.01451680064201355
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,32,16,2,128,1,float16,float16,0,0.014574399590492249
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,32,16,2,128,1,float16,fp8,0,0.014480000734329224
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,32,16,2,128,1,fp8,fp8,0,0.014473600685596466
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,32,16,4,128,1,float16,float16,0,0.014480000734329224
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,32,16,4,128,1,float16,fp8,0,0.014473600685596466
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,32,16,4,128,1,fp8,fp8,0,0.014478400349617004
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,32,16,8,128,1,float16,float16,0,0.014572800695896148
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,32,16,8,128,1,float16,fp8,0,0.014507199823856353
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,32,16,1,128,1,float16,fp8,0,0.05502079725265503
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,32,16,8,128,1,fp8,fp8,0,0.014468799531459808
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,32,16,16,128,1,float16,float16,0,0.0104592002928257
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,32,16,16,128,1,float16,fp8,0,0.01048159971833229
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,32,16,16,128,1,fp8,fp8,0,0.010476800054311753
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,32,16,1,128,1,float16,float16,0,0.010441599786281586
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,32,16,1,128,1,float16,fp8,0,0.010478399693965912
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,32,16,1,128,1,fp8,fp8,0,0.010496000200510025
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,32,16,8,128,1,float16,float16,0,0.059575998783111574
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,32,16,2,128,1,float16,float16,0,0.010444799810647965
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,32,16,2,128,1,float16,fp8,0,0.010420800000429154
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,32,16,2,128,1,fp8,fp8,0,0.0104592002928257
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,32,16,4,128,1,float16,float16,0,0.010476800054311753
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,32,16,4,128,1,float16,fp8,0,0.0104032002389431
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,32,16,4,128,1,fp8,fp8,0,0.010416000336408614
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,32,16,8,128,1,float16,float16,0,0.010471999645233154
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,32,16,8,128,1,float16,fp8,0,0.010489600151777268
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,32,16,8,128,1,fp8,fp8,0,0.010412800312042236
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,32,16,16,128,1,float16,float16,0,0.01048479974269867
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,32,16,16,128,1,float16,fp8,0,0.010419200360774993
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,32,16,2,128,1,float16,float16,0,0.0324176013469696
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,32,16,16,128,1,fp8,fp8,0,0.010411199927330018
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,32,16,1,128,1,float16,fp8,0,0.010372799634933472
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,32,16,1,128,1,fp8,fp8,0,0.010385599732398988
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,32,16,2,128,1,float16,float16,0,0.010364799946546554
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,32,16,1,128,1,float16,float16,0,0.010384000092744827
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,32,16,2,128,1,float16,fp8,0,0.010382399708032609
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,32,16,4,128,1,float16,float16,0,0.010391999781131745
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,32,16,4,128,1,float16,fp8,0,0.010385599732398988
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,32,16,4,128,1,fp8,fp8,0,0.010417599976062775
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,32,16,8,128,1,float16,fp8,0,0.010489600151777268
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,32,16,8,128,1,fp8,fp8,0,0.010390400141477584
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,32,16,16,128,1,float16,float16,0,0.01048320010304451
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,32,16,16,128,1,float16,fp8,0,0.009708800166845322
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,32,16,16,128,1,fp8,fp8,0,0.01037919968366623
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,32,16,1,128,1,float16,float16,0,0.009600000083446502
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,32,16,1,128,1,float16,fp8,0,0.010436800122261048
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,32,16,1,128,1,fp8,fp8,0,0.00936800017952919
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,32,16,2,128,1,float16,float16,0,0.01040000021457672
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,32,16,2,128,1,float16,fp8,0,0.00942080020904541
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,32,16,2,128,1,fp8,fp8,0,0.02069759964942932
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,32,16,4,128,1,float16,fp8,0,0.009625600278377533
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,32,16,4,128,1,float16,float16,0,0.01005759984254837
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,32,16,4,128,1,fp8,fp8,0,0.020654399693012238
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,32,16,4,128,1,fp8,fp8,0,0.010425599664449692
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,32,16,8,128,1,float16,float16,0,0.01045759990811348
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,32,16,8,128,1,fp8,fp8,0,0.01043199971318245
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,32,16,16,128,1,float16,float16,0,0.010921599715948105
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,32,16,16,128,1,float16,fp8,0,0.010452800244092942
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,32,16,16,128,1,fp8,fp8,0,0.010391999781131745
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,32,16,1,128,1,float16,float16,0,0.010361599922180175
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,32,16,1,128,1,float16,fp8,0,0.010420800000429154
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,32,16,1,128,1,fp8,fp8,0,0.009433600306510925
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,32,16,2,128,1,float16,float16,0,0.010371199995279311
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,32,16,2,128,1,float16,fp8,0,0.01000479981303215
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,32,16,2,128,1,fp8,fp8,0,0.010409600287675857
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,32,16,4,128,1,float16,float16,0,0.01043040007352829
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,32,16,4,128,1,float16,fp8,0,0.010401599854230881
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,32,16,8,128,1,float16,float16,0,0.0104032002389431
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,32,16,8,128,1,float16,fp8,0,0.010409600287675857
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,32,16,8,128,1,fp8,fp8,0,0.010425599664449692
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,16,16,1,128,1,float16,float16,0,0.09873440265655517
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,16,16,1,128,1,float16,fp8,0,0.09486079812049866
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,16,16,1,128,1,fp8,fp8,0,0.09459199905395507
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,16,16,2,128,1,float16,float16,0,0.09814720153808594
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,16,16,2,128,1,float16,fp8,0,0.09495360255241395
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,16,16,2,128,1,fp8,fp8,0,0.09490079879760742
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,16,16,4,128,1,float16,float16,0,0.09961599707603455
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,32,16,2,128,1,fp8,fp8,0,0.010424000024795533
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,32,16,8,128,1,float16,float16,0,0.010401599854230881
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,16,16,4,128,1,float16,fp8,0,0.09520639777183533
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,16,16,4,128,1,fp8,fp8,0,0.09364799857139587
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,16,16,16,128,1,float16,float16,0,0.05715360045433045
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,16,16,8,128,1,float16,fp8,0,0.09472000002861022
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,16,16,8,128,1,fp8,fp8,0,0.09490399956703185
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,16,16,8,128,1,float16,float16,0,0.10162880420684814
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,16,16,16,128,1,float16,fp8,0,0.05437759757041931
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,16,16,1,128,1,float16,float16,0,0.05578240156173706
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,16,16,1,128,1,fp8,fp8,0,0.052107197046279904
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,16,16,2,128,1,float16,float16,0,0.05511999726295471
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,16,16,2,128,1,float16,fp8,0,0.05155519843101501
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,16,16,2,128,1,fp8,fp8,0,0.05220800042152405
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,16,16,4,128,1,float16,float16,0,0.05452479720115662
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,16,16,4,128,1,float16,fp8,0,0.05225920081138611
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,32,16,8,128,1,float16,fp8,0,0.009497600048780442
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,16,16,4,128,1,fp8,fp8,0,0.05145599842071533
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,16,16,8,128,1,float16,float16,0,0.056655997037887575
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,16,16,8,128,1,fp8,fp8,0,0.05167199969291687
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,32,16,1,128,1,float16,float16,0,0.014497600495815277
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,16,16,16,128,1,float16,float16,0,0.03295679986476898
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,16,16,16,128,1,fp8,fp8,0,0.030926400423049928
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,16,16,1,128,1,float16,fp8,0,0.030487999320030212
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,16,16,1,128,1,float16,float16,0,0.031508800387382505
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,32,16,4,128,1,fp8,fp8,0,0.010446400195360184
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,16,16,1,128,1,fp8,fp8,0,0.030707201361656188
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,16,16,2,128,1,float16,float16,0,0.03144319951534271
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,16,16,2,128,1,float16,fp8,0,0.030590400099754333
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,16,16,2,128,1,fp8,fp8,0,0.030206400156021117
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,16,16,4,128,1,float16,float16,0,0.03223679959774017
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,16,16,4,128,1,float16,fp8,0,0.029764801263809204
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,16,16,4,128,1,fp8,fp8,0,0.030112001299858093
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,16,16,8,128,1,float16,float16,0,0.03281440138816834
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,16,16,8,128,1,float16,fp8,0,0.030502399802207945
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,16,16,8,128,1,fp8,fp8,0,0.030622398853302
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,16,16,16,128,1,float16,float16,0,0.02070239931344986
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,16,16,16,128,1,float16,fp8,0,0.020729599893093108
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,16,16,16,128,1,fp8,fp8,0,0.020585599541664123
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,16,16,1,128,1,float16,float16,0,0.020716799795627593
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,16,16,1,128,1,float16,fp8,0,0.020268799364566804
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,16,16,1,128,1,fp8,fp8,0,0.020310400426387785
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,16,16,2,128,1,float16,fp8,0,0.01926079988479614
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,16,16,2,128,1,float16,float16,0,0.020692799985408784
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,16,16,2,128,1,fp8,fp8,0,0.01912959963083267
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,16,16,4,128,1,float16,float16,0,0.0206496000289917
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,16,16,4,128,1,float16,fp8,0,0.01865600049495697
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,16,16,4,128,1,fp8,fp8,0,0.02037599980831146
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,16,16,8,128,1,float16,float16,0,0.020652799308300017
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,16,16,8,128,1,float16,fp8,0,0.02024960070848465
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,16,16,8,128,1,fp8,fp8,0,0.018940800428390504
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,16,16,16,128,1,float16,float16,0,0.014531199634075165
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,16,16,16,128,1,float16,fp8,0,0.01451839953660965
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,16,16,16,128,1,fp8,fp8,0,0.01454080045223236
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,16,16,1,128,1,float16,float16,0,0.013841600716114044
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,16,16,1,128,1,float16,fp8,0,0.013625599443912506
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,16,16,1,128,1,fp8,fp8,0,0.013140800595283508
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,16,16,2,128,1,float16,float16,0,0.013596799969673157
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,16,16,2,128,1,float16,fp8,0,0.014547200500965118
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,16,16,2,128,1,fp8,fp8,0,0.012878400087356568
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,16,16,4,128,1,float16,fp8,0,0.012875199317932129
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,16,16,4,128,1,fp8,fp8,0,0.012996800243854523
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,16,16,8,128,1,float16,float16,0,0.014531199634075165
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,16,16,16,128,1,fp8,fp8,0,0.054958397150039674
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,16,16,1,128,1,float16,fp8,0,0.051841598749160764
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,16,16,8,128,1,fp8,fp8,0,0.014180800318717957
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,16,16,16,128,1,float16,float16,0,0.010475199669599533
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,16,16,16,128,1,float16,fp8,0,0.010500799864530563
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,16,16,16,128,1,fp8,fp8,0,0.010471999645233154
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,16,16,1,128,1,float16,float16,0,0.01048159971833229
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,16,16,1,128,1,float16,fp8,0,0.010447999835014344
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,16,16,1,128,1,fp8,fp8,0,0.010526400059461594
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,16,16,2,128,1,float16,float16,0,0.010427200049161912
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,16,16,2,128,1,float16,fp8,0,0.010460799932479859
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,16,16,2,128,1,fp8,fp8,0,0.010460799932479859
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,16,16,8,128,1,float16,fp8,0,0.052035200595855716
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,16,16,4,128,1,float16,float16,0,0.010476800054311753
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,16,16,4,128,1,float16,fp8,0,0.010407999902963639
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,16,16,16,128,1,float16,fp8,0,0.030895999073982237
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,16,16,4,128,1,fp8,fp8,0,0.01045759990811348
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,16,16,8,128,1,float16,float16,0,0.010480000078678131
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,16,16,8,128,1,float16,fp8,0,0.010416000336408614
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16,16,16,128,1,float16,float16,0,0.010470400005578995
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16,16,16,128,1,float16,fp8,0,0.010460799932479859
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16,16,16,128,1,fp8,fp8,0,0.010375999659299851
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16,16,1,128,1,float16,float16,0,0.010391999781131745
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16,16,1,128,1,float16,fp8,0,0.010478399693965912
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16,16,1,128,1,fp8,fp8,0,0.010478399693965912
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16,16,2,128,1,float16,float16,0,0.010427200049161912
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16,16,2,128,1,float16,fp8,0,0.010371199995279311
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16,16,2,128,1,fp8,fp8,0,0.010476800054311753
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16,16,4,128,1,float16,fp8,0,0.010387200117111205
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16,16,4,128,1,fp8,fp8,0,0.010412800312042236
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16,16,8,128,1,float16,float16,0,0.010435199737548828
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16,16,8,128,1,float16,fp8,0,0.010417599976062775
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16,16,8,128,1,fp8,fp8,0,0.010382399708032609
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16,16,16,128,1,float16,float16,0,0.010393600165843963
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16,16,16,128,1,float16,fp8,0,0.009329599887132644
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16,16,16,128,1,fp8,fp8,0,0.010407999902963639
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16,16,1,128,1,float16,fp8,0,0.008366400003433227
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16,16,1,128,1,fp8,fp8,0,0.009355200082063675
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16,16,1,128,1,float16,float16,0,0.009433600306510925
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16,16,2,128,1,float16,float16,0,0.00936800017952919
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16,16,2,128,1,float16,fp8,0,0.010092800110578537
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16,16,2,128,1,fp8,fp8,0,0.009337600320577621
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16,16,4,128,1,float16,float16,0,0.009556800127029419
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16,16,4,128,1,fp8,fp8,0,0.009057600051164627
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16,16,8,128,1,float16,float16,0,0.010388799756765366
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16,16,4,128,1,float16,fp8,0,0.009679999947547913
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16,16,8,128,1,fp8,fp8,0,0.010382399708032609
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16,16,16,128,1,float16,float16,0,0.010375999659299851
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16,16,16,128,1,float16,fp8,0,0.010371199995279311
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16,16,16,128,1,fp8,fp8,0,0.010401599854230881
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16,16,1,128,1,float16,fp8,0,0.010384000092744827
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16,16,1,128,1,fp8,fp8,0,0.01040000021457672
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16,16,2,128,1,float16,float16,0,0.01043199971318245
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16,16,2,128,1,float16,fp8,0,0.010363200306892395
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16,16,2,128,1,fp8,fp8,0,0.0104032002389431
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16,16,4,128,1,float16,float16,0,0.010372799634933472
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16,16,4,128,1,float16,fp8,0,0.010395199805498124
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,16,16,4,128,1,float16,float16,0,0.014550399780273438
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16,16,8,128,1,float16,float16,0,0.010452800244092942
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16,16,8,128,1,float16,fp8,0,0.009635200351476669
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16,16,8,128,1,fp8,fp8,0,0.01005759984254837
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,32,16,2,128,1,fp8,fp8,0,0.010467199981212616
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,1,16,1,128,1,float16,float16,0,0.09842879772186279
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,1,16,1,128,1,float16,fp8,0,0.08994560241699219
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,1,16,1,128,1,fp8,fp8,0,0.08975039720535279
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,1,16,2,128,1,float16,float16,0,0.09723039865493774
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,1,16,2,128,1,float16,fp8,0,0.08975679874420166
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,16,16,8,128,1,fp8,fp8,0,0.010427200049161912
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,1,16,2,128,1,fp8,fp8,0,0.089273601770401
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,1,16,4,128,1,float16,float16,0,0.09520320296287536
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,1,16,4,128,1,float16,fp8,0,0.08986560106277466
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16,16,4,128,1,float16,float16,0,0.010462400317192078
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,1,16,8,128,1,float16,float16,0,0.09720799922943116
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,1,16,8,128,1,float16,fp8,0,0.08984479904174805
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,1,16,8,128,1,fp8,fp8,0,0.08966720104217529
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,1,16,16,128,1,float16,float16,0,0.05429440140724182
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,1,16,16,128,1,float16,fp8,0,0.049635198712348935
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,1,16,16,128,1,fp8,fp8,0,0.049963200092315675
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,1,16,1,128,1,float16,float16,0,0.05397599935531616
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,1,16,1,128,1,float16,fp8,0,0.04936639964580536
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16,16,8,128,1,float16,fp8,0,0.009337600320577621
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,1,16,1,128,1,fp8,fp8,0,0.04951359927654266
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,1,16,2,128,1,float16,float16,0,0.05379679799079895
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,1,16,2,128,1,float16,fp8,0,0.04935680031776428
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,1,16,2,128,1,fp8,fp8,0,0.04973919987678528
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,1,16,4,128,1,float16,float16,0,0.05386880040168762
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,1,16,4,128,1,fp8,fp8,0,0.0497408002614975
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16,16,1,128,1,float16,float16,0,0.010411199927330018
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,1,16,8,128,1,float16,float16,0,0.05375679731369019
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,1,16,8,128,1,float16,fp8,0,0.049660798907279965
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,1,16,8,128,1,fp8,fp8,0,0.04992319941520691
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1,16,16,128,1,float16,float16,0,0.031068798899650574
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1,16,16,128,1,float16,fp8,0,0.028934401273727418
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1,16,16,128,1,fp8,fp8,0,0.028932800889015196
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1,16,1,128,1,float16,float16,0,0.030980798602104186
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1,16,1,128,1,float16,fp8,0,0.02890079915523529
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1,16,1,128,1,fp8,fp8,0,0.028960001468658448
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1,16,2,128,1,float16,float16,0,0.03094879984855652
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1,16,2,128,1,fp8,fp8,0,0.02900159955024719
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1,16,4,128,1,float16,float16,0,0.031044799089431762
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1,16,4,128,1,float16,fp8,0,0.028915199637413024
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1,16,4,128,1,fp8,fp8,0,0.02890399992465973
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16,16,4,128,1,fp8,fp8,0,0.009031999856233597
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1,16,8,128,1,float16,float16,0,0.03097440004348755
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1,16,8,128,1,float16,fp8,0,0.02892799973487854
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1,16,8,128,1,fp8,fp8,0,0.02890399992465973
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1,16,16,128,1,float16,float16,0,0.020428800582885744
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1,16,16,128,1,float16,fp8,0,0.01868959963321686
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1,16,16,128,1,fp8,fp8,0,0.01865600049495697
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1,16,1,128,1,float16,float16,0,0.01932000070810318
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1,16,1,128,1,float16,fp8,0,0.018665599822998046
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1,16,1,128,1,fp8,fp8,0,0.01860480010509491
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1,16,2,128,1,float16,float16,0,0.020286400616168977
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,16,16,8,128,1,float16,fp8,0,0.013710400462150574
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1,16,2,128,1,fp8,fp8,0,0.018612800538539885
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1,16,4,128,1,float16,float16,0,0.020636799931526183
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1,16,2,128,1,float16,fp8,0,0.018713599443435668
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1,16,4,128,1,fp8,fp8,0,0.018676799535751343
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1,16,8,128,1,float16,float16,0,0.02053920030593872
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1,16,8,128,1,float16,fp8,0,0.018681600689888
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1,16,16,128,1,float16,float16,0,0.014481599628925323
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1,16,16,128,1,float16,fp8,0,0.01255200058221817
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1,16,8,128,1,fp8,fp8,0,0.01860159933567047
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1,16,16,128,1,fp8,fp8,0,0.01255040019750595
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1,16,1,128,1,float16,float16,0,0.014580799639225006
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1,16,1,128,1,float16,fp8,0,0.012566399574279786
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1,16,1,128,1,fp8,fp8,0,0.012449599802494049
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1,16,2,128,1,float16,float16,0,0.014550399780273438
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1,16,2,128,1,float16,fp8,0,0.013262400031089782
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1,16,2,128,1,fp8,fp8,0,0.012564800679683685
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1,16,4,128,1,float16,float16,0,0.014478400349617004
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1,16,4,128,1,float16,fp8,0,0.012649600207805634
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1,16,8,128,1,float16,float16,0,0.01451839953660965
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1,16,8,128,1,float16,fp8,0,0.013158400356769562
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1,16,4,128,1,fp8,fp8,0,0.01255200058221817
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1,16,8,128,1,fp8,fp8,0,0.012833599746227265
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1,16,16,128,1,float16,float16,0,0.012411200255155564
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1,16,16,128,1,float16,fp8,0,0.01043360009789467
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1,16,16,128,1,fp8,fp8,0,0.010491199791431427
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1,16,1,128,1,float16,float16,0,0.010475199669599533
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1,16,1,128,1,float16,fp8,0,0.010391999781131745
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1,16,1,128,1,fp8,fp8,0,0.010446400195360184
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1,16,2,128,1,float16,float16,0,0.011240000277757645
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1,16,2,128,1,float16,fp8,0,0.010384000092744827
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1,16,4,128,1,float16,float16,0,0.01088000014424324
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1,16,4,128,1,float16,fp8,0,0.010444799810647965
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1,16,4,128,1,fp8,fp8,0,0.010451199859380722
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1,16,8,128,1,float16,float16,0,0.010417599976062775
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1,16,8,128,1,float16,fp8,0,0.010367999970912933
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1,16,8,128,1,fp8,fp8,0,0.010480000078678131
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1,16,16,128,1,float16,float16,0,0.0104592002928257
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1,16,16,128,1,float16,fp8,0,0.010411199927330018
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1,16,16,128,1,fp8,fp8,0,0.010390400141477584
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1,16,1,128,1,float16,float16,0,0.010395199805498124
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1,16,1,128,1,float16,fp8,0,0.010369600355625152
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1,16,1,128,1,fp8,fp8,0,0.010396800190210342
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1,16,2,128,1,float16,float16,0,0.010384000092744827
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1,16,2,128,1,float16,fp8,0,0.010492800176143647
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1,16,2,128,1,fp8,fp8,0,0.010424000024795533
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1,16,4,128,1,float16,fp8,0,0.01040479987859726
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1,16,4,128,1,fp8,fp8,0,0.010409600287675857
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,1,16,4,128,1,float16,fp8,0,0.04949440062046051
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1,16,8,128,1,float16,float16,0,0.010388799756765366
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1,16,8,128,1,float16,fp8,0,0.010375999659299851
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1,16,8,128,1,fp8,fp8,0,0.009155199676752091
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1,16,16,128,1,float16,float16,0,0.010438399761915207
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1,16,16,128,1,float16,fp8,0,0.00851840004324913
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1,16,16,128,1,fp8,fp8,0,0.008463999629020691
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1,16,1,128,1,float16,float16,0,0.008556800335645676
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1,16,1,128,1,float16,fp8,0,0.008449599891901017
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1,16,2,128,1,float16,fp8,0,0.028867200016975403
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1,16,2,128,1,float16,fp8,0,0.008500800281763077
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1,16,2,128,1,float16,float16,0,0.01000479981303215
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1,16,2,128,1,fp8,fp8,0,0.010361599922180175
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1,16,4,128,1,float16,float16,0,0.01000479981303215
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1,16,4,128,1,float16,fp8,0,0.00976639986038208
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1,16,4,128,1,fp8,fp8,0,0.008473599702119828
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1,16,8,128,1,float16,float16,0,0.010422399640083313
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1,16,8,128,1,float16,fp8,0,0.00846880003809929
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1,16,8,128,1,fp8,fp8,0,0.009220799803733826
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1,16,16,128,1,float16,float16,0,0.009372799843549728
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1,16,16,128,1,float16,fp8,0,0.00859839990735054
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1,16,16,128,1,fp8,fp8,0,0.009297599643468856
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1,16,1,128,1,float16,float16,0,0.009836799651384353
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1,16,1,128,1,float16,fp8,0,0.008654399961233138
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1,16,1,128,1,fp8,fp8,0,0.008473599702119828
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1,16,2,128,1,float16,float16,0,0.009694399684667588
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1,16,2,128,1,float16,fp8,0,0.008448000252246856
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1,16,4,128,1,float16,fp8,0,0.01868479996919632
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1,16,2,128,1,fp8,fp8,0,0.008851200342178345
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1,16,4,128,1,float16,float16,0,0.008489599823951722
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1,16,4,128,1,float16,fp8,0,0.009115199744701385
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1,16,8,128,1,float16,float16,0,0.008710400015115739
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1,16,8,128,1,float16,fp8,0,0.008423999696969987
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1,16,8,128,1,fp8,fp8,0,0.008404800295829773
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,1,16,4,128,1,fp8,fp8,0,0.08937119841575622
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1,16,2,128,1,fp8,fp8,0,0.010527999699115753
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1,16,4,128,1,float16,float16,0,0.010412800312042236
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1,16,1,128,1,fp8,fp8,0,0.008433599770069123
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1,16,4,128,1,fp8,fp8,0,0.008462399989366532
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,16384,12,1,128,1,float16,fp8,0,4.274609756469727
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,16384,12,1,128,1,fp8,fp8,0,4.334088134765625
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,16384,12,1,128,1,float16,float16,0,5.4279121398925785
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,16384,12,2,128,1,float16,fp8,0,4.311502456665039
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,16384,12,2,128,1,float16,float16,0,5.416999816894531
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,16384,12,2,128,1,fp8,fp8,0,4.305412673950196
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,16384,12,4,128,1,float16,fp8,0,4.346771240234375
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,16384,12,4,128,1,fp8,fp8,0,4.376339340209961
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,16384,12,4,128,1,float16,float16,0,5.459936141967773
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16384,12,1,128,1,float16,float16,0,2.7472143173217773
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16384,12,1,128,1,fp8,fp8,0,2.2351711273193358
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16384,12,12,128,1,float16,fp8,0,2.273472023010254
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16384,12,12,128,1,fp8,fp8,0,2.270015907287598
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16384,12,2,128,1,float16,float16,0,2.6602975845336916
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16384,12,2,128,1,float16,fp8,0,2.5104352951049806
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16384,12,2,128,1,fp8,fp8,0,2.4890480041503906
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16384,12,4,128,1,float16,fp8,0,2.2213167190551757
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16384,12,1,128,1,float16,fp8,0,2.2112255096435547
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16384,12,12,128,1,float16,fp8,0,1.2422112464904784
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16384,12,4,128,1,float16,float16,0,2.655124855041504
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16384,12,12,128,1,fp8,fp8,0,1.2217424392700196
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16384,12,4,128,1,fp8,fp8,0,2.2351184844970704
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16384,12,1,128,1,float16,fp8,0,1.1607343673706054
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16384,12,1,128,1,float16,float16,0,1.3200495719909668
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16384,12,1,128,1,fp8,fp8,0,1.1684399604797364
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16384,12,2,128,1,float16,fp8,0,1.1693807601928712
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16384,12,2,128,1,float16,float16,0,1.4788928031921387
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16384,12,2,128,1,fp8,fp8,0,1.1985440254211426
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16384,12,12,128,1,fp8,fp8,0,0.6707568168640137
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16384,12,4,128,1,fp8,fp8,0,1.1644512176513673
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16384,12,4,128,1,float16,fp8,0,1.3828816413879395
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16384,12,12,128,1,float16,fp8,0,0.7749567985534668
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16384,12,1,128,1,float16,float16,0,0.7051663875579834
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16384,12,1,128,1,fp8,fp8,0,0.6908944129943848
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16384,12,2,128,1,float16,float16,0,0.7789504051208496
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16384,12,2,128,1,float16,fp8,0,0.6431568145751954
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16384,12,2,128,1,fp8,fp8,0,0.656166410446167
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16384,12,4,128,1,float16,float16,0,0.7288544178009033
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16384,12,4,128,1,float16,fp8,0,0.6439087867736817
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16384,12,4,128,1,fp8,fp8,0,0.6487984180450439
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16384,12,4,128,1,float16,float16,0,1.324174404144287
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16384,12,1,128,1,float16,fp8,0,0.6416384220123291
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,12288,12,1,128,1,float16,fp8,0,2.5173599243164064
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,12288,12,1,128,1,float16,float16,0,3.1026399612426756
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,12288,12,1,128,1,fp8,fp8,0,2.5394479751586916
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,12288,12,2,128,1,float16,float16,0,2.9020559310913088
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,12288,12,2,128,1,float16,fp8,0,2.5203231811523437
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,12288,12,2,128,1,fp8,fp8,0,2.533291244506836
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,12288,12,4,128,1,float16,fp8,0,2.540563201904297
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,12288,12,4,128,1,float16,float16,0,3.3244911193847657
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,12288,12,12,128,1,float16,fp8,0,1.3663887977600098
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,12288,12,12,128,1,fp8,fp8,0,1.3976367950439452
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,12288,12,4,128,1,fp8,fp8,0,2.902203178405762
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,12288,12,1,128,1,float16,float16,0,1.6622255325317383
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,12288,12,1,128,1,float16,fp8,0,1.314417552947998
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,12288,12,1,128,1,fp8,fp8,0,1.3522000312805176
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,12288,12,2,128,1,float16,float16,0,1.4705951690673829
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,12288,12,2,128,1,float16,fp8,0,1.54835844039917
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,12288,12,2,128,1,fp8,fp8,0,1.3111328125
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,12288,12,4,128,1,float16,float16,0,1.5379535675048828
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,12288,12,4,128,1,float16,fp8,0,1.495785617828369
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,12288,12,4,128,1,fp8,fp8,0,1.3585519790649414
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,12288,12,12,128,1,float16,fp8,0,0.946230411529541
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,12288,12,12,128,1,fp8,fp8,0,0.7673711776733398
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,12288,12,1,128,1,float16,float16,0,0.7988848209381103
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,12288,12,1,128,1,float16,fp8,0,0.7144591808319092
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,12288,12,1,128,1,fp8,fp8,0,0.7471648216247558
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,12288,12,2,128,1,float16,float16,0,0.7718639850616456
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,12288,12,2,128,1,float16,fp8,0,0.7092368125915527
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,12288,12,2,128,1,fp8,fp8,0,0.7965328216552734
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,12288,12,4,128,1,float16,float16,0,0.7858928203582763
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,12288,12,4,128,1,float16,fp8,0,0.7541327953338623
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,12288,12,4,128,1,fp8,fp8,0,0.7935711860656738
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,12288,12,12,128,1,float16,fp8,0,0.44275679588317873
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,12288,12,1,128,1,float16,float16,0,0.44002718925476075
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,12288,12,12,128,1,fp8,fp8,0,0.41508002281188966
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,12288,12,1,128,1,float16,fp8,0,0.4047344207763672
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,12288,12,1,128,1,fp8,fp8,0,0.40064477920532227
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,12288,12,2,128,1,float16,fp8,0,0.4006175994873047
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,12288,12,2,128,1,fp8,fp8,0,0.40009279251098634
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,12288,12,4,128,1,float16,float16,0,0.43326878547668457
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,12288,12,4,128,1,float16,fp8,0,0.40047359466552734
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,12288,12,4,128,1,fp8,fp8,0,0.40091519355773925
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,12288,12,2,128,1,float16,float16,0,0.4283008098602295
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,10240,12,1,128,1,float16,float16,0,2.0936927795410156
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,10240,12,1,128,1,float16,fp8,0,1.8118207931518555
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,10240,12,1,128,1,fp8,fp8,0,2.052992057800293
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,10240,12,2,128,1,float16,float16,0,2.0544111251831056
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,10240,12,2,128,1,float16,fp8,0,1.8367424011230469
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,10240,12,2,128,1,fp8,fp8,0,2.082900810241699
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,10240,12,4,128,1,float16,float16,0,2.0796176910400392
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,10240,12,4,128,1,float16,fp8,0,1.872609519958496
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,10240,12,4,128,1,fp8,fp8,0,2.0371360778808594
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,10240,12,12,128,1,float16,fp8,0,1.026694393157959
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,10240,12,12,128,1,fp8,fp8,0,0.9975135803222657
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,10240,12,1,128,1,float16,float16,0,1.0525247573852539
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,10240,12,1,128,1,float16,fp8,0,1.1040512084960938
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,10240,12,1,128,1,fp8,fp8,0,1.0283984184265136
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,10240,12,2,128,1,float16,float16,0,1.0580464363098145
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,10240,12,2,128,1,float16,fp8,0,1.026460838317871
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,10240,12,2,128,1,fp8,fp8,0,0.9966416358947754
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,10240,12,4,128,1,float16,float16,0,1.0717023849487304
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,10240,12,4,128,1,float16,fp8,0,0.9728943824768066
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,10240,12,4,128,1,fp8,fp8,0,1.002222442626953
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,10240,12,12,128,1,float16,float16,0,0.5936384201049805
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,12288,12,12,128,1,float16,float16,0,1.5665568351745605
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,10240,12,12,128,1,float16,fp8,0,0.5413040161132813
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,12288,12,12,128,1,float16,float16,0,0.8167535781860351
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16384,12,12,128,1,float16,float16,0,2.6946943283081053
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,10240,12,1,128,1,float16,float16,0,0.5566800117492676
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,10240,12,12,128,1,fp8,fp8,0,0.6188896179199219
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,10240,12,1,128,1,float16,fp8,0,0.5234608173370361
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,10240,12,1,128,1,fp8,fp8,0,0.651259183883667
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,10240,12,2,128,1,float16,float16,0,0.5602511882781982
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,10240,12,2,128,1,float16,fp8,0,0.553769588470459
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,10240,12,2,128,1,fp8,fp8,0,0.5320623874664306
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,12288,12,12,128,1,float16,float16,0,0.44733438491821287
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,10240,12,4,128,1,float16,float16,0,0.672811222076416
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,10240,12,12,128,1,float16,float16,0,1.1225296020507813
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,10240,12,12,128,1,float16,float16,0,0.3366624116897583
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,10240,12,4,128,1,float16,fp8,0,0.5232367992401123
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,10240,12,12,128,1,float16,fp8,0,0.31438241004943845
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,10240,12,4,128,1,fp8,fp8,0,0.534225606918335
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16384,12,12,128,1,float16,float16,0,0.7343935966491699
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,10240,12,1,128,1,float16,float16,0,0.32606399059295654
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,10240,12,12,128,1,fp8,fp8,0,0.39549760818481444
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,10240,12,1,128,1,float16,fp8,0,0.3043071985244751
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,10240,12,1,128,1,fp8,fp8,0,0.3040031909942627
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,10240,12,2,128,1,float16,fp8,0,0.3117536067962646
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,10240,12,2,128,1,fp8,fp8,0,0.30134561061859133
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,10240,12,4,128,1,float16,float16,0,0.32907679080963137
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,10240,12,4,128,1,fp8,fp8,0,0.303601598739624
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,10240,12,4,128,1,float16,fp8,0,0.34801759719848635
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16384,12,12,128,1,float16,float16,0,1.3557151794433593
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,8192,12,1,128,1,float16,fp8,0,2.370689582824707
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,8192,12,1,128,1,fp8,fp8,0,2.3635663986206055
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,8192,12,1,128,1,float16,float16,0,2.713844871520996
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,8192,12,2,128,1,float16,fp8,0,2.3649776458740233
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,8192,12,2,128,1,fp8,fp8,0,2.3861263275146483
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,8192,12,2,128,1,float16,float16,0,2.7929712295532227
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,10240,12,2,128,1,float16,float16,0,0.31245760917663573
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,8192,12,4,128,1,float16,float16,0,2.80184326171875
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,8192,12,12,128,1,float16,fp8,0,1.2898752212524414
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,8192,12,12,128,1,fp8,fp8,0,1.3175999641418457
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,8192,12,12,128,1,float16,float16,0,1.6374992370605468
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,8192,12,1,128,1,float16,float16,0,1.5521056175231933
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,8192,12,4,128,1,float16,fp8,0,2.3727359771728516
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,8192,12,4,128,1,fp8,fp8,0,2.3968496322631836
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,8192,12,1,128,1,float16,fp8,0,1.2169008255004883
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,8192,12,2,128,1,float16,fp8,0,1.2245583534240723
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,8192,12,2,128,1,float16,float16,0,1.3453904151916505
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,8192,12,12,128,1,float16,float16,0,0.7502304077148437
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,8192,12,2,128,1,fp8,fp8,0,1.4404272079467773
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,8192,12,4,128,1,float16,float16,0,1.4293375968933106
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,8192,12,4,128,1,float16,fp8,0,1.228163242340088
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,8192,12,12,128,1,float16,fp8,0,0.6813375949859619
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,8192,12,4,128,1,fp8,fp8,0,1.2530863761901856
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,8192,12,12,128,1,fp8,fp8,0,0.6811232089996337
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,8192,12,1,128,1,float16,float16,0,0.6920623779296875
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,8192,12,2,128,1,float16,fp8,0,0.6442512035369873
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,8192,12,2,128,1,float16,float16,0,0.7020592212677002
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,8192,12,4,128,1,float16,float16,0,0.7193999767303467
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,8192,12,4,128,1,float16,fp8,0,0.676470422744751
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,8192,12,12,128,1,float16,float16,0,0.4077311992645264
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,8192,12,4,128,1,fp8,fp8,0,0.644539213180542
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,8192,12,12,128,1,float16,fp8,0,0.39422080516815183
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,8192,12,12,128,1,fp8,fp8,0,0.3891119956970215
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,8192,12,1,128,1,float16,float16,0,0.371996808052063
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,8192,12,1,128,1,float16,fp8,0,0.3735392093658447
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,8192,12,1,128,1,fp8,fp8,0,0.3694175958633423
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,8192,12,2,128,1,float16,float16,0,0.3748399972915649
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,8192,12,2,128,1,float16,fp8,0,0.3704591989517212
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,8192,12,2,128,1,fp8,fp8,0,0.36231839656829834
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,8192,12,1,128,1,fp8,fp8,0,1.4215408325195313
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,8192,12,4,128,1,float16,fp8,0,0.360263991355896
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,8192,12,12,128,1,float16,float16,0,0.23459041118621826
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,8192,12,4,128,1,fp8,fp8,0,0.36401278972625734
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,8192,12,12,128,1,float16,fp8,0,0.22133278846740723
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,8192,12,12,128,1,fp8,fp8,0,0.2210832118988037
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,8192,12,1,128,1,float16,float16,0,0.22041280269622804
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,8192,12,1,128,1,float16,fp8,0,0.21189439296722412
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,8192,12,1,128,1,fp8,fp8,0,0.21208479404449462
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,8192,12,2,128,1,float16,fp8,0,0.21139039993286132
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,8192,12,1,128,1,float16,fp8,0,0.6835984230041504
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,8192,12,2,128,1,fp8,fp8,0,0.2119920015335083
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,8192,12,4,128,1,float16,float16,0,0.22013599872589112
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,8192,12,4,128,1,float16,fp8,0,0.21141600608825684
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,8192,12,2,128,1,fp8,fp8,0,0.6503183841705322
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,8192,12,1,128,1,fp8,fp8,0,0.7636672019958496
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,8192,12,4,128,1,fp8,fp8,0,0.2119839906692505
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,6144,12,1,128,1,float16,fp8,0,1.429644775390625
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,6144,12,1,128,1,float16,float16,0,1.5381967544555664
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,6144,12,1,128,1,fp8,fp8,0,1.4249888420104981
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,6144,12,2,128,1,float16,fp8,0,1.4242608070373535
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,6144,12,2,128,1,float16,float16,0,1.553313636779785
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,8192,12,4,128,1,float16,float16,0,0.3809439897537231
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,6144,12,12,128,1,float16,float16,0,0.9469632148742676
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,6144,12,2,128,1,fp8,fp8,0,1.4278575897216796
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,6144,12,12,128,1,float16,fp8,0,0.7950384140014648
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,8192,12,2,128,1,float16,float16,0,0.22081921100616456
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,6144,12,4,128,1,float16,float16,0,1.6520816802978515
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,6144,12,4,128,1,fp8,fp8,0,1.4250255584716798
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,6144,12,12,128,1,fp8,fp8,0,0.7959375858306885
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,6144,12,1,128,1,float16,float16,0,0.8402048110961914
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,6144,12,1,128,1,float16,fp8,0,0.7415120124816894
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,6144,12,1,128,1,fp8,fp8,0,0.7413680076599121
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,6144,12,2,128,1,fp8,fp8,0,0.7410655975341797
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,6144,12,2,128,1,float16,float16,0,0.7975647926330567
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,6144,12,2,128,1,float16,fp8,0,0.9146639823913574
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,6144,12,4,128,1,float16,float16,0,0.8112144470214844
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,6144,12,12,128,1,float16,float16,0,0.4571040153503418
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,6144,12,12,128,1,float16,fp8,0,0.4268239974975586
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,6144,12,4,128,1,fp8,fp8,0,0.7425424098968506
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,6144,12,12,128,1,fp8,fp8,0,0.4272831916809082
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,6144,12,4,128,1,float16,fp8,0,0.8930751800537109
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,6144,12,1,128,1,float16,float16,0,0.45713119506835936
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,6144,12,1,128,1,float16,fp8,0,0.4241936206817627
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,6144,12,1,128,1,fp8,fp8,0,0.399783992767334
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,6144,12,2,128,1,float16,float16,0,0.4140624046325684
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,6144,12,2,128,1,fp8,fp8,0,0.39942240715026855
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,6144,12,4,128,1,float16,float16,0,0.4596672058105469
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,6144,12,12,128,1,float16,float16,0,0.2521071910858154
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,6144,12,4,128,1,float16,fp8,0,0.45297918319702146
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,6144,12,12,128,1,float16,fp8,0,0.24344000816345215
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,6144,12,12,128,1,fp8,fp8,0,0.2507055997848511
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,6144,12,1,128,1,float16,float16,0,0.2443392038345337
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,6144,12,1,128,1,float16,fp8,0,0.2275696039199829
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,6144,12,1,128,1,fp8,fp8,0,0.23412001132965088
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,6144,12,2,128,1,float16,float16,0,0.23304479122161864
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,6144,12,2,128,1,float16,fp8,0,0.2362816095352173
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,6144,12,2,128,1,fp8,fp8,0,0.24101440906524657
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,6144,12,4,128,1,float16,float16,0,0.2369055986404419
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,6144,12,4,128,1,float16,fp8,0,0.23143839836120605
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,6144,12,12,128,1,float16,float16,0,0.1630288004875183
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,6144,12,4,128,1,fp8,fp8,0,0.22735838890075682
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,6144,12,12,128,1,fp8,fp8,0,0.15047520399093628
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,6144,12,12,128,1,float16,fp8,0,0.15905280113220216
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,6144,12,1,128,1,float16,float16,0,0.14823999404907226
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,6144,12,1,128,1,float16,fp8,0,0.1445312023162842
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,6144,12,1,128,1,fp8,fp8,0,0.14352960586547853
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,6144,12,2,128,1,float16,float16,0,0.14739359617233277
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,6144,12,2,128,1,float16,fp8,0,0.14823199510574342
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,6144,12,2,128,1,fp8,fp8,0,0.14387359619140624
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,6144,12,4,128,1,float16,float16,0,0.1495919942855835
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,6144,12,4,128,1,float16,fp8,0,0.1432800054550171
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,6144,12,4,128,1,fp8,fp8,0,0.14350559711456298
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,6144,12,4,128,1,float16,fp8,0,1.4265151977539063
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,4096,12,1,128,1,float16,fp8,0,1.4016223907470704
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,4096,12,1,128,1,float16,float16,0,1.4636639595031737
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,4096,12,1,128,1,fp8,fp8,0,1.4006112098693848
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,4096,12,2,128,1,float16,fp8,0,1.3970255851745605
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,4096,12,2,128,1,fp8,fp8,0,1.397062397003174
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,6144,12,2,128,1,float16,fp8,0,0.39976799488067627
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,6144,12,4,128,1,fp8,fp8,0,0.4255199909210205
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,4096,12,12,128,1,float16,float16,0,0.8557408332824707
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,4096,12,12,128,1,float16,fp8,0,0.7905519962310791
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,4096,12,4,128,1,float16,float16,0,1.5100239753723144
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,4096,12,4,128,1,float16,fp8,0,1.5202879905700684
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,4096,12,4,128,1,fp8,fp8,0,1.3957695960998535
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,4096,12,12,128,1,fp8,fp8,0,0.7922080039978028
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,4096,12,1,128,1,float16,float16,0,0.7421728134155273
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,4096,12,1,128,1,float16,fp8,0,0.7186031818389893
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,4096,12,1,128,1,fp8,fp8,0,0.7182240009307861
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,4096,12,2,128,1,float16,fp8,0,0.7463856220245362
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,4096,12,2,128,1,fp8,fp8,0,0.7179728031158448
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,4096,12,2,128,1,float16,float16,0,0.8383456230163574
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,4096,12,4,128,1,float16,float16,0,0.77150559425354
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,4096,12,4,128,1,float16,fp8,0,0.7193103790283203
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,4096,12,4,128,1,fp8,fp8,0,0.7186831951141357
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,4096,12,12,128,1,float16,float16,0,0.44333438873291015
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,4096,12,1,128,1,float16,float16,0,0.3857055902481079
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,4096,12,12,128,1,fp8,fp8,0,0.4897471904754639
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,4096,12,1,128,1,float16,fp8,0,0.3898207902908325
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,4096,12,1,128,1,fp8,fp8,0,0.380947208404541
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,4096,12,2,128,1,float16,float16,0,0.38795039653778074
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,4096,12,2,128,1,float16,fp8,0,0.3863663911819458
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,4096,12,2,128,1,fp8,fp8,0,0.4470111846923828
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,4096,12,4,128,1,float16,float16,0,0.4013711929321289
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,4096,12,4,128,1,float16,fp8,0,0.3812704086303711
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,4096,12,12,128,1,float16,float16,0,0.24058079719543457
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,4096,12,4,128,1,fp8,fp8,0,0.38530879020690917
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,4096,12,12,128,1,float16,fp8,0,0.23995840549468994
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,4096,12,12,128,1,fp8,fp8,0,0.23309760093688964
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,4096,12,1,128,1,float16,float16,0,0.21140799522399903
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,4096,12,1,128,1,float16,fp8,0,0.21470398902893068
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,4096,12,1,128,1,fp8,fp8,0,0.21249759197235107
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,4096,12,2,128,1,float16,float16,0,0.2172976016998291
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,4096,12,2,128,1,float16,fp8,0,0.21272799968719483
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,4096,12,2,128,1,fp8,fp8,0,0.21140639781951903
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,4096,12,4,128,1,float16,float16,0,0.22025279998779296
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,4096,12,4,128,1,float16,fp8,0,0.212608003616333
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,4096,12,12,128,1,float16,float16,0,0.13958719968795777
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,4096,12,4,128,1,fp8,fp8,0,0.22060160636901854
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,4096,12,12,128,1,float16,fp8,0,0.13395680189132692
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,4096,12,12,128,1,fp8,fp8,0,0.13691359758377075
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,4096,12,1,128,1,float16,float16,0,0.12592799663543702
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,4096,12,1,128,1,fp8,fp8,0,0.12974400520324708
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,4096,12,1,128,1,float16,fp8,0,0.12679680585861205
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,4096,12,2,128,1,float16,float16,0,0.12657760381698607
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,4096,12,2,128,1,float16,fp8,0,0.12741440534591675
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,4096,12,2,128,1,fp8,fp8,0,0.12496479749679565
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,4096,12,4,128,1,float16,fp8,0,0.12625919580459594
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,4096,12,4,128,1,fp8,fp8,0,0.12527040243148804
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,4096,12,12,128,1,float16,float16,0,0.08939520120620728
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,4096,12,12,128,1,float16,fp8,0,0.08591840267181397
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,4096,12,12,128,1,fp8,fp8,0,0.08677600026130676
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,4096,12,1,128,1,float16,fp8,0,0.08381919860839844
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,4096,12,2,128,1,float16,float16,0,0.08413599729537964
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,4096,12,2,128,1,float16,fp8,0,0.08271039724349975
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,4096,12,2,128,1,fp8,fp8,0,0.08177599906921387
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,4096,12,4,128,1,float16,float16,0,0.08604320287704467
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,4096,12,4,128,1,float16,fp8,0,0.08207359910011292
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,4096,12,4,128,1,fp8,fp8,0,0.08257120251655578
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,4096,12,12,128,1,float16,fp8,0,0.41573920249938967
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,3072,12,1,128,1,float16,float16,0,0.8800704002380371
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,3072,12,1,128,1,float16,fp8,0,0.871406364440918
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,4096,12,2,128,1,float16,float16,0,1.4539456367492676
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,3072,12,1,128,1,fp8,fp8,0,0.8705887794494629
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,3072,12,2,128,1,float16,float16,0,0.8867456436157226
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,3072,12,2,128,1,float16,fp8,0,0.8720191955566406
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,3072,12,2,128,1,fp8,fp8,0,0.8731663703918457
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,3072,12,4,128,1,float16,float16,0,0.9085424423217774
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,3072,12,12,128,1,float16,float16,0,0.5360799789428711
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,3072,12,4,128,1,float16,fp8,0,0.8708208084106446
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,4096,12,4,128,1,float16,float16,0,0.13335039615631103
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,3072,12,4,128,1,fp8,fp8,0,1.033142375946045
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,4096,12,1,128,1,float16,float16,0,0.08361120223999023
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,3072,12,1,128,1,float16,float16,0,0.4448224067687988
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,3072,12,12,128,1,float16,fp8,0,0.5080160140991211
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,3072,12,12,128,1,fp8,fp8,0,0.5069488048553467
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,3072,12,1,128,1,float16,fp8,0,0.45243039131164553
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,3072,12,1,128,1,fp8,fp8,0,0.45092158317565917
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,3072,12,2,128,1,float16,float16,0,0.4642496109008789
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,3072,12,2,128,1,float16,fp8,0,0.4524687767028809
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,3072,12,2,128,1,fp8,fp8,0,0.4523183822631836
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,3072,12,4,128,1,float16,float16,0,0.46519999504089354
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,3072,12,4,128,1,float16,fp8,0,0.4534111976623535
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,3072,12,12,128,1,float16,float16,0,0.2786895990371704
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,3072,12,12,128,1,float16,fp8,0,0.282590389251709
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,3072,12,4,128,1,fp8,fp8,0,0.4532623767852783
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,3072,12,1,128,1,float16,float16,0,0.2539072036743164
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,3072,12,12,128,1,fp8,fp8,0,0.27070400714874265
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,3072,12,1,128,1,float16,fp8,0,0.24495038986206055
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,3072,12,2,128,1,float16,float16,0,0.23864960670471191
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,3072,12,1,128,1,fp8,fp8,0,0.24397759437561034
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,3072,12,2,128,1,fp8,fp8,0,0.2438591957092285
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,3072,12,12,128,1,float16,float16,0,0.15514719486236572
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,3072,12,12,128,1,float16,fp8,0,0.15331679582595825
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,3072,12,4,128,1,float16,float16,0,0.2501503944396973
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,3072,12,4,128,1,float16,fp8,0,0.24405438899993898
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,3072,12,4,128,1,fp8,fp8,0,0.2444159984588623
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,3072,12,12,128,1,fp8,fp8,0,0.15309120416641236
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,3072,12,1,128,1,float16,float16,0,0.13596639633178711
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,3072,12,1,128,1,float16,fp8,0,0.1383679986000061
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,3072,12,2,128,1,float16,float16,0,0.13746240139007568
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,3072,12,1,128,1,fp8,fp8,0,0.14177119731903076
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,3072,12,2,128,1,fp8,fp8,0,0.13795039653778077
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,3072,12,4,128,1,float16,float16,0,0.14147520065307617
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,3072,12,4,128,1,float16,fp8,0,0.138264000415802
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,3072,12,12,128,1,float16,float16,0,0.09776480197906494
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,3072,12,12,128,1,float16,fp8,0,0.09298400282859802
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,3072,12,4,128,1,fp8,fp8,0,0.13876160383224487
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,3072,12,12,128,1,fp8,fp8,0,0.09314720034599304
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,3072,12,1,128,1,float16,float16,0,0.08752959966659546
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,3072,12,1,128,1,float16,fp8,0,0.08906239867210389
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,3072,12,2,128,1,float16,float16,0,0.0871616005897522
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,3072,12,2,128,1,float16,fp8,0,0.08648639917373657
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,3072,12,2,128,1,fp8,fp8,0,0.0883679986000061
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,3072,12,4,128,1,float16,float16,0,0.08878080248832702
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,3072,12,4,128,1,float16,fp8,0,0.08647199869155883
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,3072,12,4,128,1,fp8,fp8,0,0.08866239786148071
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,3072,12,12,128,1,float16,fp8,0,0.0552672028541565
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,3072,12,12,128,1,fp8,fp8,0,0.05620959997177124
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,3072,12,1,128,1,float16,float16,0,0.05249119997024536
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,3072,12,1,128,1,float16,fp8,0,0.053508800268173215
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,3072,12,1,128,1,fp8,fp8,0,0.05141760110855102
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,3072,12,2,128,1,float16,float16,0,0.053067201375961305
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,3072,12,2,128,1,float16,fp8,0,0.05144320130348205
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,3072,12,2,128,1,fp8,fp8,0,0.05184000134468079
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,3072,12,4,128,1,float16,float16,0,0.05330079793930054
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,3072,12,4,128,1,float16,fp8,0,0.05226560235023499
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,3072,12,4,128,1,fp8,fp8,0,0.05146880149841308
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,4096,12,1,128,1,fp8,fp8,0,0.08193280100822449
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,2048,12,1,128,1,float16,float16,0,0.8608223915100097
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,2048,12,1,128,1,float16,fp8,0,0.905303955078125
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,2048,12,1,128,1,fp8,fp8,0,0.9064559936523438
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,3072,12,2,128,1,float16,fp8,0,0.2535968065261841
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,2048,12,2,128,1,float16,float16,0,0.8751903533935547
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,2048,12,2,128,1,float16,fp8,0,0.904640007019043
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,3072,12,2,128,1,float16,fp8,0,0.13797760009765625
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,2048,12,2,128,1,fp8,fp8,0,0.9062512397766114
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,3072,12,1,128,1,fp8,fp8,0,0.08636159896850586
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,2048,12,4,128,1,float16,float16,0,0.9220848083496094
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,2048,12,12,128,1,float16,float16,0,0.5531919956207275
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,3072,12,12,128,1,float16,float16,0,0.05522720217704773
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,2048,12,4,128,1,float16,fp8,0,0.9485600471496582
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,2048,12,4,128,1,fp8,fp8,0,0.9055024147033691
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,2048,12,12,128,1,float16,fp8,0,0.5377583980560303
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,2048,12,12,128,1,fp8,fp8,0,0.5354671955108643
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,2048,12,1,128,1,float16,float16,0,0.4463200092315674
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,2048,12,1,128,1,float16,fp8,0,0.4656928062438965
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,2048,12,1,128,1,fp8,fp8,0,0.4641615867614746
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,2048,12,2,128,1,float16,float16,0,0.44452319145202634
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,2048,12,2,128,1,float16,fp8,0,0.46627202033996584
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,2048,12,2,128,1,fp8,fp8,0,0.46653118133544924
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,2048,12,4,128,1,float16,float16,0,0.46995840072631834
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,2048,12,4,128,1,float16,fp8,0,0.481547212600708
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,2048,12,12,128,1,float16,float16,0,0.28565919399261475
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,2048,12,12,128,1,float16,fp8,0,0.28903520107269287
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,2048,12,4,128,1,fp8,fp8,0,0.46599040031433103
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,2048,12,1,128,1,float16,float16,0,0.23186719417572021
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,2048,12,12,128,1,fp8,fp8,0,0.28236799240112304
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,2048,12,1,128,1,float16,fp8,0,0.2461872100830078
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,2048,12,1,128,1,fp8,fp8,0,0.24949278831481933
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,2048,12,2,128,1,float16,float16,0,0.2321631908416748
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,2048,12,2,128,1,float16,fp8,0,0.2518831968307495
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,2048,12,2,128,1,fp8,fp8,0,0.24643199443817138
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,2048,12,4,128,1,float16,float16,0,0.2440256118774414
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,2048,12,4,128,1,float16,fp8,0,0.24726719856262208
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,2048,12,12,128,1,float16,float16,0,0.1568608045578003
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,2048,12,12,128,1,float16,fp8,0,0.15531680583953858
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,2048,12,4,128,1,fp8,fp8,0,0.24641120433807373
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,2048,12,12,128,1,fp8,fp8,0,0.1587391972541809
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,2048,12,1,128,1,float16,float16,0,0.1321679949760437
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,2048,12,1,128,1,float16,fp8,0,0.13539199829101561
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,2048,12,1,128,1,fp8,fp8,0,0.13698400259017945
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,2048,12,2,128,1,float16,fp8,0,0.13764959573745728
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,2048,12,4,128,1,float16,float16,0,0.13607679605484008
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,2048,12,4,128,1,float16,fp8,0,0.1409824013710022
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,2048,12,4,128,1,fp8,fp8,0,0.13681440353393554
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,2048,12,12,128,1,float16,fp8,0,0.09036160111427308
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,2048,12,12,128,1,fp8,fp8,0,0.08972799777984619
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,2048,12,1,128,1,float16,float16,0,0.0791375994682312
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,2048,12,1,128,1,float16,fp8,0,0.08265600204467774
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,2048,12,1,128,1,fp8,fp8,0,0.08063679933547974
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,2048,12,2,128,1,float16,fp8,0,0.08141120076179505
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,2048,12,2,128,1,fp8,fp8,0,0.08112800121307373
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,2048,12,4,128,1,float16,float16,0,0.08309599757194519
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,2048,12,4,128,1,float16,fp8,0,0.08050400018692017
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,2048,12,12,128,1,float16,float16,0,0.05709599852561951
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,2048,12,12,128,1,float16,fp8,0,0.058220797777175905
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,2048,12,12,128,1,fp8,fp8,0,0.05750560164451599
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,2048,12,1,128,1,float16,float16,0,0.053380799293518064
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,2048,12,1,128,1,float16,fp8,0,0.05375360250473023
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,2048,12,1,128,1,fp8,fp8,0,0.052983999252319336
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,2048,12,2,128,1,float16,float16,0,0.053406399488449094
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,2048,12,2,128,1,fp8,fp8,0,0.053065598011016846
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,2048,12,4,128,1,float16,float16,0,0.05397599935531616
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,2048,12,4,128,1,float16,fp8,0,0.053625601530075076
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,2048,12,4,128,1,fp8,fp8,0,0.05338720083236694
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,2048,12,12,128,1,float16,float16,0,0.04044159948825836
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,2048,12,12,128,1,fp8,fp8,0,0.04011200070381164
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,2048,12,1,128,1,float16,float16,0,0.03865439891815185
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,2048,12,1,128,1,float16,fp8,0,0.03886080086231232
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,2048,12,1,128,1,fp8,fp8,0,0.03874880075454712
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,2048,12,2,128,1,float16,float16,0,0.03914079964160919
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,2048,12,2,128,1,float16,fp8,0,0.03853119909763336
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,2048,12,2,128,1,fp8,fp8,0,0.03883520066738129
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,2048,12,4,128,1,float16,float16,0,0.03920640051364899
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,2048,12,4,128,1,float16,fp8,0,0.038664001226425174
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,2048,12,4,128,1,fp8,fp8,0,0.03874239921569824
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1536,12,1,128,1,float16,float16,0,0.5416863918304443
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,2048,12,2,128,1,float16,float16,0,0.13104480504989624
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,2048,12,2,128,1,fp8,fp8,0,0.13574719429016113
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,2048,12,12,128,1,float16,float16,0,0.09096320271492005
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,2048,12,2,128,1,float16,float16,0,0.07842720150947571
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,2048,12,4,128,1,fp8,fp8,0,0.08164960145950317
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1536,12,1,128,1,float16,fp8,0,0.591105604171753
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1536,12,1,128,1,fp8,fp8,0,0.5890207767486573
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1536,12,2,128,1,float16,float16,0,0.5440591812133789
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,2048,12,2,128,1,float16,fp8,0,0.05373119711875916
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1536,12,2,128,1,float16,fp8,0,0.5912415981292725
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1536,12,2,128,1,fp8,fp8,0,0.5914239883422852
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,2048,12,12,128,1,float16,fp8,0,0.040443199872970584
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1536,12,4,128,1,float16,float16,0,0.5821360111236572
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1536,12,12,128,1,float16,float16,0,0.36225759983062744
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1536,12,4,128,1,float16,fp8,0,0.5899663925170898
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1536,12,4,128,1,fp8,fp8,0,0.5929376125335694
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1536,12,12,128,1,float16,fp8,0,0.3604912042617798
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1536,12,1,128,1,float16,float16,0,0.27990078926086426
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1536,12,12,128,1,fp8,fp8,0,0.36062400341033934
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1536,12,2,128,1,float16,float16,0,0.2805919885635376
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1536,12,2,128,1,float16,fp8,0,0.30714879035949705
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1536,12,2,128,1,fp8,fp8,0,0.3069904088973999
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1536,12,4,128,1,float16,float16,0,0.29765279293060304
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1536,12,4,128,1,float16,fp8,0,0.3076623916625977
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1536,12,12,128,1,float16,float16,0,0.19104959964752197
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1536,12,12,128,1,float16,fp8,0,0.1923632025718689
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1536,12,12,128,1,fp8,fp8,0,0.19168959856033324
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1536,12,1,128,1,float16,fp8,0,0.16556639671325685
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1536,12,1,128,1,fp8,fp8,0,0.16527520418167113
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1536,12,2,128,1,float16,float16,0,0.152673602104187
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1536,12,2,128,1,float16,fp8,0,0.16545120477676392
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1536,12,2,128,1,fp8,fp8,0,0.1651039958000183
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1536,12,4,128,1,float16,float16,0,0.1607200026512146
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1536,12,4,128,1,float16,fp8,0,0.16589440107345582
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1536,12,4,128,1,fp8,fp8,0,0.16552640199661256
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1536,12,12,128,1,float16,float16,0,0.10603840351104736
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1536,12,12,128,1,float16,fp8,0,0.10833760499954223
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1536,12,12,128,1,fp8,fp8,0,0.10812159776687622
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1536,12,1,128,1,float16,fp8,0,0.09262080192565918
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1536,12,1,128,1,fp8,fp8,0,0.09228479862213135
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1536,12,2,128,1,float16,fp8,0,0.09232640266418457
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1536,12,2,128,1,fp8,fp8,0,0.09283360242843627
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1536,12,4,128,1,float16,float16,0,0.09264479875564575
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1536,12,4,128,1,float16,fp8,0,0.09258400201797486
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1536,12,4,128,1,fp8,fp8,0,0.09292320013046265
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1536,12,12,128,1,float16,float16,0,0.06460639834403992
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1536,12,12,128,1,float16,fp8,0,0.0638159990310669
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1536,12,12,128,1,fp8,fp8,0,0.06341919898986817
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1536,12,1,128,1,float16,float16,0,0.05525280237197876
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1536,12,1,128,1,float16,fp8,0,0.056561601161956784
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1536,12,1,128,1,fp8,fp8,0,0.05716320276260376
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1536,12,2,128,1,float16,float16,0,0.05543199777603149
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1536,12,2,128,1,float16,fp8,0,0.057339197397232054
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1536,12,2,128,1,fp8,fp8,0,0.0573199987411499
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1536,12,4,128,1,float16,float16,0,0.05648319721221924
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1536,12,4,128,1,float16,fp8,0,0.05737599730491638
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1536,12,4,128,1,fp8,fp8,0,0.05670080184936523
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1536,12,12,128,1,float16,float16,0,0.0371071994304657
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1536,12,12,128,1,float16,fp8,0,0.03714239895343781
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1536,12,12,128,1,fp8,fp8,0,0.037136000394821164
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1536,12,1,128,1,float16,float16,0,0.033000001311302186
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1536,12,1,128,1,float16,fp8,0,0.30731201171875
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1536,12,1,128,1,fp8,fp8,0,0.3064016103744507
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1536,12,2,128,1,float16,float16,0,0.03365280032157898
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1536,12,2,128,1,float16,fp8,0,0.03419359922409058
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1536,12,2,128,1,fp8,fp8,0,0.03448159992694855
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1536,12,4,128,1,float16,float16,0,0.035020801424980166
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1536,12,4,128,1,float16,fp8,0,0.03488639891147614
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1536,12,4,128,1,fp8,fp8,0,0.035025599598884585
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1536,12,12,128,1,float16,float16,0,0.033057600259780884
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1536,12,12,128,1,float16,fp8,0,0.03299359977245331
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1536,12,1,128,1,float16,float16,0,0.15279200077056884
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1536,12,12,128,1,fp8,fp8,0,0.03300319910049439
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1536,12,1,128,1,float16,float16,0,0.030929601192474364
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1536,12,1,128,1,float16,fp8,0,0.030884799361228944
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1536,12,2,128,1,float16,float16,0,0.0310479998588562
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1536,12,2,128,1,float16,fp8,0,0.03091680109500885
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1536,12,2,128,1,fp8,fp8,0,0.030928000807762146
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1536,12,4,128,1,float16,float16,0,0.03126559853553772
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1536,12,4,128,1,float16,fp8,0,0.030959999561309813
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1536,12,4,128,1,fp8,fp8,0,0.03096640110015869
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1536,12,1,128,1,float16,float16,0,0.0874783992767334
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,1024,12,1,128,1,float16,float16,0,0.5694640159606934
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,1024,12,1,128,1,float16,fp8,0,0.6580912113189697
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,1024,12,1,128,1,fp8,fp8,0,0.65589919090271
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,1024,12,2,128,1,float16,float16,0,0.5755248069763184
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,1024,12,2,128,1,float16,fp8,0,0.6577151775360107
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,1024,12,2,128,1,fp8,fp8,0,0.656276798248291
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,1024,12,4,128,1,float16,float16,0,0.6191135883331299
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,1024,12,4,128,1,float16,fp8,0,0.6580832004547119
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1536,12,1,128,1,fp8,fp8,0,0.033667200803756715
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1536,12,1,128,1,float16,fp8,0,0.03419840037822723
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1024,12,12,128,1,float16,float16,0,0.40677919387817385
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,1024,12,4,128,1,fp8,fp8,0,0.6552015781402588
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1536,12,4,128,1,fp8,fp8,0,0.307041597366333
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1024,12,12,128,1,float16,fp8,0,0.4086143970489502
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1024,12,1,128,1,float16,float16,0,0.29561920166015626
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1024,12,12,128,1,fp8,fp8,0,0.4087200164794922
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1024,12,1,128,1,float16,fp8,0,0.33748319149017336
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1536,12,1,128,1,fp8,fp8,0,0.030958399176597595
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1024,12,2,128,1,float16,float16,0,0.2968672037124634
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1024,12,2,128,1,float16,fp8,0,0.33843040466308594
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1024,12,4,128,1,float16,float16,0,0.31680641174316404
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1024,12,2,128,1,fp8,fp8,0,0.3381360054016113
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1024,12,4,128,1,float16,fp8,0,0.3377808094024658
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1024,12,12,128,1,float16,float16,0,0.21058239936828613
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1024,12,12,128,1,float16,fp8,0,0.21470561027526855
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1024,12,4,128,1,fp8,fp8,0,0.33892800807952883
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1024,12,12,128,1,fp8,fp8,0,0.21392319202423096
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1024,12,1,128,1,float16,float16,0,0.15831199884414673
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1024,12,1,128,1,float16,fp8,0,0.1784767985343933
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1024,12,1,128,1,fp8,fp8,0,0.17870080471038818
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1024,12,2,128,1,float16,fp8,0,0.17884000539779663
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1536,12,2,128,1,float16,float16,0,0.08934879899024964
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1024,12,4,128,1,float16,float16,0,0.1690608024597168
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1024,12,4,128,1,float16,fp8,0,0.1794160008430481
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1024,12,4,128,1,fp8,fp8,0,0.17921279668807982
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1024,12,12,128,1,float16,float16,0,0.11453759670257568
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1024,12,12,128,1,float16,fp8,0,0.1171839952468872
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1024,12,1,128,1,float16,float16,0,0.09108319878578186
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1024,12,1,128,1,float16,fp8,0,0.09817759990692139
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1024,12,2,128,1,float16,float16,0,0.0913968026638031
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1024,12,2,128,1,float16,fp8,0,0.09810879826545715
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1024,12,2,128,1,fp8,fp8,0,0.09866560101509095
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1024,12,4,128,1,float16,float16,0,0.09599199891090393
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1024,12,4,128,1,float16,fp8,0,0.09872159957885743
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1024,12,4,128,1,fp8,fp8,0,0.09921600222587586
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1024,12,12,128,1,float16,float16,0,0.06661120057106018
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1024,12,12,128,1,float16,fp8,0,0.0680191993713379
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1024,12,12,128,1,fp8,fp8,0,0.06643199920654297
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1024,12,1,128,1,float16,float16,0,0.054385602474212646
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1024,12,1,128,1,float16,fp8,0,0.05787039995193481
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1024,12,1,128,1,fp8,fp8,0,0.05820159912109375
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1024,12,2,128,1,float16,float16,0,0.05378400087356568
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1024,12,2,128,1,float16,fp8,0,0.058278399705886844
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1024,12,2,128,1,fp8,fp8,0,0.05790079832077026
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1024,12,4,128,1,float16,float16,0,0.05702239871025085
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1024,12,4,128,1,float16,fp8,0,0.05788959860801697
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1024,12,4,128,1,fp8,fp8,0,0.05819360017776489
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1024,12,12,128,1,float16,fp8,0,0.04297600090503693
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1024,12,12,128,1,float16,float16,0,0.04081279933452606
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1024,12,12,128,1,fp8,fp8,0,0.043084800243377686
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1024,12,1,128,1,float16,float16,0,0.03714239895343781
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1024,12,1,128,1,float16,fp8,0,0.03899360001087189
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1024,12,1,128,1,fp8,fp8,0,0.037627199292182924
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1024,12,2,128,1,float16,float16,0,0.03719359934329987
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1024,12,2,128,1,float16,fp8,0,0.038022398948669434
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1024,12,2,128,1,fp8,fp8,0,0.038369598984718326
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1024,12,4,128,1,float16,float16,0,0.038631999492645265
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1024,12,4,128,1,float16,fp8,0,0.03850240111351013
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1024,12,4,128,1,fp8,fp8,0,0.038649600744247434
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1024,12,12,128,1,float16,float16,0,0.02686559855937958
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1024,12,12,128,1,float16,fp8,0,0.02890239953994751
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1024,12,12,128,1,fp8,fp8,0,0.028867200016975403
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1024,12,1,128,1,float16,float16,0,0.026800000667572023
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1024,12,1,128,1,float16,fp8,0,0.02671839892864227
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1024,12,1,128,1,fp8,fp8,0,0.026807999610900878
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1024,12,2,128,1,float16,float16,0,0.025284799933433532
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1024,12,2,128,1,float16,float16,0,0.16016800403594972
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1024,12,2,128,1,float16,fp8,0,0.026870399713516235
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1024,12,2,128,1,fp8,fp8,0,0.02680639922618866
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1024,12,2,128,1,fp8,fp8,0,0.178656005859375
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1024,12,4,128,1,float16,fp8,0,0.026867198944091796
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1024,12,4,128,1,fp8,fp8,0,0.026819199323654175
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1024,12,12,128,1,float16,float16,0,0.025183999538421632
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1024,12,12,128,1,fp8,fp8,0,0.026830399036407472
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1024,12,12,128,1,float16,fp8,0,0.025278401374816895
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1024,12,1,128,1,float16,float16,0,0.02476799935102463
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1024,12,1,128,1,fp8,fp8,0,0.024806399643421174
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1024,12,2,128,1,float16,float16,0,0.02481119930744171
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1024,12,12,128,1,fp8,fp8,0,0.11735199689865113
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1024,12,2,128,1,float16,fp8,0,0.024798400700092316
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1024,12,2,128,1,fp8,fp8,0,0.024779200553894043
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1024,12,4,128,1,float16,float16,0,0.024784000217914583
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1024,12,1,128,1,fp8,fp8,0,0.0972544014453888
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1024,12,4,128,1,float16,fp8,0,0.024774399399757386
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,512,12,1,128,1,float16,float16,0,0.4351151943206787
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,512,12,1,128,1,float16,fp8,0,0.53089280128479
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,512,12,1,128,1,fp8,fp8,0,0.5323040008544921
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,512,12,2,128,1,float16,fp8,0,0.5309840202331543
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,512,12,4,128,1,float16,float16,0,0.4794271945953369
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,512,12,2,128,1,fp8,fp8,0,0.5305200099945069
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,512,12,12,128,1,float16,float16,0,0.33208479881286623
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,512,12,4,128,1,float16,fp8,0,0.5293871879577636
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,512,12,4,128,1,fp8,fp8,0,0.5322576045989991
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1024,12,1,128,1,fp8,fp8,0,0.33704800605773927
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,512,12,12,128,1,float16,fp8,0,0.34179840087890623
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,512,12,12,128,1,fp8,fp8,0,0.3410720109939575
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,512,12,1,128,1,float16,fp8,0,0.2725696086883545
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,512,12,2,128,1,float16,float16,0,0.22860639095306395
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,512,12,1,128,1,fp8,fp8,0,0.2733680009841919
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,512,12,2,128,1,float16,fp8,0,0.2730815887451172
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,512,12,4,128,1,float16,float16,0,0.2474560022354126
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1024,12,4,128,1,float16,float16,0,0.026859200000762938
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1024,12,1,128,1,float16,fp8,0,0.024743999540805816
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,512,12,4,128,1,float16,fp8,0,0.2727024078369141
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,512,12,12,128,1,float16,float16,0,0.17343039512634278
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1024,12,4,128,1,fp8,fp8,0,0.024795199930667877
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,512,12,12,128,1,fp8,fp8,0,0.17885440587997437
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,512,12,1,128,1,float16,float16,0,0.12308800220489502
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,512,12,1,128,1,float16,fp8,0,0.14533599615097045
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,512,12,1,128,1,fp8,fp8,0,0.14455840587615967
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,512,12,2,128,1,float16,float16,0,0.12395520210266113
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,512,12,2,128,1,float16,float16,0,0.4390719890594482
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,512,12,2,128,1,fp8,fp8,0,0.1458672046661377
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,512,12,4,128,1,float16,float16,0,0.13282400369644165
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,512,12,4,128,1,float16,fp8,0,0.14567840099334717
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,512,12,4,128,1,fp8,fp8,0,0.14539040327072145
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,512,12,12,128,1,float16,float16,0,0.09431679844856262
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,512,12,12,128,1,float16,fp8,0,0.09855840206146241
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,512,12,12,128,1,fp8,fp8,0,0.09831039905548096
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,512,12,1,128,1,float16,float16,0,0.0703599989414215
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,512,12,1,128,1,float16,fp8,0,0.07915520071983337
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,512,12,1,128,1,fp8,fp8,0,0.0788976013660431
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,512,12,2,128,1,float16,float16,0,0.07120320200920105
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,512,12,2,128,1,float16,fp8,0,0.07969599962234497
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,512,12,2,128,1,fp8,fp8,0,0.07966560125350952
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,512,12,4,128,1,float16,fp8,0,0.0802735984325409
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,512,12,1,128,1,float16,float16,0,0.22781920433044434
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,512,12,4,128,1,fp8,fp8,0,0.08046560287475586
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,512,12,12,128,1,float16,float16,0,0.053630399703979495
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,512,12,12,128,1,float16,fp8,0,0.05520319938659668
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,512,12,12,128,1,fp8,fp8,0,0.05543199777603149
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,512,12,1,128,1,float16,float16,0,0.04118399918079376
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,512,12,1,128,1,float16,fp8,0,0.0458079993724823
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,512,12,1,128,1,fp8,fp8,0,0.04587680101394653
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,512,12,2,128,1,float16,float16,0,0.041203200817108154
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,512,12,2,128,1,float16,fp8,0,0.0457152009010315
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,512,12,2,128,1,fp8,fp8,0,0.04623839855194092
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,512,12,2,128,1,fp8,fp8,0,0.2731584072113037
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,512,12,4,128,1,float16,float16,0,0.043315199017524716
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,512,12,4,128,1,float16,fp8,0,0.04642719924449921
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,512,12,4,128,1,fp8,fp8,0,0.046433600783348086
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,512,12,12,128,1,float16,float16,0,0.03299199938774109
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,512,12,12,128,1,float16,fp8,0,0.03513599932193756
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,512,12,12,128,1,fp8,fp8,0,0.0351936012506485
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,512,12,1,128,1,float16,float16,0,0.02886880040168762
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,512,12,1,128,1,float16,fp8,0,0.030976000428199767
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,512,12,4,128,1,fp8,fp8,0,0.27312960624694826
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,512,12,1,128,1,fp8,fp8,0,0.031068798899650574
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,512,12,2,128,1,float16,float16,0,0.02889440059661865
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,512,12,2,128,1,float16,fp8,0,0.030972799658775328
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,512,12,4,128,1,float16,fp8,0,0.030943998694419862
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,512,12,4,128,1,fp8,fp8,0,0.030961599946022034
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,512,12,12,128,1,float16,float16,0,0.021036800742149354
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,512,12,12,128,1,float16,fp8,0,0.022702400386333466
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,512,12,12,128,1,fp8,fp8,0,0.022745600342750548
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,512,12,1,128,1,float16,float16,0,0.020187200605869295
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,512,12,1,128,1,float16,fp8,0,0.02064319998025894
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,512,12,2,128,1,float16,float16,0,0.020473599433898926
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,512,12,2,128,1,float16,fp8,0,0.020684799551963805
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,512,12,2,128,1,fp8,fp8,0,0.0207056000828743
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,512,12,2,128,1,float16,fp8,0,0.14441440105438233
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,512,12,4,128,1,float16,fp8,0,0.020764799416065217
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,512,12,4,128,1,fp8,fp8,0,0.020745599269866945
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,512,12,12,128,1,float16,float16,0,0.018751999735832213
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,512,12,12,128,1,float16,fp8,0,0.02067680060863495
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,512,12,12,128,1,fp8,fp8,0,0.02075680047273636
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,512,12,1,128,1,float16,float16,0,0.0186271995306015
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,512,12,1,128,1,float16,fp8,0,0.018620799481868743
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,512,12,2,128,1,float16,float16,0,0.018676799535751343
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,512,12,2,128,1,float16,fp8,0,0.018751999735832213
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,512,12,2,128,1,fp8,fp8,0,0.01866399943828583
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,512,12,4,128,1,float16,float16,0,0.018649600446224213
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,512,12,4,128,1,float16,fp8,0,0.018680000305175783
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,512,12,4,128,1,fp8,fp8,0,0.018628799915313722
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,512,12,12,128,1,float16,float16,0,0.018651199340820313
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,512,12,4,128,1,float16,float16,0,0.07569599747657776
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,512,12,12,128,1,float16,fp8,0,0.018643200397491455
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,512,12,12,128,1,fp8,fp8,0,0.01868479996919632
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,512,12,1,128,1,float16,fp8,0,0.018615999817848207
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,512,12,1,128,1,fp8,fp8,0,0.01860480010509491
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,512,12,2,128,1,float16,float16,0,0.018568000197410582
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,512,12,2,128,1,float16,fp8,0,0.018628799915313722
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,512,12,2,128,1,fp8,fp8,0,0.018648000061511995
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,512,12,4,128,1,float16,float16,0,0.01865919977426529
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,512,12,4,128,1,float16,fp8,0,0.018671999871730804
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,512,12,4,128,1,fp8,fp8,0,0.018681600689888
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,256,12,1,128,1,float16,float16,0,0.1888432025909424
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,512,12,2,128,1,fp8,fp8,0,0.03094879984855652
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,256,12,1,128,1,float16,fp8,0,0.23830881118774414
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,256,12,1,128,1,fp8,fp8,0,0.23649280071258544
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,256,12,2,128,1,float16,float16,0,0.1911743998527527
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,512,12,4,128,1,float16,float16,0,0.02064799964427948
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,256,12,2,128,1,float16,fp8,0,0.2371056079864502
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,256,12,2,128,1,fp8,fp8,0,0.23840320110321045
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,256,12,4,128,1,float16,float16,0,0.20961439609527588
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,512,12,1,128,1,fp8,fp8,0,0.01863040030002594
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,256,12,4,128,1,float16,fp8,0,0.23748159408569336
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,256,12,12,128,1,float16,float16,0,0.1534816026687622
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,256,12,4,128,1,fp8,fp8,0,0.23829278945922852
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,256,12,12,128,1,float16,fp8,0,0.1602944016456604
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,256,12,1,128,1,float16,float16,0,0.10414559841156006
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,256,12,1,128,1,float16,fp8,0,0.12684160470962524
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,256,12,1,128,1,fp8,fp8,0,0.127019202709198
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,256,12,2,128,1,float16,float16,0,0.10447839498519898
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,256,12,2,128,1,float16,fp8,0,0.1264896035194397
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,256,12,2,128,1,fp8,fp8,0,0.12627359628677368
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,256,12,4,128,1,float16,float16,0,0.11384639739990235
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,256,12,4,128,1,float16,fp8,0,0.12735840082168579
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,256,12,12,128,1,float16,float16,0,0.0856544017791748
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,256,12,4,128,1,fp8,fp8,0,0.12681280374526976
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,512,12,1,128,1,float16,float16,0,0.0186271995306015
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,256,12,12,128,1,float16,fp8,0,0.08893280029296875
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,256,12,12,128,1,fp8,fp8,0,0.08887199759483337
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,256,12,1,128,1,float16,float16,0,0.06055359840393067
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,256,12,1,128,1,float16,fp8,0,0.06996480226516724
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,256,12,1,128,1,fp8,fp8,0,0.06982560157775879
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,256,12,2,128,1,float16,float16,0,0.06058880090713501
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,256,12,2,128,1,float16,fp8,0,0.06998400092124939
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,256,12,2,128,1,fp8,fp8,0,0.07015839815139771
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,256,12,4,128,1,float16,float16,0,0.06571360230445862
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,256,12,4,128,1,float16,fp8,0,0.07159039974212647
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,256,12,4,128,1,fp8,fp8,0,0.07088000178337098
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,256,12,12,128,1,float16,float16,0,0.04717440009117126
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,256,12,12,128,1,float16,fp8,0,0.04960800111293793
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,256,12,12,128,1,fp8,fp8,0,0.049236801266670224
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,256,12,1,128,1,float16,float16,0,0.034964799880981445
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,256,12,1,128,1,float16,fp8,0,0.03911519944667816
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,256,12,1,128,1,fp8,fp8,0,0.0394463986158371
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,256,12,2,128,1,float16,float16,0,0.03495199978351593
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,256,12,2,128,1,float16,fp8,0,0.03926720023155213
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,256,12,2,128,1,fp8,fp8,0,0.03922719955444336
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,256,12,4,128,1,float16,float16,0,0.03712800145149231
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,256,12,4,128,1,float16,fp8,0,0.039243200421333314
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,256,12,4,128,1,fp8,fp8,0,0.039099198579788205
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,256,12,12,128,1,float16,fp8,0,0.03264000117778778
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,256,12,12,128,1,fp8,fp8,0,0.03116639852523804
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,256,12,12,128,1,float16,float16,0,0.02863360047340393
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,256,12,1,128,1,float16,float16,0,0.0248416006565094
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,256,12,1,128,1,float16,fp8,0,0.026844799518585205
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,256,12,1,128,1,fp8,fp8,0,0.026897600293159483
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,256,12,2,128,1,float16,float16,0,0.02473440021276474
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,512,12,4,128,1,float16,float16,0,0.030995199084281923
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,256,12,2,128,1,fp8,fp8,0,0.02688960134983063
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,256,12,4,128,1,float16,float16,0,0.026759999990463256
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,256,12,4,128,1,float16,fp8,0,0.027060800790786745
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,256,12,4,128,1,fp8,fp8,0,0.026979199051856993
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,256,12,12,128,1,float16,float16,0,0.01866399943828583
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,256,12,12,128,1,fp8,fp8,0,0.020641599595546723
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,256,12,1,128,1,float16,float16,0,0.01659359931945801
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,256,12,1,128,1,float16,fp8,0,0.018585599958896637
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,256,12,1,128,1,fp8,fp8,0,0.018580800294876097
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,256,12,2,128,1,float16,float16,0,0.016553600132465363
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,256,12,2,128,1,float16,fp8,0,0.018563200533390046
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,256,12,2,128,1,fp8,fp8,0,0.01858240067958832
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,256,12,4,128,1,float16,float16,0,0.016577599942684172
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,256,12,4,128,1,float16,fp8,0,0.01858399957418442
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,256,12,4,128,1,fp8,fp8,0,0.018580800294876097
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,256,12,12,128,1,float16,float16,0,0.01658080071210861
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,256,12,12,128,1,float16,fp8,0,0.016633599996566772
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,256,12,12,128,1,fp8,fp8,0,0.01724800020456314
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,256,12,1,128,1,float16,float16,0,0.01499200016260147
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,256,12,1,128,1,float16,fp8,0,0.016553600132465363
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,256,12,1,128,1,fp8,fp8,0,0.016574400663375854
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,256,12,2,128,1,float16,float16,0,0.014908799529075622
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,256,12,2,128,1,float16,fp8,0,0.016519999504089354
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,256,12,2,128,1,fp8,fp8,0,0.016630400717258454
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,256,12,4,128,1,float16,float16,0,0.016519999504089354
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,256,12,4,128,1,float16,fp8,0,0.016502399742603303
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,256,12,12,128,1,fp8,fp8,0,0.16031999588012696
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,256,12,12,128,1,float16,float16,0,0.016476799547672272
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,256,12,12,128,1,float16,fp8,0,0.0166143998503685
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,256,12,1,128,1,float16,float16,0,0.014584000408649444
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,256,12,12,128,1,fp8,fp8,0,0.016630400717258454
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,256,12,1,128,1,float16,fp8,0,0.014596800506114959
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,256,12,1,128,1,fp8,fp8,0,0.01642560064792633
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,256,12,2,128,1,float16,float16,0,0.014683200418949128
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,256,12,2,128,1,float16,fp8,0,0.01652639955282211
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,256,12,2,128,1,fp8,fp8,0,0.016620799899101257
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,256,12,4,128,1,float16,float16,0,0.015220800042152404
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,256,12,4,128,1,float16,fp8,0,0.016505600512027742
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,256,12,4,128,1,fp8,fp8,0,0.015241600573062897
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,256,12,12,128,1,float16,float16,0,0.016633599996566772
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,256,12,12,128,1,float16,fp8,0,0.015726399421691895
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,256,12,12,128,1,fp8,fp8,0,0.016622400283813475
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,256,12,1,128,1,float16,fp8,0,0.015009599924087524
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,256,12,1,128,1,fp8,fp8,0,0.014870400726795196
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,256,12,2,128,1,float16,float16,0,0.014980800449848175
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,256,12,2,128,1,float16,fp8,0,0.014567999541759491
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,256,12,2,128,1,fp8,fp8,0,0.014550399780273438
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,256,12,4,128,1,float16,float16,0,0.014545600116252898
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,256,12,4,128,1,float16,fp8,0,0.014585599303245544
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,256,12,4,128,1,fp8,fp8,0,0.014587199687957764
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,128,12,1,128,1,float16,float16,0,0.11748160123825073
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,512,12,12,128,1,float16,fp8,0,0.18026399612426758
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,256,12,2,128,1,float16,fp8,0,0.027006399631500245
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,128,12,1,128,1,float16,fp8,0,0.14101920127868653
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,128,12,1,128,1,fp8,fp8,0,0.14083839654922486
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,128,12,2,128,1,float16,float16,0,0.11861599683761596
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,128,12,2,128,1,float16,fp8,0,0.14098720550537108
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,512,12,1,128,1,fp8,fp8,0,0.02072799950838089
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,128,12,2,128,1,fp8,fp8,0,0.1416383981704712
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,128,12,4,128,1,float16,fp8,0,0.1406399965286255
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,128,12,4,128,1,fp8,fp8,0,0.1403599977493286
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,128,12,12,128,1,fp8,fp8,0,0.09438719749450683
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,128,12,12,128,1,float16,fp8,0,0.09447519779205323
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,128,12,1,128,1,float16,float16,0,0.06636319756507873
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,128,12,1,128,1,fp8,fp8,0,0.07610399723052978
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,128,12,2,128,1,float16,float16,0,0.06667360067367553
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,128,12,2,128,1,float16,fp8,0,0.07628480195999146
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,256,12,4,128,1,fp8,fp8,0,0.016521599888801575
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,128,12,2,128,1,fp8,fp8,0,0.07615360021591186
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,128,12,4,128,1,float16,float16,0,0.07080000042915344
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,128,12,4,128,1,float16,fp8,0,0.07680320143699645
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,128,12,12,128,1,float16,float16,0,0.051374399662017824
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,128,12,12,128,1,float16,fp8,0,0.05250399708747864
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,128,12,12,128,1,fp8,fp8,0,0.05202879905700684
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,128,12,1,128,1,float16,float16,0,0.03789759874343872
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,256,12,1,128,1,float16,float16,0,0.014582400023937226
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,128,12,1,128,1,float16,fp8,0,0.0432096004486084
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,128,12,1,128,1,fp8,fp8,0,0.043191999197006226
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,128,12,2,128,1,float16,float16,0,0.03821280002593994
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,128,12,2,128,1,float16,fp8,0,0.04325439929962158
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,128,12,2,128,1,fp8,fp8,0,0.04323039948940277
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,128,12,4,128,1,float16,float16,0,0.04110719859600067
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,128,12,4,128,1,float16,fp8,0,0.04321120083332062
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,128,12,4,128,1,fp8,fp8,0,0.043279999494552614
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,128,12,12,128,1,float16,float16,0,0.026638400554656983
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,128,12,12,128,1,float16,fp8,0,0.030272001028060914
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,128,12,12,128,1,fp8,fp8,0,0.030726400017738343
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,128,12,1,128,1,float16,float16,0,0.02290080040693283
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,128,12,1,128,1,float16,fp8,0,0.026548799872398377
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,128,12,1,128,1,fp8,fp8,0,0.026363199949264525
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,128,12,2,128,1,float16,float16,0,0.0229312002658844
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,128,12,2,128,1,float16,fp8,0,0.02670240104198456
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,128,12,2,128,1,fp8,fp8,0,0.02680639922618866
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,128,12,4,128,1,float16,float16,0,0.024771200120449068
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,128,12,4,128,1,float16,fp8,0,0.026830399036407472
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,128,12,12,128,1,float16,float16,0,0.018665599822998046
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,128,12,4,128,1,fp8,fp8,0,0.02576960027217865
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,256,12,12,128,1,float16,fp8,0,0.020681600272655486
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,128,12,12,128,1,fp8,fp8,0,0.020659199357032774
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,128,12,12,128,1,float16,fp8,0,0.020678399503231047
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,128,12,1,128,1,float16,float16,0,0.016625599563121797
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,128,12,1,128,1,float16,fp8,0,0.018620799481868743
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,128,12,1,128,1,fp8,fp8,0,0.01863040030002594
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,128,12,4,128,1,float16,float16,0,0.12689119577407837
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,128,12,2,128,1,float16,float16,0,0.016702400147914888
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,128,12,2,128,1,float16,fp8,0,0.01860480010509491
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,128,12,2,128,1,fp8,fp8,0,0.01860959976911545
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,128,12,4,128,1,float16,float16,0,0.018246400356292724
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,128,12,4,128,1,float16,fp8,0,0.018668800592422485
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,128,12,4,128,1,fp8,fp8,0,0.018692800402641298
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,128,12,12,128,1,float16,float16,0,0.013257600367069244
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,128,12,12,128,1,float16,float16,0,0.08805279731750489
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,128,12,12,128,1,float16,fp8,0,0.014555199444293976
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,128,12,12,128,1,fp8,fp8,0,0.014590400457382201
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,128,12,1,128,1,float16,fp8,0,0.012503999471664428
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,128,12,2,128,1,float16,float16,0,0.012503999471664428
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,128,12,2,128,1,float16,fp8,0,0.012544000148773193
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,128,12,2,128,1,fp8,fp8,0,0.012646399438381195
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,128,12,1,128,1,float16,fp8,0,0.07537279725074768
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,128,12,4,128,1,float16,fp8,0,0.012547199428081513
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,128,12,12,128,1,float16,float16,0,0.012488000094890594
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,128,12,4,128,1,fp8,fp8,0,0.012513600289821625
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,128,12,12,128,1,fp8,fp8,0,0.012529599666595458
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,128,12,1,128,1,float16,fp8,0,0.012535999715328216
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,128,12,1,128,1,fp8,fp8,0,0.012430399656295776
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,128,12,2,128,1,float16,float16,0,0.011340799927711486
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,128,12,2,128,1,float16,fp8,0,0.012503999471664428
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,128,12,4,128,1,fp8,fp8,0,0.0766207993030548
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,128,12,2,128,1,fp8,fp8,0,0.012495999783277511
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,128,12,4,128,1,float16,float16,0,0.012430399656295776
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,128,12,4,128,1,float16,fp8,0,0.012465599924325943
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,128,12,4,128,1,fp8,fp8,0,0.012464000284671784
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,128,12,12,128,1,float16,float16,0,0.012432000041007996
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,128,12,12,128,1,float16,fp8,0,0.012427199631929398
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,128,12,12,128,1,fp8,fp8,0,0.01250240057706833
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,128,12,1,128,1,float16,float16,0,0.011502400040626526
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,128,12,1,128,1,float16,fp8,0,0.011529599875211715
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,128,12,1,128,1,fp8,fp8,0,0.011502400040626526
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,128,12,2,128,1,float16,float16,0,0.012059199810028075
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,128,12,2,128,1,fp8,fp8,0,0.011475200206041336
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,128,12,4,128,1,float16,fp8,0,0.01170239970088005
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,128,12,4,128,1,fp8,fp8,0,0.012532800436019897
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,128,12,12,128,1,float16,float16,0,0.012417600303888322
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,128,12,12,128,1,float16,fp8,0,0.01213119998574257
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,128,12,12,128,1,fp8,fp8,0,0.01212640032172203
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,128,12,1,128,1,float16,float16,0,0.011078400164842605
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,128,12,1,128,1,float16,fp8,0,0.01098719984292984
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,128,12,1,128,1,fp8,fp8,0,0.010911999642848969
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,128,12,2,128,1,float16,float16,0,0.010684800148010255
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,128,12,2,128,1,float16,fp8,0,0.010454399883747101
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,128,12,2,128,1,fp8,fp8,0,0.01053439974784851
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,128,12,4,128,1,float16,float16,0,0.010492800176143647
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,128,12,4,128,1,float16,fp8,0,0.01051200032234192
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,128,12,4,128,1,fp8,fp8,0,0.0115167997777462
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,64,12,1,128,1,float16,float16,0,0.09200159907341003
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,64,12,1,128,1,float16,fp8,0,0.10092639923095703
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,128,12,1,128,1,float16,float16,0,0.012489599734544754
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,64,12,1,128,1,fp8,fp8,0,0.10091359615325927
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,128,12,1,128,1,fp8,fp8,0,0.012404800206422806
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,128,12,4,128,1,float16,float16,0,0.0124719999730587
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,64,12,2,128,1,float16,fp8,0,0.10099040269851685
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,64,12,2,128,1,fp8,fp8,0,0.10133919715881348
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,64,12,4,128,1,float16,float16,0,0.09750720262527465
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,64,12,4,128,1,float16,fp8,0,0.10169919729232788
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,128,12,1,128,1,float16,float16,0,0.012494400143623352
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,64,12,12,128,1,float16,float16,0,0.06220160126686096
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,64,12,4,128,1,fp8,fp8,0,0.10256799459457397
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,64,12,12,128,1,float16,fp8,0,0.06383839845657349
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,64,12,12,128,1,fp8,fp8,0,0.06515200138092041
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,64,12,1,128,1,float16,float16,0,0.051318401098251344
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,64,12,1,128,1,float16,fp8,0,0.055508798360824584
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,64,12,1,128,1,fp8,fp8,0,0.05557760000228882
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,64,12,2,128,1,float16,float16,0,0.05144799947738647
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,64,12,2,128,1,float16,fp8,0,0.05551040172576904
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,64,12,2,128,1,fp8,fp8,0,0.055731201171875
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,64,12,4,128,1,float16,fp8,0,0.055542397499084475
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,64,12,12,128,1,float16,float16,0,0.033769598603248595
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,64,12,12,128,1,float16,fp8,0,0.03710399866104126
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,64,12,12,128,1,fp8,fp8,0,0.037099200487136844
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,128,12,2,128,1,float16,fp8,0,0.01194079965353012
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,128,12,4,128,1,float16,float16,0,0.010673599690198899
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,64,12,1,128,1,float16,fp8,0,0.032924801111221313
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,64,12,2,128,1,float16,float16,0,0.030884799361228944
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,64,12,2,128,1,float16,fp8,0,0.03300319910049439
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,64,12,2,128,1,fp8,fp8,0,0.033024001121521
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,64,12,4,128,1,float16,float16,0,0.031825599074363706
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,64,12,4,128,1,float16,fp8,0,0.03296320140361786
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,64,12,4,128,1,fp8,fp8,0,0.0329008013010025
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,64,12,12,128,1,float16,float16,0,0.020654399693012238
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,64,12,12,128,1,float16,fp8,0,0.022668799757957457
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,64,12,12,128,1,fp8,fp8,0,0.022780799865722658
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,64,12,1,128,1,float16,float16,0,0.018702399730682374
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,64,12,1,128,1,float16,fp8,0,0.02061759978532791
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,64,12,1,128,1,fp8,fp8,0,0.02067520022392273
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,64,12,2,128,1,float16,float16,0,0.01866399943828583
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,64,12,2,128,1,float16,fp8,0,0.02072640061378479
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,64,12,2,128,1,fp8,fp8,0,0.020638400316238405
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,64,12,4,128,1,float16,float16,0,0.019782400131225585
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,64,12,2,128,1,float16,float16,0,0.0927295982837677
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,64,12,4,128,1,float16,fp8,0,0.020623999834060668
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,64,12,4,128,1,fp8,fp8,0,0.020660799741744996
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,64,12,12,128,1,float16,float16,0,0.01685439944267273
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,128,12,12,128,1,float16,fp8,0,0.012547199428081513
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,64,12,12,128,1,float16,fp8,0,0.01685120016336441
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,64,12,12,128,1,fp8,fp8,0,0.01663520038127899
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,64,12,1,128,1,float16,float16,0,0.014537599682807923
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,64,12,1,128,1,float16,fp8,0,0.016036799550056456
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,64,12,2,128,1,float16,float16,0,0.014580799639225006
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,64,12,2,128,1,float16,fp8,0,0.016564799845218657
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,64,12,2,128,1,fp8,fp8,0,0.01480640023946762
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,64,12,4,128,1,float16,float16,0,0.014572800695896148
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,64,12,4,128,1,float16,fp8,0,0.014758400619029999
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,64,12,4,128,1,fp8,fp8,0,0.014912000298500061
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,64,12,12,128,1,float16,float16,0,0.011169599741697312
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,64,12,12,128,1,float16,fp8,0,0.012457600235939026
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,64,12,12,128,1,fp8,fp8,0,0.012503999471664428
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,64,12,4,128,1,float16,float16,0,0.05403040051460266
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,64,12,1,128,1,float16,float16,0,0.010467199981212616
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,64,12,1,128,1,float16,fp8,0,0.010476800054311753
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,64,12,4,128,1,fp8,fp8,0,0.05552319884300232
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,64,12,2,128,1,float16,float16,0,0.010497599840164185
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,64,12,2,128,1,float16,fp8,0,0.010598400235176086
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,64,12,2,128,1,fp8,fp8,0,0.01072160005569458
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,64,12,4,128,1,float16,fp8,0,0.010739199817180634
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,64,12,4,128,1,float16,float16,0,0.010422399640083313
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,64,12,4,128,1,fp8,fp8,0,0.010590399801731109
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,64,12,12,128,1,float16,float16,0,0.010480000078678131
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,64,12,12,128,1,float16,fp8,0,0.010427200049161912
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,64,12,12,128,1,fp8,fp8,0,0.010515200346708298
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,64,12,1,128,1,float16,float16,0,0.010489600151777268
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,64,12,1,128,1,float16,float16,0,0.030913600325584413
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,64,12,1,128,1,float16,fp8,0,0.010414399951696397
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,64,12,2,128,1,float16,float16,0,0.010499200224876404
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,64,12,1,128,1,fp8,fp8,0,0.010473600029945374
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,64,12,2,128,1,float16,fp8,0,0.010499200224876404
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,64,12,2,128,1,fp8,fp8,0,0.01051200032234192
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,64,12,4,128,1,float16,float16,0,0.010452800244092942
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,64,12,4,128,1,float16,fp8,0,0.010444799810647965
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,64,12,4,128,1,fp8,fp8,0,0.010499200224876404
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,64,12,12,128,1,float16,float16,0,0.010502400249242783
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,64,12,1,128,1,fp8,fp8,0,0.033020800352096556
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,64,12,12,128,1,fp8,fp8,0,0.010460799932479859
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,64,12,1,128,1,float16,float16,0,0.010489600151777268
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,64,12,1,128,1,float16,fp8,0,0.010478399693965912
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,64,12,2,128,1,float16,float16,0,0.0104592002928257
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,64,12,1,128,1,fp8,fp8,0,0.010492800176143647
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,64,12,2,128,1,float16,fp8,0,0.010470400005578995
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,64,12,2,128,1,fp8,fp8,0,0.010396800190210342
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,64,12,4,128,1,float16,float16,0,0.010417599976062775
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,64,12,12,128,1,float16,float16,0,0.010497599840164185
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,64,12,4,128,1,float16,fp8,0,0.010473600029945374
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,64,12,12,128,1,float16,fp8,0,0.010499200224876404
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,64,12,12,128,1,fp8,fp8,0,0.01048159971833229
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,64,12,1,128,1,float16,float16,0,0.010444799810647965
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,64,12,1,128,1,fp8,fp8,0,0.010398399829864503
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,64,12,2,128,1,float16,float16,0,0.010486400127410889
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,64,12,2,128,1,float16,fp8,0,0.010486400127410889
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,64,12,2,128,1,fp8,fp8,0,0.010401599854230881
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,64,12,1,128,1,float16,fp8,0,0.010475199669599533
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,64,12,4,128,1,float16,float16,0,0.010435199737548828
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,64,12,4,128,1,float16,fp8,0,0.010419200360774993
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,64,12,4,128,1,fp8,fp8,0,0.010502400249242783
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,32,12,1,128,1,float16,float16,0,0.07939680218696595
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,32,12,1,128,1,float16,fp8,0,0.08214079737663268
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,32,12,1,128,1,fp8,fp8,0,0.08127679824829101
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,32,12,2,128,1,float16,float16,0,0.07947679758071899
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,32,12,2,128,1,float16,fp8,0,0.0824176013469696
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,32,12,4,128,1,float16,float16,0,0.08163999915122985
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,32,12,4,128,1,float16,fp8,0,0.08237760066986084
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,32,12,4,128,1,fp8,fp8,0,0.08193920254707336
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,32,12,12,128,1,float16,float16,0,0.04835200011730194
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,32,12,12,128,1,float16,fp8,0,0.049614399671554565
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,32,12,1,128,1,float16,float16,0,0.04507839977741242
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,32,12,1,128,1,float16,fp8,0,0.045633599162101746
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,32,12,1,128,1,fp8,fp8,0,0.04576480090618133
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,32,12,2,128,1,float16,float16,0,0.04499039947986603
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,32,12,2,128,1,float16,fp8,0,0.0454255998134613
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,64,12,1,128,1,fp8,fp8,0,0.015535999834537507
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,32,12,2,128,1,fp8,fp8,0,0.045388799905776975
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,32,12,4,128,1,float16,float16,0,0.0456928014755249
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,32,12,4,128,1,float16,fp8,0,0.04553439915180206
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,32,12,4,128,1,fp8,fp8,0,0.04551999866962433
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,32,12,12,128,1,float16,float16,0,0.028935998678207397
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,32,12,12,128,1,float16,fp8,0,0.030883198976516722
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,32,12,12,128,1,fp8,fp8,0,0.02996160089969635
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,32,12,1,128,1,float16,float16,0,0.027132800221443175
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,32,12,1,128,1,float16,fp8,0,0.02832320034503937
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,32,12,1,128,1,fp8,fp8,0,0.02776640057563782
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,32,12,2,128,1,float16,float16,0,0.026888000965118408
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,32,12,2,128,1,float16,fp8,0,0.028200000524520874
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,32,12,2,128,1,fp8,fp8,0,0.02849920094013214
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,32,12,4,128,1,float16,float16,0,0.02749119997024536
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,32,12,4,128,1,float16,fp8,0,0.028803199529647827
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,32,12,4,128,1,fp8,fp8,0,0.028007999062538147
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,32,12,12,128,1,float16,float16,0,0.01858399957418442
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,32,12,12,128,1,float16,fp8,0,0.018668800592422485
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,32,12,12,128,1,fp8,fp8,0,0.018676799535751343
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,32,12,1,128,1,float16,float16,0,0.017550399899482726
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,64,12,12,128,1,float16,fp8,0,0.010470400005578995
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,32,12,1,128,1,float16,fp8,0,0.018580800294876097
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,32,12,1,128,1,fp8,fp8,0,0.018595199286937713
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,32,12,2,128,1,float16,float16,0,0.016892799735069276
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,32,12,2,128,1,float16,fp8,0,0.018598400056362152
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,32,12,2,128,1,fp8,fp8,0,0.018593600392341612
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,32,12,4,128,1,float16,float16,0,0.018622399866580965
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,32,12,4,128,1,float16,fp8,0,0.018596799671649934
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,32,12,4,128,1,fp8,fp8,0,0.01820160001516342
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,32,12,12,128,1,float16,float16,0,0.014609600603580474
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,32,12,12,128,1,float16,fp8,0,0.01451680064201355
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,32,12,12,128,1,fp8,fp8,0,0.014579200744628906
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,32,12,1,128,1,float16,fp8,0,0.014524799585342408
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,32,12,1,128,1,fp8,fp8,0,0.014484800398349762
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,32,12,2,128,1,float16,float16,0,0.012676799297332763
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,32,12,2,128,1,float16,fp8,0,0.01449279934167862
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,32,12,2,128,1,fp8,fp8,0,0.014480000734329224
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,32,12,4,128,1,float16,float16,0,0.01292639970779419
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,32,12,4,128,1,float16,fp8,0,0.014505599439144135
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,32,12,4,128,1,fp8,fp8,0,0.01448799967765808
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,32,12,12,128,1,float16,float16,0,0.010467199981212616
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,32,12,12,128,1,float16,fp8,0,0.010396800190210342
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,32,12,12,128,1,fp8,fp8,0,0.01051200032234192
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,32,12,1,128,1,float16,float16,0,0.010425599664449692
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,32,12,1,128,1,float16,fp8,0,0.010409600287675857
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,32,12,1,128,1,fp8,fp8,0,0.010414399951696397
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,32,12,2,128,1,float16,float16,0,0.010507199913263321
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,32,12,2,128,1,fp8,fp8,0,0.08225759863853455
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,32,12,2,128,1,fp8,fp8,0,0.0104032002389431
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,32,12,4,128,1,float16,float16,0,0.010497599840164185
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,32,12,4,128,1,float16,fp8,0,0.010491199791431427
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,32,12,12,128,1,float16,float16,0,0.01045759990811348
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,32,12,12,128,1,float16,fp8,0,0.010470400005578995
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,32,12,12,128,1,fp8,fp8,0,0.010489600151777268
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,32,12,1,128,1,float16,float16,0,0.010446400195360184
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,32,12,12,128,1,fp8,fp8,0,0.050491201877594
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,32,12,1,128,1,float16,fp8,0,0.010443200170993806
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,32,12,1,128,1,fp8,fp8,0,0.010475199669599533
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,32,12,2,128,1,float16,float16,0,0.010505600273609162
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,32,12,2,128,1,float16,fp8,0,0.010395199805498124
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,32,12,2,128,1,fp8,fp8,0,0.010422399640083313
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,32,12,4,128,1,float16,float16,0,0.010471999645233154
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,32,12,4,128,1,float16,fp8,0,0.010449600219726563
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,32,12,4,128,1,fp8,fp8,0,0.010475199669599533
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,32,12,12,128,1,float16,float16,0,0.010435199737548828
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,32,12,12,128,1,float16,fp8,0,0.01048320010304451
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,32,12,12,128,1,fp8,fp8,0,0.010351999849081039
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,32,12,1,128,1,float16,float16,0,0.010419200360774993
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,32,12,1,128,1,float16,fp8,0,0.010451199859380722
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,32,12,1,128,1,fp8,fp8,0,0.010424000024795533
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,32,12,2,128,1,float16,float16,0,0.01045600026845932
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,32,12,2,128,1,float16,fp8,0,0.010468800365924836
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,32,12,4,128,1,float16,float16,0,0.010447999835014344
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,32,12,4,128,1,fp8,fp8,0,0.010422399640083313
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,32,12,4,128,1,float16,fp8,0,0.010396800190210342
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,32,12,12,128,1,float16,float16,0,0.010449600219726563
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,32,12,12,128,1,float16,fp8,0,0.01040479987859726
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,32,12,12,128,1,fp8,fp8,0,0.010326399654150008
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,32,12,1,128,1,float16,float16,0,0.010367999970912933
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,32,12,1,128,1,float16,fp8,0,0.009401600062847137
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,32,12,1,128,1,fp8,fp8,0,0.0104592002928257
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,32,12,2,128,1,float16,fp8,0,0.009460800141096116
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,32,12,2,128,1,float16,float16,0,0.00931359976530075
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,32,12,2,128,1,fp8,fp8,0,0.009726399928331375
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,32,12,4,128,1,float16,float16,0,0.00933919996023178
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,32,12,4,128,1,float16,fp8,0,0.009864000231027603
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,32,12,4,128,1,fp8,fp8,0,0.010417599976062775
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,16,12,1,128,1,float16,float16,0,0.07690719962120056
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,16,12,1,128,1,float16,fp8,0,0.0741375982761383
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,16,12,1,128,1,fp8,fp8,0,0.0741312026977539
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,16,12,2,128,1,float16,float16,0,0.07605760097503662
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,64,12,4,128,1,fp8,fp8,0,0.010500799864530563
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,32,12,1,128,1,float16,float16,0,0.01432960033416748
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,16,12,2,128,1,fp8,fp8,0,0.07447999715805054
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,16,12,4,128,1,float16,float16,0,0.07815520167350769
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,16,12,4,128,1,float16,fp8,0,0.07415680289268493
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,16,12,4,128,1,fp8,fp8,0,0.0747215986251831
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,16,12,12,128,1,float16,float16,0,0.045265600085258484
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,16,12,12,128,1,float16,fp8,0,0.04498240053653717
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,16,12,12,128,1,fp8,fp8,0,0.043724799156188966
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,16,12,1,128,1,float16,float16,0,0.044268798828125
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,16,12,1,128,1,float16,fp8,0,0.0416128009557724
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,16,12,1,128,1,fp8,fp8,0,0.04272800087928772
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,32,12,2,128,1,float16,fp8,0,0.010424000024795533
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,16,12,2,128,1,float16,float16,0,0.043515199422836305
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,16,12,2,128,1,float16,fp8,0,0.042305600643157956
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,16,12,4,128,1,float16,float16,0,0.04376319944858551
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,32,12,4,128,1,fp8,fp8,0,0.010460799932479859
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,16,12,4,128,1,float16,fp8,0,0.042438399791717527
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,16,12,4,128,1,fp8,fp8,0,0.04173280000686645
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,16,12,12,128,1,float16,float16,0,0.027399998903274537
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,16,12,12,128,1,float16,fp8,0,0.026844799518585205
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,16,12,12,128,1,fp8,fp8,0,0.026907199621200563
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,16,12,1,128,1,float16,float16,0,0.026870399713516235
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,16,12,1,128,1,float16,fp8,0,0.0254831999540329
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,16,12,1,128,1,fp8,fp8,0,0.02643359899520874
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,16,12,2,128,1,float16,float16,0,0.026787200570106508
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,16,12,2,128,1,fp8,fp8,0,0.026662400364875792
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,16,12,4,128,1,float16,float16,0,0.026817598938941957
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,16,12,4,128,1,float16,fp8,0,0.026347199082374574
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,16,12,4,128,1,fp8,fp8,0,0.025779199600219727
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,16,12,12,128,1,float16,float16,0,0.018145599961280824
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,16,12,12,128,1,float16,fp8,0,0.016604800522327424
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,16,12,12,128,1,fp8,fp8,0,0.016606399416923524
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,16,12,1,128,1,float16,float16,0,0.01656160056591034
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,16,12,1,128,1,float16,fp8,0,0.016601599752902985
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,16,12,1,128,1,fp8,fp8,0,0.016598400473594666
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,32,12,2,128,1,fp8,fp8,0,0.010023999959230423
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,16,12,2,128,1,float16,fp8,0,0.016551999747753142
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,16,12,4,128,1,float16,float16,0,0.016648000478744505
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,16,12,4,128,1,float16,fp8,0,0.016553600132465363
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,64,12,1,128,1,fp8,fp8,0,0.010480000078678131
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,16,12,12,128,1,float16,float16,0,0.014532800018787383
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,16,12,12,128,1,float16,fp8,0,0.013953599333763122
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,16,12,12,128,1,fp8,fp8,0,0.013790400326251983
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,16,12,1,128,1,float16,float16,0,0.013147200644016265
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,16,12,1,128,1,float16,fp8,0,0.012833599746227265
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,16,12,2,128,1,float16,float16,0,0.012990400195121765
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,16,12,2,128,1,float16,fp8,0,0.012747199833393097
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,16,12,2,128,1,fp8,fp8,0,0.012601600587368011
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,16,12,4,128,1,float16,float16,0,0.01252480000257492
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,16,12,4,128,1,float16,fp8,0,0.012559999525547028
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,16,12,2,128,1,float16,fp8,0,0.0749888002872467
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,16,12,4,128,1,fp8,fp8,0,0.012483199685811996
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,16,12,12,128,1,float16,float16,0,0.010467199981212616
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,16,12,12,128,1,float16,fp8,0,0.010496000200510025
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,16,12,12,128,1,fp8,fp8,0,0.01045759990811348
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,16,12,1,128,1,float16,float16,0,0.010452800244092942
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,16,12,1,128,1,float16,fp8,0,0.010438399761915207
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,16,12,1,128,1,fp8,fp8,0,0.010478399693965912
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,16,12,2,128,1,float16,float16,0,0.010447999835014344
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,16,12,2,128,1,fp8,fp8,0,0.010478399693965912
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,16,12,2,128,1,float16,fp8,0,0.010499200224876404
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,16,12,4,128,1,float16,float16,0,0.01043199971318245
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,16,12,4,128,1,float16,fp8,0,0.010496000200510025
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,16,12,4,128,1,fp8,fp8,0,0.010406400263309478
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16,12,12,128,1,float16,float16,0,0.010492800176143647
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16,12,12,128,1,float16,fp8,0,0.010384000092744827
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,16,12,2,128,1,fp8,fp8,0,0.04193440079689026
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16,12,12,128,1,fp8,fp8,0,0.01045600026845932
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16,12,1,128,1,float16,float16,0,0.010387200117111205
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16,12,1,128,1,float16,fp8,0,0.010409600287675857
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16,12,1,128,1,fp8,fp8,0,0.010363200306892395
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16,12,2,128,1,float16,fp8,0,0.010377600044012069
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16,12,2,128,1,fp8,fp8,0,0.010478399693965912
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16,12,2,128,1,float16,float16,0,0.010427200049161912
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16,12,4,128,1,float16,float16,0,0.010369600355625152
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16,12,4,128,1,float16,fp8,0,0.010441599786281586
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16,12,12,128,1,float16,float16,0,0.01040000021457672
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,16,12,2,128,1,float16,fp8,0,0.026107200980186464
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16,12,12,128,1,float16,fp8,0,0.010471999645233154
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16,12,12,128,1,fp8,fp8,0,0.010393600165843963
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16,12,1,128,1,float16,float16,0,0.0091279998421669
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16,12,1,128,1,float16,fp8,0,0.009958399832248688
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16,12,1,128,1,fp8,fp8,0,0.010412800312042236
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16,12,2,128,1,float16,float16,0,0.009185600280761718
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16,12,2,128,1,float16,fp8,0,0.008497600257396699
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16,12,2,128,1,fp8,fp8,0,0.009544000029563904
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16,12,4,128,1,float16,float16,0,0.010065600275993347
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16,12,4,128,1,fp8,fp8,0,0.009369599819183349
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16,12,12,128,1,float16,float16,0,0.010460799932479859
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16,12,12,128,1,float16,fp8,0,0.009388799965381622
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16,12,12,128,1,fp8,fp8,0,0.009900800138711929
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16,12,1,128,1,float16,float16,0,0.009443199634552002
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16,12,1,128,1,float16,fp8,0,0.009142400324344635
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,16,12,2,128,1,float16,float16,0,0.016598400473594666
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16,12,1,128,1,fp8,fp8,0,0.00846560001373291
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16,12,2,128,1,float16,float16,0,0.009518399834632874
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16,12,2,128,1,float16,fp8,0,0.008433599770069123
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16,12,2,128,1,fp8,fp8,0,0.008710400015115739
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,16,12,2,128,1,fp8,fp8,0,0.01669120043516159
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16,12,4,128,1,float16,float16,0,0.01037919968366623
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16,12,4,128,1,float16,fp8,0,0.008774399757385254
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16,12,4,128,1,fp8,fp8,0,0.008432000130414962
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,16,12,4,128,1,fp8,fp8,0,0.016624000668525696
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,1,12,1,128,1,float16,float16,0,0.07525759935379028
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,1,12,2,128,1,float16,float16,0,0.07515360116958618
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,1,12,2,128,1,float16,fp8,0,0.0699728012084961
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,1,12,2,128,1,fp8,fp8,0,0.0697983980178833
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,1,12,4,128,1,float16,float16,0,0.07517439723014832
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,1,12,4,128,1,float16,fp8,0,0.06979359984397888
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,1,12,4,128,1,fp8,fp8,0,0.07023680210113525
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,1,12,12,128,1,float16,float16,0,0.043424001336097716
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,1,12,12,128,1,float16,fp8,0,0.04005599915981293
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,1,12,12,128,1,fp8,fp8,0,0.03939839899539947
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,1,12,1,128,1,float16,float16,0,0.043140798807144165
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,1,12,1,128,1,float16,fp8,0,0.03928160071372986
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,1,12,1,128,1,fp8,fp8,0,0.03980480134487152
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,1,12,2,128,1,float16,float16,0,0.04277600049972534
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,1,12,2,128,1,float16,fp8,0,0.03949280083179474
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,1,12,2,128,1,fp8,fp8,0,0.03938719928264618
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,1,12,4,128,1,float16,float16,0,0.04260480105876922
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,1,12,4,128,1,float16,fp8,0,0.03945919871330261
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,1,12,4,128,1,fp8,fp8,0,0.039470401406288144
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1,12,12,128,1,float16,float16,0,0.02677600085735321
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1,12,12,128,1,float16,fp8,0,0.024828800559043886
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1,12,12,128,1,fp8,fp8,0,0.024751999974250795
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1,12,1,128,1,float16,float16,0,0.02592960000038147
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1,12,1,128,1,float16,fp8,0,0.024795199930667877
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1,12,1,128,1,fp8,fp8,0,0.024743999540805816
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1,12,2,128,1,float16,float16,0,0.026761600375175477
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1,12,2,128,1,float16,fp8,0,0.02481600046157837
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1,12,2,128,1,fp8,fp8,0,0.024801599979400634
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1,12,4,128,1,float16,float16,0,0.025935998558998107
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1,12,4,128,1,float16,fp8,0,0.024809600412845613
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1,12,4,128,1,fp8,fp8,0,0.02483679950237274
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1,12,12,128,1,float16,fp8,0,0.01669120043516159
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16,12,4,128,1,fp8,fp8,0,0.01014079973101616
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1,12,12,128,1,fp8,fp8,0,0.016601599752902985
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1,12,12,128,1,float16,float16,0,0.017524799704551695
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1,12,1,128,1,float16,float16,0,0.01659359931945801
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1,12,2,128,1,float16,float16,0,0.01658560037612915
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1,12,2,128,1,float16,fp8,0,0.016599999368190767
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16,12,4,128,1,float16,fp8,0,0.008980800211429597
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1,12,2,128,1,fp8,fp8,0,0.016596800088882445
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1,12,4,128,1,float16,float16,0,0.016630400717258454
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1,12,4,128,1,float16,fp8,0,0.016601599752902985
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1,12,4,128,1,fp8,fp8,0,0.016612799465656282
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1,12,12,128,1,float16,float16,0,0.01451839953660965
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1,12,1,128,1,fp8,fp8,0,0.01658719927072525
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1,12,12,128,1,float16,fp8,0,0.012537600100040435
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1,12,12,128,1,fp8,fp8,0,0.013324800133705138
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1,12,1,128,1,float16,float16,0,0.01329600065946579
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1,12,1,128,1,float16,fp8,0,0.012535999715328216
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1,12,1,128,1,fp8,fp8,0,0.012654399871826172
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1,12,2,128,1,float16,float16,0,0.01281919926404953
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1,12,2,128,1,float16,fp8,0,0.012507200241088867
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1,12,2,128,1,fp8,fp8,0,0.012484800070524216
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1,12,4,128,1,float16,float16,0,0.013252800703048706
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,1,12,1,128,1,float16,fp8,0,0.07008799910545349
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1,12,4,128,1,fp8,fp8,0,0.012545600533485413
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1,12,12,128,1,float16,float16,0,0.010494399815797806
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1,12,12,128,1,float16,fp8,0,0.010412800312042236
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1,12,4,128,1,float16,fp8,0,0.012489599734544754
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1,12,1,128,1,float16,float16,0,0.01043360009789467
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1,12,1,128,1,float16,fp8,0,0.010367999970912933
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1,12,1,128,1,fp8,fp8,0,0.01048320010304451
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1,12,2,128,1,float16,float16,0,0.01040000021457672
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1,12,2,128,1,float16,fp8,0,0.010470400005578995
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1,12,2,128,1,fp8,fp8,0,0.01043360009789467
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1,12,4,128,1,float16,fp8,0,0.010391999781131745
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,16,12,1,128,1,fp8,fp8,0,0.012488000094890594
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1,12,4,128,1,fp8,fp8,0,0.010489600151777268
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1,12,12,128,1,float16,float16,0,0.010443200170993806
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1,12,12,128,1,float16,fp8,0,0.008736000210046769
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1,12,12,128,1,fp8,fp8,0,0.010396800190210342
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1,12,1,128,1,float16,float16,0,0.010364799946546554
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1,12,1,128,1,float16,fp8,0,0.008473599702119828
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1,12,2,128,1,float16,float16,0,0.010451199859380722
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1,12,1,128,1,fp8,fp8,0,0.009030400216579438
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1,12,2,128,1,float16,fp8,0,0.00856959968805313
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1,12,2,128,1,fp8,fp8,0,0.010391999781131745
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1,12,4,128,1,fp8,fp8,0,0.010395199805498124
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1,12,12,128,1,float16,float16,0,0.01045600026845932
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1,12,12,128,1,float16,fp8,0,0.010390400141477584
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1,12,12,128,1,fp8,fp8,0,0.010438399761915207
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1,12,1,128,1,float16,float16,0,0.010356800258159637
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1,12,1,128,1,float16,fp8,0,0.010435199737548828
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1,12,1,128,1,fp8,fp8,0,0.010388799756765366
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1,12,2,128,1,float16,float16,0,0.010372799634933472
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1,12,2,128,1,float16,fp8,0,0.009352000057697296
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1,12,2,128,1,fp8,fp8,0,0.009380800276994705
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1,12,4,128,1,float16,float16,0,0.00998080000281334
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1,12,4,128,1,float16,fp8,0,0.009566400200128555
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1,12,4,128,1,fp8,fp8,0,0.009440000355243682
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1,12,12,128,1,float16,float16,0,0.010396800190210342
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1,12,12,128,1,float16,fp8,0,0.008460800349712371
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1,12,12,128,1,fp8,fp8,0,0.00841279998421669
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1,12,1,128,1,float16,float16,0,0.009055999666452407
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1,12,1,128,1,float16,fp8,0,0.00989919975399971
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1,12,1,128,1,fp8,fp8,0,0.010062400251626968
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1,12,2,128,1,float16,float16,0,0.010387200117111205
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1,12,2,128,1,float16,fp8,0,0.010369600355625152
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1,12,2,128,1,fp8,fp8,0,0.010409600287675857
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1,12,4,128,1,float16,float16,0,0.01040000021457672
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1,12,4,128,1,float16,fp8,0,0.009859199821949004
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,1,12,1,128,1,fp8,fp8,0,0.06992160081863404
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1,12,4,128,1,fp8,fp8,0,0.010411199927330018
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1,12,12,128,1,fp8,fp8,0,0.010468800365924836
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1,12,4,128,1,float16,float16,0,0.010427200049161912
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1,12,4,128,1,float16,float16,0,0.010388799756765366
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1,12,4,128,1,float16,fp8,0,0.00840959995985031
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1,12,1,128,1,float16,fp8,0,0.016624000668525696
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,16384,8,1,128,1,float16,fp8,0,2.897617530822754
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,16384,8,1,128,1,fp8,fp8,0,2.896487998962402
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,16384,8,1,128,1,float16,float16,0,3.4121585845947267
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,16384,8,2,128,1,float16,fp8,0,2.9198047637939455
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,16384,8,2,128,1,float16,float16,0,3.4845088958740233
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,16384,8,2,128,1,fp8,fp8,0,2.8939504623413086
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16384,8,8,128,1,float16,float16,0,1.7984848022460938
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16384,8,8,128,1,float16,fp8,0,1.8110784530639648
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16384,8,8,128,1,fp8,fp8,0,1.5479935646057128
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16384,8,1,128,1,float16,fp8,0,1.5089455604553224
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,16384,8,4,128,1,float16,fp8,0,2.914302444458008
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16384,8,1,128,1,float16,float16,0,1.933577537536621
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,16384,8,4,128,1,fp8,fp8,0,2.8986656188964846
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,16384,8,4,128,1,float16,float16,0,3.539041519165039
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16384,8,1,128,1,fp8,fp8,0,1.7452991485595704
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16384,8,2,128,1,float16,fp8,0,1.5315279960632324
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16384,8,2,128,1,float16,float16,0,1.6924543380737305
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16384,8,2,128,1,fp8,fp8,0,1.5102864265441895
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16384,8,8,128,1,float16,float16,0,1.068496036529541
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16384,8,4,128,1,float16,float16,0,1.778553581237793
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16384,8,4,128,1,fp8,fp8,0,1.5305024147033692
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16384,8,4,128,1,float16,fp8,0,1.7691936492919922
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16384,8,8,128,1,float16,fp8,0,0.8564080238342285
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16384,8,8,128,1,fp8,fp8,0,0.8274448394775391
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16384,8,1,128,1,float16,fp8,0,0.8173680305480957
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16384,8,1,128,1,float16,float16,0,1.0038880348205566
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16384,8,1,128,1,fp8,fp8,0,0.9501775741577149
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16384,8,2,128,1,float16,fp8,0,0.8235039710998535
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16384,8,2,128,1,float16,float16,0,1.0524880409240722
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16384,8,2,128,1,fp8,fp8,0,0.8513343811035157
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16384,8,4,128,1,float16,float16,0,0.9227999687194824
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16384,8,4,128,1,float16,fp8,0,0.8182191848754883
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16384,8,8,128,1,float16,fp8,0,0.509887981414795
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16384,8,4,128,1,fp8,fp8,0,0.819215965270996
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16384,8,8,128,1,fp8,fp8,0,0.4736639976501465
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16384,8,1,128,1,float16,fp8,0,0.47767038345336915
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16384,8,1,128,1,float16,float16,0,0.5976064205169678
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16384,8,1,128,1,fp8,fp8,0,0.47441439628601073
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16384,8,2,128,1,float16,float16,0,0.5046703815460205
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16384,8,2,128,1,float16,fp8,0,0.5016367912292481
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16384,8,2,128,1,fp8,fp8,0,0.47216482162475587
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16384,8,4,128,1,float16,float16,0,0.5131552219390869
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16384,8,4,128,1,float16,fp8,0,0.47547039985656736
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16384,8,4,128,1,fp8,fp8,0,0.5654687881469727
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,12288,8,1,128,1,float16,fp8,0,1.717545509338379
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,12288,8,1,128,1,fp8,fp8,0,1.7330415725708008
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,12288,8,1,128,1,float16,float16,0,1.979524803161621
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,12288,8,2,128,1,fp8,fp8,0,1.7126079559326173
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,12288,8,2,128,1,float16,float16,0,1.9553056716918946
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,12288,8,4,128,1,float16,float16,0,1.9744064331054687
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16384,8,8,128,1,float16,float16,0,0.595576000213623
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,12288,8,8,128,1,float16,fp8,0,0.9073967933654785
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,12288,8,8,128,1,float16,float16,0,1.1628751754760742
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,12288,8,8,128,1,fp8,fp8,0,1.1161215782165528
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,12288,8,4,128,1,float16,fp8,0,1.721104049682617
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,12288,8,1,128,1,float16,float16,0,1.0826607704162599
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,12288,8,4,128,1,fp8,fp8,0,1.710963249206543
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,12288,8,1,128,1,float16,fp8,0,0.903377628326416
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,12288,8,1,128,1,fp8,fp8,0,0.9044848442077636
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,12288,8,2,128,1,float16,float16,0,1.110193634033203
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,12288,8,2,128,1,float16,fp8,0,1.0492112159729003
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,12288,8,2,128,1,fp8,fp8,0,0.9228688240051269
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,12288,8,4,128,1,float16,fp8,0,0.934540843963623
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,12288,8,8,128,1,float16,float16,0,0.5645664215087891
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,12288,8,4,128,1,float16,float16,0,1.124903964996338
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,12288,8,4,128,1,fp8,fp8,0,0.905844783782959
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,12288,8,8,128,1,float16,fp8,0,0.603764820098877
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,12288,8,8,128,1,fp8,fp8,0,0.5379424095153809
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,12288,8,1,128,1,float16,float16,0,0.5403232097625732
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,12288,8,1,128,1,float16,fp8,0,0.5166304111480713
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,12288,8,1,128,1,fp8,fp8,0,0.5010784149169922
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,12288,8,2,128,1,float16,float16,0,0.6155903816223145
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,12288,8,2,128,1,float16,fp8,0,1.7134031295776366
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,12288,8,2,128,1,fp8,fp8,0,0.5451727867126465
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,12288,8,8,128,1,float16,float16,0,0.32316160202026367
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,12288,8,4,128,1,float16,fp8,0,0.5076320171356201
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,12288,8,4,128,1,fp8,fp8,0,0.5040095806121826
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,12288,8,8,128,1,fp8,fp8,0,0.2966815948486328
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,12288,8,8,128,1,float16,fp8,0,0.3381392002105713
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,12288,8,1,128,1,float16,float16,0,0.32114880084991454
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,12288,8,1,128,1,float16,fp8,0,0.29653120040893555
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,12288,8,1,128,1,fp8,fp8,0,0.29800798892974856
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,12288,8,2,128,1,float16,float16,0,0.30893120765686033
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,12288,8,2,128,1,float16,fp8,0,0.29587039947509763
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,12288,8,2,128,1,fp8,fp8,0,0.3156944036483765
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,12288,8,4,128,1,float16,float16,0,0.32831358909606934
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,12288,8,4,128,1,fp8,fp8,0,0.2954623937606812
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,10240,8,1,128,1,float16,fp8,0,1.2354800224304199
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,10240,8,1,128,1,float16,float16,0,1.3604000091552735
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,10240,8,1,128,1,fp8,fp8,0,1.2482208251953124
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,10240,8,2,128,1,float16,float16,0,1.3729071617126465
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,10240,8,2,128,1,float16,fp8,0,1.235865592956543
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,12288,8,2,128,1,float16,fp8,0,0.5005184173583984
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,12288,8,4,128,1,float16,float16,0,0.5533584117889404
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,10240,8,4,128,1,float16,float16,0,1.4263312339782714
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,10240,8,4,128,1,float16,fp8,0,1.5564224243164062
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,10240,8,8,128,1,float16,float16,0,0.7534272193908691
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,10240,8,8,128,1,float16,fp8,0,0.6621119976043701
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,12288,8,4,128,1,float16,fp8,0,0.2970304012298584
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,10240,8,8,128,1,fp8,fp8,0,0.7934512138366699
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,10240,8,4,128,1,fp8,fp8,0,1.23689603805542
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,10240,8,1,128,1,float16,float16,0,0.7384943962097168
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,10240,8,1,128,1,float16,fp8,0,0.6693007946014404
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,10240,8,1,128,1,fp8,fp8,0,0.6605823993682861
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,10240,8,2,128,1,float16,float16,0,0.713262414932251
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,10240,8,2,128,1,fp8,fp8,0,0.6592800140380859
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,10240,8,2,128,1,float16,fp8,0,0.7318816184997559
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,10240,8,4,128,1,float16,float16,0,0.7462800025939942
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,10240,8,8,128,1,float16,float16,0,0.4126431941986084
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,10240,8,8,128,1,float16,fp8,0,0.37304799556732177
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,10240,8,8,128,1,fp8,fp8,0,0.39388959407806395
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,10240,8,4,128,1,float16,fp8,0,0.6641520023345947
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,10240,8,1,128,1,float16,float16,0,0.4069183826446533
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,10240,8,1,128,1,float16,fp8,0,0.3711071968078613
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,10240,8,1,128,1,fp8,fp8,0,0.36997280120849607
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,10240,8,2,128,1,float16,float16,0,0.39594719409942625
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,10240,8,2,128,1,fp8,fp8,0,0.40067520141601565
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,10240,8,4,128,1,float16,float16,0,0.4037487983703613
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,10240,8,4,128,1,float16,fp8,0,0.37120800018310546
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,10240,8,4,128,1,fp8,fp8,0,0.3759680032730103
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,10240,8,8,128,1,float16,float16,0,0.2489327907562256
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,10240,8,8,128,1,float16,fp8,0,0.23721280097961425
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,10240,8,8,128,1,fp8,fp8,0,0.23772799968719482
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,10240,8,1,128,1,float16,float16,0,0.23710880279541016
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,10240,8,2,128,1,fp8,fp8,0,1.328219223022461
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,10240,8,1,128,1,float16,fp8,0,0.23298399448394774
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,10240,8,1,128,1,fp8,fp8,0,0.22968800067901612
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,10240,8,2,128,1,float16,float16,0,0.24305920600891112
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,10240,8,2,128,1,fp8,fp8,0,0.23002879619598388
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,10240,8,4,128,1,float16,float16,0,0.23985600471496582
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,10240,8,4,128,1,float16,fp8,0,0.23281919956207275
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,10240,8,4,128,1,fp8,fp8,0,0.22906239032745362
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,8192,8,1,128,1,float16,fp8,0,1.6045824050903321
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,8192,8,1,128,1,fp8,fp8,0,1.6025840759277343
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,8192,8,1,128,1,float16,float16,0,1.7934000015258789
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,8192,8,2,128,1,float16,float16,0,1.7986000061035157
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,8192,8,2,128,1,float16,fp8,0,1.6053295135498047
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,10240,8,4,128,1,fp8,fp8,0,0.6616623878479004
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,10240,8,2,128,1,float16,fp8,0,0.3705471992492676
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,8192,8,8,128,1,float16,float16,0,0.9816080093383789
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,8192,8,8,128,1,float16,fp8,0,0.8406335830688476
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,8192,8,2,128,1,fp8,fp8,0,1.6370479583740234
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,8192,8,8,128,1,fp8,fp8,0,0.8381216049194335
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,8192,8,4,128,1,float16,float16,0,1.8104320526123048
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,8192,8,4,128,1,float16,fp8,0,1.871552085876465
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,8192,8,4,128,1,fp8,fp8,0,1.8386079788208007
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,8192,8,1,128,1,float16,fp8,0,0.8373472213745117
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,8192,8,1,128,1,float16,float16,0,0.9090160369873047
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,8192,8,1,128,1,fp8,fp8,0,0.8503855705261231
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,8192,8,2,128,1,float16,float16,0,0.9008416175842285
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,8192,8,2,128,1,float16,fp8,0,1.0220800399780274
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,8192,8,2,128,1,fp8,fp8,0,0.8765567779541016
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,8192,8,4,128,1,float16,float16,0,0.9383935928344727
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,8192,8,8,128,1,float16,float16,0,0.516156816482544
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,8192,8,4,128,1,float16,fp8,0,0.8387824058532715
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,8192,8,4,128,1,fp8,fp8,0,0.9248479843139649
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,8192,8,8,128,1,float16,fp8,0,0.4647359848022461
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,8192,8,8,128,1,fp8,fp8,0,0.508241605758667
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,8192,8,1,128,1,float16,float16,0,0.48183841705322267
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,8192,8,1,128,1,fp8,fp8,0,0.45860958099365234
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,8192,8,1,128,1,float16,fp8,0,0.504099178314209
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,8192,8,2,128,1,float16,float16,0,0.48188161849975586
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,8192,8,2,128,1,float16,fp8,0,0.488921594619751
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,8192,8,2,128,1,fp8,fp8,0,0.4667424201965332
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,8192,8,4,128,1,float16,float16,0,0.5044528007507324
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,8192,8,8,128,1,float16,fp8,0,0.2637135982513428
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,8192,8,4,128,1,float16,fp8,0,0.45866241455078127
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,8192,8,4,128,1,fp8,fp8,0,0.46768479347229003
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,8192,8,8,128,1,fp8,fp8,0,0.2746000051498413
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,8192,8,1,128,1,float16,float16,0,0.26917281150817873
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,8192,8,1,128,1,float16,fp8,0,0.27037439346313474
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,8192,8,1,128,1,fp8,fp8,0,0.26109440326690675
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,8192,8,2,128,1,float16,float16,0,0.27035999298095703
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,8192,8,2,128,1,float16,fp8,0,0.2673072099685669
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,8192,8,2,128,1,fp8,fp8,0,0.26841599941253663
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,8192,8,8,128,1,float16,float16,0,0.1817296028137207
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,8192,8,8,128,1,float16,fp8,0,0.167630398273468
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,8192,8,4,128,1,fp8,fp8,0,0.261516809463501
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,8192,8,8,128,1,fp8,fp8,0,0.1717471957206726
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,8192,8,1,128,1,float16,float16,0,0.17325279712677003
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,8192,8,1,128,1,float16,fp8,0,0.16784000396728516
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,8192,8,1,128,1,fp8,fp8,0,0.1667423963546753
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,8192,8,2,128,1,float16,float16,0,0.17594239711761475
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,8192,8,2,128,1,float16,fp8,0,0.1684399962425232
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,8192,8,2,128,1,fp8,fp8,0,0.16745920181274415
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,10240,8,2,128,1,float16,fp8,0,0.23590080738067626
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,8192,8,4,128,1,float16,float16,0,0.1736672043800354
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,8192,8,4,128,1,float16,fp8,0,0.17112640142440796
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,8192,8,4,128,1,fp8,fp8,0,0.16879199743270873
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,6144,8,1,128,1,float16,fp8,0,0.9737983703613281
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,6144,8,1,128,1,float16,float16,0,1.0404064178466796
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,6144,8,1,128,1,fp8,fp8,0,0.9741935729980469
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,6144,8,2,128,1,float16,float16,0,1.0400768280029298
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,6144,8,2,128,1,float16,fp8,0,0.9729680061340332
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,6144,8,2,128,1,fp8,fp8,0,1.0338255882263183
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,8192,8,8,128,1,float16,float16,0,0.3007296085357666
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,6144,8,4,128,1,float16,float16,0,1.081158447265625
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,6144,8,4,128,1,float16,fp8,0,1.1530351638793945
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,8192,8,4,128,1,float16,float16,0,0.2782047986984253
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,6144,8,8,128,1,float16,fp8,0,0.5155375957489013
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,6144,8,8,128,1,float16,float16,0,0.5938496112823486
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,8192,8,4,128,1,float16,fp8,0,0.2704047918319702
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,6144,8,8,128,1,fp8,fp8,0,0.5146063804626465
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,6144,8,4,128,1,fp8,fp8,0,0.9737183570861816
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,6144,8,1,128,1,float16,float16,0,0.54061918258667
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,6144,8,1,128,1,fp8,fp8,0,0.5140751838684082
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,6144,8,2,128,1,float16,float16,0,0.5525919914245605
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,6144,8,2,128,1,float16,fp8,0,0.5125152111053467
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,6144,8,2,128,1,fp8,fp8,0,0.5587584018707276
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,6144,8,4,128,1,float16,float16,0,0.5515408039093017
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,6144,8,8,128,1,float16,fp8,0,0.2863055944442749
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,6144,8,8,128,1,float16,float16,0,0.3315727949142456
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,6144,8,8,128,1,fp8,fp8,0,0.2980736017227173
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,6144,8,4,128,1,fp8,fp8,0,0.5157167911529541
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,6144,8,1,128,1,float16,float16,0,0.29251680374145506
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,6144,8,1,128,1,float16,fp8,0,0.28410561084747316
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,6144,8,2,128,1,float16,float16,0,0.29759840965270995
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,6144,8,1,128,1,fp8,fp8,0,0.32387681007385255
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,6144,8,2,128,1,float16,fp8,0,0.284766411781311
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,6144,8,2,128,1,fp8,fp8,0,0.29098079204559324
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,6144,8,4,128,1,float16,float16,0,0.29923200607299805
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,6144,8,8,128,1,float16,fp8,0,0.1700111985206604
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,6144,8,4,128,1,float16,fp8,0,0.29026720523834226
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,6144,8,4,128,1,fp8,fp8,0,0.302126407623291
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,6144,8,8,128,1,fp8,fp8,0,0.16962720155715943
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,6144,8,1,128,1,float16,float16,0,0.17058080434799194
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,6144,8,1,128,1,float16,fp8,0,0.17032159566879274
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,6144,8,1,128,1,fp8,fp8,0,0.17836320400238037
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,6144,8,2,128,1,float16,float16,0,0.16928160190582275
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,6144,8,2,128,1,float16,fp8,0,0.16824959516525267
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,6144,8,2,128,1,fp8,fp8,0,0.16863199472427368
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,6144,8,4,128,1,float16,float16,0,0.18351199626922607
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,6144,8,4,128,1,fp8,fp8,0,0.16762880086898804
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,6144,8,8,128,1,float16,fp8,0,0.11378400325775147
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,6144,8,8,128,1,float16,float16,0,0.11766239404678344
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,6144,8,8,128,1,fp8,fp8,0,0.10959839820861816
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,6144,8,1,128,1,float16,fp8,0,0.10940639972686768
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,6144,8,1,128,1,float16,float16,0,0.11439839601516724
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,6144,8,1,128,1,fp8,fp8,0,0.10947840213775635
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,6144,8,2,128,1,float16,float16,0,0.11202720403671265
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,6144,8,2,128,1,float16,fp8,0,0.10971360206604004
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,6144,8,2,128,1,fp8,fp8,0,0.11123199462890625
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,6144,8,4,128,1,float16,fp8,0,0.1099776029586792
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,6144,8,4,128,1,fp8,fp8,0,0.10976639986038209
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,6144,8,1,128,1,float16,fp8,0,0.5133039951324463
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,4096,8,1,128,1,float16,float16,0,0.9785455703735352
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,4096,8,1,128,1,float16,fp8,0,0.9499008178710937
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,4096,8,1,128,1,fp8,fp8,0,0.9497520446777343
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,6144,8,4,128,1,float16,fp8,0,0.5325376033782959
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,4096,8,2,128,1,float16,float16,0,0.9694623947143555
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,4096,8,2,128,1,fp8,fp8,0,0.9504768371582031
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,4096,8,2,128,1,float16,fp8,0,1.0720080375671386
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,6144,8,8,128,1,float16,float16,0,0.18327679634094238
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,4096,8,4,128,1,float16,float16,0,1.0328543663024903
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,4096,8,4,128,1,float16,fp8,0,0.9505056381225586
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,4096,8,8,128,1,float16,fp8,0,0.52848801612854
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,4096,8,8,128,1,fp8,fp8,0,0.49726238250732424
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,4096,8,4,128,1,fp8,fp8,0,0.9485872268676758
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,6144,8,4,128,1,float16,fp8,0,0.1683743953704834
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,4096,8,1,128,1,float16,fp8,0,0.49473118782043457
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,4096,8,1,128,1,fp8,fp8,0,0.5081439971923828
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,4096,8,2,128,1,float16,float16,0,0.5026624202728271
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,4096,8,2,128,1,fp8,fp8,0,0.4932975769042969
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,6144,8,4,128,1,float16,float16,0,0.11447199583053588
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,4096,8,4,128,1,float16,float16,0,0.5235199928283691
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,4096,8,4,128,1,float16,fp8,0,0.5020720005035401
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,4096,8,4,128,1,fp8,fp8,0,0.4965231895446777
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,4096,8,8,128,1,float16,fp8,0,0.26990718841552735
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,4096,8,1,128,1,float16,float16,0,0.2669471979141235
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,4096,8,1,128,1,fp8,fp8,0,0.26790881156921387
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,4096,8,2,128,1,float16,float16,0,0.26808159351348876
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,4096,8,2,128,1,float16,fp8,0,0.26863679885864256
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,4096,8,2,128,1,fp8,fp8,0,0.26866400241851807
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,4096,8,4,128,1,float16,float16,0,0.27723519802093505
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,4096,8,4,128,1,float16,fp8,0,0.26847999095916747
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,4096,8,4,128,1,fp8,fp8,0,0.269486403465271
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,4096,8,8,128,1,float16,float16,0,0.1690400004386902
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,4096,8,8,128,1,float16,fp8,0,0.15582079887390138
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,4096,8,8,128,1,fp8,fp8,0,0.1557423949241638
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,4096,8,1,128,1,float16,float16,0,0.15124800205230712
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,4096,8,1,128,1,float16,fp8,0,0.15363680124282836
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,4096,8,8,128,1,float16,float16,0,0.5651504039764405
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,4096,8,1,128,1,float16,float16,0,0.5018159866333007
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,4096,8,2,128,1,float16,fp8,0,0.15268800258636475
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,4096,8,2,128,1,fp8,fp8,0,0.15238239765167236
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,4096,8,4,128,1,float16,float16,0,0.1576607942581177
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,4096,8,2,128,1,float16,fp8,0,0.4942431926727295
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,4096,8,4,128,1,float16,fp8,0,0.1531615972518921
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,4096,8,4,128,1,fp8,fp8,0,0.15419039726257325
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,4096,8,8,128,1,float16,float16,0,0.1062000036239624
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,4096,8,8,128,1,float16,fp8,0,0.0977999985218048
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,4096,8,8,128,1,fp8,fp8,0,0.09807360172271729
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,4096,8,8,128,1,float16,float16,0,0.29699039459228516
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,4096,8,8,128,1,fp8,fp8,0,0.26973440647125246
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,4096,8,1,128,1,float16,float16,0,0.09858080148696899
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,4096,8,1,128,1,float16,fp8,0,0.09743520021438598
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,4096,8,1,128,1,fp8,fp8,0,0.0973695993423462
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,4096,8,2,128,1,float16,fp8,0,0.09736480116844178
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,4096,8,2,128,1,fp8,fp8,0,0.09735360145568847
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,4096,8,1,128,1,float16,fp8,0,0.2682719945907593
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,4096,8,4,128,1,float16,fp8,0,0.09770399928092957
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,4096,8,4,128,1,fp8,fp8,0,0.09756960272789002
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,4096,8,8,128,1,float16,float16,0,0.06492639780044555
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,4096,8,8,128,1,float16,fp8,0,0.06159520149230957
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,4096,8,8,128,1,fp8,fp8,0,0.06155359745025635
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,4096,8,1,128,1,float16,float16,0,0.06204959750175476
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,4096,8,1,128,1,float16,fp8,0,0.061710399389266965
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,4096,8,1,128,1,fp8,fp8,0,0.061715197563171384
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,4096,8,2,128,1,float16,float16,0,0.061977601051330565
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,4096,8,2,128,1,float16,fp8,0,0.06167200207710266
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,4096,8,2,128,1,fp8,fp8,0,0.06157600283622742
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,4096,8,4,128,1,float16,float16,0,0.06350399851799012
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,4096,8,4,128,1,float16,fp8,0,0.06164000034332275
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,4096,8,4,128,1,fp8,fp8,0,0.0616320013999939
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,3072,8,1,128,1,float16,float16,0,0.5842527866363525
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,3072,8,1,128,1,float16,fp8,0,0.5950032234191894
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,3072,8,1,128,1,fp8,fp8,0,0.5953807830810547
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,4096,8,1,128,1,fp8,fp8,0,0.15349119901657104
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,3072,8,2,128,1,float16,float16,0,0.5922111988067627
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,3072,8,2,128,1,float16,fp8,0,0.5952735900878906
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,3072,8,2,128,1,fp8,fp8,0,0.5963136196136475
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,4096,8,2,128,1,float16,float16,0,0.15144480466842652
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,3072,8,8,128,1,float16,float16,0,0.3517568111419678
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,3072,8,8,128,1,float16,fp8,0,0.31483519077301025
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,3072,8,4,128,1,float16,fp8,0,0.5974095821380615
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,3072,8,4,128,1,float16,float16,0,0.6235216140747071
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,3072,8,4,128,1,fp8,fp8,0,0.5951007843017578
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,3072,8,8,128,1,fp8,fp8,0,0.31671040058135985
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,3072,8,1,128,1,float16,float16,0,0.3032527923583984
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,3072,8,2,128,1,float16,float16,0,0.3031071901321411
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,3072,8,1,128,1,fp8,fp8,0,0.3219007968902588
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,3072,8,2,128,1,float16,fp8,0,0.31451199054718015
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,4096,8,2,128,1,float16,float16,0,0.09928960204124451
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,3072,8,2,128,1,fp8,fp8,0,0.31510879993438723
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,4096,8,4,128,1,float16,float16,0,0.10057599544525146
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,3072,8,4,128,1,float16,float16,0,0.3183135986328125
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,3072,8,4,128,1,float16,fp8,0,0.3269376039505005
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,3072,8,8,128,1,float16,float16,0,0.19050400257110595
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,3072,8,8,128,1,float16,fp8,0,0.17551519870758056
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,3072,8,8,128,1,fp8,fp8,0,0.1757248044013977
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,3072,8,4,128,1,fp8,fp8,0,0.3138943910598755
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,3072,8,1,128,1,float16,float16,0,0.16776959896087645
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,3072,8,1,128,1,float16,fp8,0,0.17293920516967773
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,3072,8,2,128,1,float16,float16,0,0.16742559671401977
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,3072,8,2,128,1,fp8,fp8,0,0.17399040460586548
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,3072,8,2,128,1,float16,fp8,0,0.17365599870681764
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,3072,8,4,128,1,float16,float16,0,0.17546080350875853
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,3072,8,8,128,1,float16,float16,0,0.11346399784088135
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,3072,8,4,128,1,fp8,fp8,0,0.17455999851226806
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,3072,8,8,128,1,float16,fp8,0,0.10335520505905152
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,3072,8,8,128,1,fp8,fp8,0,0.10327680110931396
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,3072,8,1,128,1,float16,float16,0,0.10028320550918579
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,3072,8,1,128,1,float16,fp8,0,0.10315359830856323
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,3072,8,2,128,1,float16,float16,0,0.10060960054397583
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,3072,8,2,128,1,float16,fp8,0,0.10317280292510986
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,3072,8,2,128,1,fp8,fp8,0,0.10303360223770142
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,3072,8,4,128,1,float16,float16,0,0.10453280210494995
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,3072,8,4,128,1,float16,fp8,0,0.10326720476150512
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,3072,8,4,128,1,fp8,fp8,0,0.10299520492553711
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,3072,8,8,128,1,float16,float16,0,0.07211040258407593
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,3072,8,8,128,1,float16,fp8,0,0.06622560024261474
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,3072,8,8,128,1,fp8,fp8,0,0.0666815996170044
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,3072,8,1,128,1,float16,float16,0,0.06704639792442321
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,3072,8,1,128,1,float16,fp8,0,0.06666240096092224
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,3072,8,2,128,1,float16,float16,0,0.06654719710350036
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,3072,8,2,128,1,float16,fp8,0,0.06728000044822693
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,3072,8,2,128,1,fp8,fp8,0,0.06700000166893005
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,3072,8,4,128,1,float16,float16,0,0.06846240162849426
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,3072,8,4,128,1,float16,fp8,0,0.06751840114593506
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,3072,8,8,128,1,float16,float16,0,0.05133600234985351
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,3072,8,4,128,1,fp8,fp8,0,0.06662719845771789
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,3072,8,8,128,1,float16,fp8,0,0.04886560142040253
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,3072,8,8,128,1,fp8,fp8,0,0.04829280078411102
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,3072,8,1,128,1,float16,float16,0,0.049404799938201904
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,3072,8,1,128,1,float16,fp8,0,0.04824639856815338
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,3072,8,1,128,1,fp8,fp8,0,0.04778560101985931
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,3072,8,2,128,1,float16,float16,0,0.04924319982528687
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,3072,8,2,128,1,float16,fp8,0,0.0478767991065979
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,3072,8,2,128,1,fp8,fp8,0,0.04820320010185242
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,3072,8,4,128,1,float16,float16,0,0.04959999918937683
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,3072,8,4,128,1,fp8,fp8,0,0.047907200455665586
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,2048,8,1,128,1,float16,float16,0,0.5788415908813477
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,2048,8,1,128,1,float16,fp8,0,0.6163919925689697
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,3072,8,1,128,1,float16,fp8,0,0.32745440006256105
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,2048,8,1,128,1,fp8,fp8,0,0.6165616035461425
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,2048,8,2,128,1,float16,float16,0,0.5876143932342529
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,2048,8,2,128,1,float16,fp8,0,0.6174240112304688
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,3072,8,1,128,1,fp8,fp8,0,0.1764016032218933
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,3072,8,4,128,1,float16,fp8,0,0.17467039823532104
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,3072,8,1,128,1,fp8,fp8,0,0.10306880474090577
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,2048,8,2,128,1,fp8,fp8,0,0.6351568222045898
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,2048,8,4,128,1,float16,float16,0,0.6260272026062011
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,2048,8,4,128,1,float16,fp8,0,0.6183631896972657
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,2048,8,4,128,1,fp8,fp8,0,0.6161456108093262
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,2048,8,8,128,1,float16,float16,0,0.3604687929153442
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,2048,8,8,128,1,fp8,fp8,0,0.3426287889480591
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,2048,8,1,128,1,float16,float16,0,0.29390881061553953
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,2048,8,1,128,1,float16,fp8,0,0.3214879989624023
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,2048,8,1,128,1,fp8,fp8,0,0.32291040420532224
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,2048,8,2,128,1,float16,float16,0,0.3054368019104004
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,2048,8,2,128,1,float16,fp8,0,0.3219583988189697
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,2048,8,2,128,1,fp8,fp8,0,0.3211744070053101
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,3072,8,4,128,1,float16,fp8,0,0.04782400131225586
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,2048,8,4,128,1,float16,float16,0,0.3172271966934204
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,2048,8,4,128,1,float16,fp8,0,0.3312079906463623
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,2048,8,4,128,1,fp8,fp8,0,0.3221280097961426
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,2048,8,8,128,1,float16,float16,0,0.19081439971923828
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,2048,8,8,128,1,float16,fp8,0,0.17502880096435547
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,2048,8,1,128,1,float16,float16,0,0.16069600582122803
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,2048,8,1,128,1,float16,fp8,0,0.17399359941482545
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,2048,8,1,128,1,fp8,fp8,0,0.17352800369262694
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,2048,8,2,128,1,float16,float16,0,0.1607743978500366
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,2048,8,2,128,1,float16,fp8,0,0.17575839757919312
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,2048,8,2,128,1,fp8,fp8,0,0.17377599477767944
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,2048,8,4,128,1,float16,float16,0,0.17126879692077637
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,2048,8,4,128,1,float16,fp8,0,0.17427680492401124
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,2048,8,4,128,1,fp8,fp8,0,0.17600640058517455
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,2048,8,8,128,1,float16,float16,0,0.10969120264053345
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,2048,8,8,128,1,float16,fp8,0,0.10053759813308716
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,2048,8,1,128,1,float16,float16,0,0.09383839964866639
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,2048,8,1,128,1,float16,fp8,0,0.09794399738311768
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,2048,8,1,128,1,fp8,fp8,0,0.09769440293312073
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,2048,8,2,128,1,float16,float16,0,0.09486240148544312
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,2048,8,2,128,1,float16,fp8,0,0.0983568012714386
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,2048,8,2,128,1,fp8,fp8,0,0.09802719950675964
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,2048,8,4,128,1,float16,float16,0,0.10096640586853027
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,2048,8,4,128,1,float16,fp8,0,0.09874399900436401
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,2048,8,4,128,1,fp8,fp8,0,0.0985040009021759
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,2048,8,8,128,1,float16,fp8,0,0.0615119993686676
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,2048,8,8,128,1,float16,float16,0,0.06708160042762756
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,2048,8,8,128,1,fp8,fp8,0,0.06111360192298889
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,2048,8,1,128,1,float16,float16,0,0.05899040102958679
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,2048,8,1,128,1,float16,fp8,0,0.061742401123046874
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,2048,8,1,128,1,fp8,fp8,0,0.06093119978904724
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,2048,8,2,128,1,float16,float16,0,0.060068798065185544
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,2048,8,2,128,1,float16,fp8,0,0.06128159761428833
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,2048,8,2,128,1,fp8,fp8,0,0.06172320246696472
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,2048,8,4,128,1,float16,float16,0,0.06183680295944214
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,2048,8,4,128,1,float16,fp8,0,0.061166399717330934
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,2048,8,4,128,1,fp8,fp8,0,0.06124160289764404
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,2048,8,8,128,1,float16,fp8,0,0.0384799987077713
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,2048,8,1,128,1,float16,float16,0,0.037196800112724304
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,2048,8,8,128,1,fp8,fp8,0,0.03875359892845154
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,2048,8,1,128,1,float16,fp8,0,0.03782399892807007
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,2048,8,1,128,1,fp8,fp8,0,0.03916000127792359
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,2048,8,2,128,1,float16,float16,0,0.037529599666595456
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,2048,8,2,128,1,fp8,fp8,0,0.0390639990568161
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,2048,8,4,128,1,float16,float16,0,0.03875040113925934
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,2048,8,4,128,1,float16,fp8,0,0.03918879926204681
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,3072,8,1,128,1,fp8,fp8,0,0.06630719900131225
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,2048,8,8,128,1,float16,fp8,0,0.32761440277099607
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,2048,8,8,128,1,float16,float16,0,0.03703519999980927
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,2048,8,8,128,1,float16,fp8,0,0.03505440056324005
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,2048,8,8,128,1,fp8,fp8,0,0.035051199793815616
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,2048,8,1,128,1,float16,float16,0,0.035102400183677676
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,2048,8,1,128,1,float16,fp8,0,0.03508000075817108
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,2048,8,1,128,1,fp8,fp8,0,0.03498240113258362
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,2048,8,2,128,1,float16,fp8,0,0.03499360084533691
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,2048,8,2,128,1,fp8,fp8,0,0.03507519960403442
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,2048,8,4,128,1,float16,float16,0,0.03545759916305542
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,2048,8,4,128,1,float16,fp8,0,0.03506079912185669
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,2048,8,4,128,1,fp8,fp8,0,0.03497759997844696
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1536,8,1,128,1,float16,float16,0,0.3586431980133057
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1536,8,1,128,1,float16,fp8,0,0.4052896022796631
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1536,8,1,128,1,fp8,fp8,0,0.40692639350891113
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1536,8,2,128,1,float16,float16,0,0.3624079942703247
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,2048,8,8,128,1,fp8,fp8,0,0.10081440210342407
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1536,8,2,128,1,float16,fp8,0,0.4064191818237305
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1536,8,2,128,1,fp8,fp8,0,0.40613441467285155
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1536,8,4,128,1,float16,float16,0,0.39623360633850097
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1536,8,4,128,1,float16,fp8,0,0.4074560165405273
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1536,8,4,128,1,fp8,fp8,0,0.40572161674499513
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1536,8,8,128,1,float16,float16,0,0.2371664047241211
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1536,8,8,128,1,float16,fp8,0,0.21555519104003906
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1536,8,8,128,1,fp8,fp8,0,0.21558880805969238
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1536,8,1,128,1,float16,float16,0,0.18869919776916505
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1536,8,1,128,1,float16,fp8,0,0.21373279094696046
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1536,8,1,128,1,fp8,fp8,0,0.21335840225219727
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,2048,8,2,128,1,float16,fp8,0,0.038047999143600464
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1536,8,2,128,1,float16,float16,0,0.19023679494857787
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,2048,8,4,128,1,fp8,fp8,0,0.038913598656654357
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1536,8,2,128,1,float16,fp8,0,0.21383841037750245
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1536,8,2,128,1,fp8,fp8,0,0.21375999450683594
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1536,8,4,128,1,float16,float16,0,0.2057823896408081
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1536,8,4,128,1,float16,fp8,0,0.21455039978027343
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1536,8,8,128,1,float16,fp8,0,0.11990560293197632
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,2048,8,2,128,1,float16,float16,0,0.03506079912185669
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1536,8,4,128,1,fp8,fp8,0,0.21465919017791749
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1536,8,8,128,1,fp8,fp8,0,0.11953279972076417
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1536,8,1,128,1,float16,float16,0,0.1063920021057129
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1536,8,2,128,1,float16,float16,0,0.10755840539932252
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1536,8,1,128,1,fp8,fp8,0,0.11747679710388184
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1536,8,2,128,1,float16,fp8,0,0.11737920045852661
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1536,8,2,128,1,fp8,fp8,0,0.1173200011253357
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1536,8,4,128,1,float16,float16,0,0.11548479795455932
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1536,8,4,128,1,float16,fp8,0,0.11830079555511475
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1536,8,8,128,1,float16,float16,0,0.07665119767189026
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1536,8,8,128,1,float16,fp8,0,0.06915199756622314
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,2048,8,8,128,1,fp8,fp8,0,0.17584799528121947
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1536,8,8,128,1,fp8,fp8,0,0.06885600090026855
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1536,8,1,128,1,float16,float16,0,0.06347039937973023
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1536,8,1,128,1,float16,fp8,0,0.06832320094108582
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1536,8,1,128,1,fp8,fp8,0,0.06782079935073852
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1536,8,2,128,1,float16,float16,0,0.06346560120582581
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1536,8,2,128,1,float16,fp8,0,0.06842560172080994
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1536,8,4,128,1,float16,float16,0,0.06795039772987366
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1536,8,4,128,1,float16,fp8,0,0.06827200055122376
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1536,8,4,128,1,fp8,fp8,0,0.06796799898147583
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1536,8,8,128,1,float16,float16,0,0.04731679856777191
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1536,8,8,128,1,float16,fp8,0,0.04325920045375824
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1536,8,1,128,1,float16,float16,0,0.04200640022754669
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1536,8,8,128,1,fp8,fp8,0,0.04398080110549927
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1536,8,1,128,1,float16,fp8,0,0.04328480064868927
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1536,8,1,128,1,fp8,fp8,0,0.043647998571395875
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1536,8,2,128,1,float16,float16,0,0.04215520024299622
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1536,8,2,128,1,float16,fp8,0,0.043812799453735354
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1536,8,2,128,1,fp8,fp8,0,0.04328800141811371
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1536,8,4,128,1,float16,float16,0,0.04386720061302185
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1536,8,4,128,1,float16,fp8,0,0.043268799781799316
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1536,8,4,128,1,fp8,fp8,0,0.04382559955120087
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1536,8,8,128,1,float16,float16,0,0.032918399572372435
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1536,8,8,128,1,float16,fp8,0,0.031006398797035217
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1536,8,8,128,1,fp8,fp8,0,0.030907198786735535
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1536,8,1,128,1,float16,float16,0,0.031006398797035217
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1536,8,1,128,1,float16,fp8,0,0.030907198786735535
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1536,8,1,128,1,fp8,fp8,0,0.031012800335884095
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1536,8,2,128,1,float16,float16,0,0.030862399935722352
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1536,8,2,128,1,float16,fp8,0,0.03096640110015869
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,2048,8,8,128,1,float16,float16,0,0.04116159975528717
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1536,8,2,128,1,fp8,fp8,0,0.030900800228118898
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1536,8,4,128,1,float16,float16,0,0.030953601002693176
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1536,8,4,128,1,float16,fp8,0,0.030921599268913268
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1536,8,4,128,1,fp8,fp8,0,0.030961599946022034
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1536,8,8,128,1,float16,float16,0,0.029676800966262816
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1536,8,8,128,1,float16,fp8,0,0.028892800211906433
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1536,8,8,128,1,fp8,fp8,0,0.02882719933986664
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1536,8,1,128,1,float16,float16,0,0.02892799973487854
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1536,8,1,128,1,float16,fp8,0,0.02890399992465973
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1536,8,1,128,1,fp8,fp8,0,0.02884320020675659
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1536,8,2,128,1,float16,float16,0,0.028880000114440918
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1536,8,2,128,1,float16,fp8,0,0.028892800211906433
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1536,8,2,128,1,fp8,fp8,0,0.028932800889015196
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1536,8,4,128,1,float16,float16,0,0.02884959876537323
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1536,8,4,128,1,float16,fp8,0,0.02887519896030426
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1536,8,4,128,1,fp8,fp8,0,0.028935998678207397
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1536,8,8,128,1,float16,float16,0,0.1297968029975891
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,1024,8,1,128,1,float16,float16,0,0.3724368095397949
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,1024,8,1,128,1,float16,fp8,0,0.4492208003997803
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1536,8,1,128,1,float16,fp8,0,0.11698559522628785
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,1024,8,1,128,1,fp8,fp8,0,0.4484864234924316
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,1024,8,2,128,1,float16,float16,0,0.3836303949356079
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1536,8,4,128,1,fp8,fp8,0,0.118339204788208
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,1024,8,2,128,1,float16,fp8,0,0.4501840114593506
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,1024,8,2,128,1,fp8,fp8,0,0.4496799945831299
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,1024,8,4,128,1,float16,fp8,0,0.4497551918029785
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,1024,8,4,128,1,float16,float16,0,0.42206721305847167
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,1024,8,4,128,1,fp8,fp8,0,0.4514607906341553
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1024,8,8,128,1,float16,float16,0,0.26105599403381347
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1536,8,2,128,1,fp8,fp8,0,0.06795039772987366
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1024,8,8,128,1,fp8,fp8,0,0.2357503890991211
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1024,8,1,128,1,float16,fp8,0,0.23326239585876465
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1024,8,1,128,1,fp8,fp8,0,0.23427839279174806
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1024,8,2,128,1,float16,float16,0,0.19802720546722413
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1024,8,2,128,1,fp8,fp8,0,0.2342832088470459
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1024,8,2,128,1,float16,fp8,0,0.23341119289398193
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1024,8,4,128,1,float16,fp8,0,0.23406400680541992
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1024,8,8,128,1,float16,float16,0,0.13900480270385743
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1024,8,4,128,1,fp8,fp8,0,0.23408799171447753
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1024,8,8,128,1,float16,fp8,0,0.1269711971282959
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1024,8,8,128,1,fp8,fp8,0,0.1267840027809143
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1024,8,1,128,1,float16,float16,0,0.10821599960327148
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1024,8,1,128,1,float16,fp8,0,0.12450720071792602
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1024,8,2,128,1,float16,float16,0,0.10844960212707519
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1024,8,2,128,1,float16,fp8,0,0.12594879865646363
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1024,8,2,128,1,fp8,fp8,0,0.12492320537567139
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1024,8,4,128,1,float16,float16,0,0.11816320419311524
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1024,8,4,128,1,float16,fp8,0,0.12613120079040527
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1024,8,4,128,1,fp8,fp8,0,0.12585279941558838
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1024,8,8,128,1,float16,fp8,0,0.0722208023071289
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1024,8,8,128,1,fp8,fp8,0,0.07203519940376282
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1024,8,1,128,1,float16,float16,0,0.0622655987739563
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1024,8,1,128,1,float16,fp8,0,0.0701856017112732
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1024,8,2,128,1,float16,float16,0,0.06358240246772766
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1024,8,2,128,1,fp8,fp8,0,0.0699504017829895
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1024,8,2,128,1,float16,fp8,0,0.06995840072631836
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1024,8,4,128,1,float16,float16,0,0.0690895974636078
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1024,8,4,128,1,float16,fp8,0,0.07000640034675598
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1024,8,4,128,1,fp8,fp8,0,0.07022879719734192
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1024,8,8,128,1,float16,float16,0,0.04745120108127594
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1024,8,8,128,1,float16,fp8,0,0.043222400546073916
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1024,8,8,128,1,fp8,fp8,0,0.04318720102310181
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1024,8,1,128,1,float16,float16,0,0.03968639969825745
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1024,8,1,128,1,float16,fp8,0,0.04320639967918396
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1024,8,1,128,1,fp8,fp8,0,0.043244799971580504
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1024,8,2,128,1,float16,float16,0,0.039904001355171206
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1024,8,2,128,1,float16,fp8,0,0.04321599900722504
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1024,8,1,128,1,float16,float16,0,0.19466079473495485
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1024,8,2,128,1,fp8,fp8,0,0.043249601125717164
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1024,8,4,128,1,float16,float16,0,0.04195359945297241
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1024,8,4,128,1,float16,fp8,0,0.04326240122318268
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1024,8,4,128,1,fp8,fp8,0,0.04327360093593598
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1024,8,8,128,1,float16,float16,0,0.028692799806594848
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1024,8,8,128,1,float16,fp8,0,0.026767998933792114
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1024,8,4,128,1,float16,float16,0,0.2170896053314209
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1024,8,8,128,1,fp8,fp8,0,0.02683840095996857
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1024,8,1,128,1,float16,float16,0,0.024798400700092316
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1024,8,1,128,1,float16,fp8,0,0.026824000477790832
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1024,8,1,128,1,fp8,fp8,0,0.026822400093078614
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1024,8,2,128,1,float16,float16,0,0.02476159930229187
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1024,8,2,128,1,float16,fp8,0,0.0268528014421463
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1024,8,2,128,1,fp8,fp8,0,0.02680160105228424
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1024,8,4,128,1,float16,float16,0,0.026265600323677064
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1024,8,4,128,1,float16,fp8,0,0.026840001344680786
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1024,8,4,128,1,fp8,fp8,0,0.02687999904155731
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1024,8,8,128,1,float16,float16,0,0.02476799935102463
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1024,8,1,128,1,fp8,fp8,0,0.12553600072860718
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1024,8,8,128,1,fp8,fp8,0,0.02463040053844452
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1024,8,1,128,1,float16,fp8,0,0.024120000004768372
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1024,8,1,128,1,fp8,fp8,0,0.024383999407291412
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1024,8,2,128,1,float16,float16,0,0.02287199944257736
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1024,8,2,128,1,float16,fp8,0,0.02459840029478073
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1024,8,2,128,1,fp8,fp8,0,0.024432000517845155
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1024,8,8,128,1,float16,float16,0,0.07898079752922058
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1024,8,4,128,1,float16,float16,0,0.023924799263477327
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1024,8,4,128,1,float16,fp8,0,0.023163199424743652
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1024,8,4,128,1,fp8,fp8,0,0.02460159957408905
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1024,8,8,128,1,float16,float16,0,0.0227743998169899
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1024,8,8,128,1,float16,fp8,0,0.02266560047864914
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1024,8,8,128,1,fp8,fp8,0,0.02268960028886795
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1024,8,1,128,1,float16,float16,0,0.0227743998169899
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1024,8,1,128,1,float16,fp8,0,0.022683200240135194
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1024,8,1,128,1,fp8,fp8,0,0.0701088011264801
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1024,8,1,128,1,fp8,fp8,0,0.022724799811840057
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1024,8,2,128,1,float16,float16,0,0.02272160053253174
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1024,8,2,128,1,float16,fp8,0,0.02265920042991638
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1024,8,2,128,1,fp8,fp8,0,0.02274720072746277
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1024,8,4,128,1,float16,fp8,0,0.022771200537681578
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,512,8,1,128,1,float16,float16,0,0.27827839851379393
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,512,8,1,128,1,float16,fp8,0,0.3631711959838867
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,512,8,1,128,1,fp8,fp8,0,0.36407520771026614
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,512,8,2,128,1,float16,float16,0,0.283243203163147
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,512,8,2,128,1,float16,fp8,0,0.3634063959121704
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,512,8,2,128,1,fp8,fp8,0,0.3640559911727905
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,512,8,4,128,1,float16,float16,0,0.3254784107208252
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,512,8,4,128,1,fp8,fp8,0,0.36450240612030027
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1024,8,8,128,1,float16,fp8,0,0.2351248025894165
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,512,8,8,128,1,float16,float16,0,0.2114016056060791
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,512,8,8,128,1,float16,fp8,0,0.19052959680557252
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,512,8,8,128,1,fp8,fp8,0,0.19105440378189087
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,512,8,1,128,1,float16,fp8,0,0.18921120166778566
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,512,8,2,128,1,float16,float16,0,0.14912480115890503
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1024,8,8,128,1,float16,fp8,0,0.02468000054359436
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1024,8,1,128,1,float16,float16,0,0.022678400576114654
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,512,8,2,128,1,float16,fp8,0,0.18933279514312745
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,512,8,2,128,1,fp8,fp8,0,0.18860000371932983
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,512,8,4,128,1,float16,float16,0,0.16819519996643068
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,512,8,8,128,1,float16,float16,0,0.11309759616851807
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,512,8,4,128,1,fp8,fp8,0,0.189191997051239
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,512,8,8,128,1,float16,fp8,0,0.10303200483322143
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,512,8,8,128,1,fp8,fp8,0,0.10241600275039672
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,512,8,1,128,1,float16,float16,0,0.0830735981464386
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1024,8,4,128,1,float16,float16,0,0.022681599855422972
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1024,8,4,128,1,fp8,fp8,0,0.0226623997092247
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,512,8,1,128,1,fp8,fp8,0,0.10141279697418212
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,512,8,2,128,1,float16,float16,0,0.08334879875183106
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,512,8,2,128,1,float16,fp8,0,0.10191199779510499
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,512,8,2,128,1,fp8,fp8,0,0.10210399627685547
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,512,8,4,128,1,float16,float16,0,0.09342560172080994
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,512,8,4,128,1,float16,fp8,0,0.10196479558944702
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,512,8,4,128,1,fp8,fp8,0,0.10263839960098267
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,512,8,8,128,1,float16,float16,0,0.06366559863090515
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,512,8,8,128,1,float16,fp8,0,0.0575872004032135
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,512,8,8,128,1,fp8,fp8,0,0.057734400033950806
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,512,8,1,128,1,float16,float16,0,0.04736959934234619
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,512,8,1,128,1,float16,fp8,0,0.05554400086402893
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,512,8,2,128,1,float16,float16,0,0.048153600096702574
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,512,8,2,128,1,float16,fp8,0,0.05591679811477661
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,512,8,2,128,1,fp8,fp8,0,0.05596799850463867
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,512,8,4,128,1,float16,float16,0,0.0538752019405365
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,512,8,4,128,1,float16,fp8,0,0.05583680272102356
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,512,8,4,128,1,fp8,fp8,0,0.05604479908943176
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,512,8,8,128,1,float16,float16,0,0.038843199610710144
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,512,8,8,128,1,float16,fp8,0,0.03487200140953064
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,512,8,4,128,1,float16,fp8,0,0.36309759616851806
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,512,8,1,128,1,float16,float16,0,0.030134400725364684
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,512,8,1,128,1,float16,fp8,0,0.0330128014087677
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,512,8,1,128,1,fp8,fp8,0,0.03303360044956207
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,512,8,1,128,1,float16,float16,0,0.14880319833755493
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,512,8,2,128,1,float16,float16,0,0.029254400730133058
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,512,8,2,128,1,float16,fp8,0,0.0347216010093689
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,512,8,2,128,1,fp8,fp8,0,0.033107200264930726
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,512,8,4,128,1,float16,float16,0,0.03281759917736053
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,512,8,1,128,1,fp8,fp8,0,0.19035199880599976
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,512,8,4,128,1,float16,fp8,0,0.033339199423789975
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,512,8,8,128,1,float16,float16,0,0.022731199860572815
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,512,8,8,128,1,float16,fp8,0,0.020735999941825865
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,512,8,8,128,1,fp8,fp8,0,0.021065600216388702
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,512,8,1,128,1,float16,float16,0,0.018681600689888
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,512,8,1,128,1,float16,fp8,0,0.02080959975719452
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,512,8,2,128,1,float16,float16,0,0.018727999925613404
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,512,8,1,128,1,fp8,fp8,0,0.02074880003929138
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,512,8,2,128,1,float16,fp8,0,0.02080480009317398
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,512,8,2,128,1,fp8,fp8,0,0.020720000565052032
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,512,8,4,128,1,float16,float16,0,0.020659199357032774
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,512,8,4,128,1,float16,fp8,0,0.020745599269866945
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,512,8,4,128,1,fp8,fp8,0,0.020712000131607056
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,512,8,8,128,1,float16,float16,0,0.018691200017929076
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,512,8,4,128,1,float16,fp8,0,0.18951679468154908
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,512,8,8,128,1,float16,fp8,0,0.018648000061511995
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,512,8,8,128,1,fp8,fp8,0,0.018692800402641298
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,512,8,1,128,1,float16,float16,0,0.016939200460910797
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,512,8,1,128,1,float16,fp8,0,0.0186271995306015
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,512,8,1,128,1,fp8,fp8,0,0.018668800592422485
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,512,8,2,128,1,float16,float16,0,0.016728000342845918
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,512,8,2,128,1,float16,fp8,0,0.018590399622917177
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,512,8,2,128,1,fp8,fp8,0,0.01860159933567047
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,512,8,4,128,1,float16,float16,0,0.018667200207710268
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,512,8,4,128,1,float16,fp8,0,0.018643200397491455
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,512,8,1,128,1,float16,fp8,0,0.1007040023803711
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,512,8,4,128,1,fp8,fp8,0,0.018619200587272643
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,512,8,8,128,1,float16,float16,0,0.016678400337696075
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,512,8,8,128,1,fp8,fp8,0,0.016598400473594666
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,512,8,8,128,1,float16,fp8,0,0.01659200042486191
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,512,8,1,128,1,float16,float16,0,0.016633599996566772
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,512,8,2,128,1,float16,float16,0,0.016595199704170227
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,512,8,2,128,1,float16,fp8,0,0.016606399416923524
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,512,8,2,128,1,fp8,fp8,0,0.016582399606704712
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,512,8,4,128,1,float16,float16,0,0.016604800522327424
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,512,8,4,128,1,float16,fp8,0,0.016622400283813475
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,512,8,4,128,1,fp8,fp8,0,0.016590400040149687
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,512,8,8,128,1,float16,float16,0,0.01659359931945801
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,512,8,8,128,1,float16,fp8,0,0.016601599752902985
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,512,8,8,128,1,fp8,fp8,0,0.016601599752902985
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,512,8,1,128,1,fp8,fp8,0,0.05556960105895996
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,512,8,1,128,1,float16,float16,0,0.016625599563121797
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,512,8,1,128,1,fp8,fp8,0,0.016550399363040924
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,512,8,2,128,1,float16,float16,0,0.01661120057106018
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,512,8,2,128,1,float16,fp8,0,0.016564799845218657
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,512,8,2,128,1,fp8,fp8,0,0.016625599563121797
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,512,8,4,128,1,float16,float16,0,0.016568000614643096
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,512,8,4,128,1,float16,fp8,0,0.016625599563121797
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,512,8,4,128,1,fp8,fp8,0,0.016548800468444824
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,512,8,8,128,1,fp8,fp8,0,0.03487200140953064
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,256,8,1,128,1,float16,fp8,0,0.16292159557342528
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,256,8,1,128,1,fp8,fp8,0,0.164628803730011
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,256,8,2,128,1,float16,float16,0,0.1231727957725525
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,256,8,2,128,1,float16,fp8,0,0.16456799507141112
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,256,8,2,128,1,fp8,fp8,0,0.16410720348358154
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,256,8,4,128,1,float16,float16,0,0.14334720373153687
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,512,8,4,128,1,fp8,fp8,0,0.03442080020904541
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,256,8,4,128,1,float16,fp8,0,0.1643488049507141
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,256,8,8,128,1,float16,fp8,0,0.08771039843559265
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,256,8,4,128,1,fp8,fp8,0,0.16467200517654418
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,256,8,1,128,1,float16,float16,0,0.06726880073547363
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,256,8,1,128,1,float16,fp8,0,0.08626880049705506
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,256,8,1,128,1,fp8,fp8,0,0.08653600215911865
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,256,8,2,128,1,float16,float16,0,0.06817280054092408
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,256,8,2,128,1,float16,fp8,0,0.08668000102043152
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,256,8,2,128,1,fp8,fp8,0,0.08626400232315064
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,256,8,4,128,1,float16,float16,0,0.07814880013465882
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,256,8,4,128,1,float16,fp8,0,0.08666239976882935
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,256,8,8,128,1,float16,float16,0,0.056443202495574954
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,256,8,4,128,1,fp8,fp8,0,0.08818560242652893
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,512,8,1,128,1,fp8,fp8,0,0.016622400283813475
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,256,8,8,128,1,float16,fp8,0,0.0510047972202301
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,256,8,1,128,1,float16,float16,0,0.039099198579788205
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,256,8,1,128,1,float16,fp8,0,0.04907520115375519
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,256,8,1,128,1,fp8,fp8,0,0.04893760085105896
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,256,8,2,128,1,float16,float16,0,0.03997919857501984
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,256,8,2,128,1,float16,fp8,0,0.04936160147190094
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,256,8,2,128,1,fp8,fp8,0,0.049004799127578734
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,256,8,4,128,1,float16,float16,0,0.04541119933128357
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,256,8,4,128,1,float16,fp8,0,0.04931040108203888
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,256,8,4,128,1,fp8,fp8,0,0.04936800003051758
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,256,8,8,128,1,float16,fp8,0,0.028881600499153136
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,256,8,8,128,1,fp8,fp8,0,0.028857600688934327
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,256,8,1,128,1,float16,float16,0,0.023419199883937834
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,256,8,1,128,1,float16,float16,0,0.12154560089111328
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,256,8,1,128,1,float16,fp8,0,0.028835201263427736
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,256,8,1,128,1,fp8,fp8,0,0.02889440059661865
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,256,8,2,128,1,float16,fp8,0,0.028907200694084166
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,256,8,2,128,1,fp8,fp8,0,0.02874079942703247
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,256,8,4,128,1,float16,float16,0,0.02513439953327179
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,256,8,4,128,1,float16,fp8,0,0.028705599904060363
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,256,8,4,128,1,fp8,fp8,0,0.0288239985704422
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,256,8,8,128,1,float16,float16,0,0.09863680005073547
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,256,8,8,128,1,float16,float16,0,0.018676799535751343
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,256,8,8,128,1,fp8,fp8,0,0.08770080208778382
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,256,8,8,128,1,float16,fp8,0,0.018675200641155243
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,256,8,8,128,1,fp8,fp8,0,0.01860959976911545
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,256,8,1,128,1,float16,float16,0,0.014985600113868713
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,256,8,1,128,1,float16,fp8,0,0.018587200343608855
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,256,8,1,128,1,fp8,fp8,0,0.018697600066661834
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,256,8,2,128,1,float16,float16,0,0.014502400159835815
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,256,8,2,128,1,float16,fp8,0,0.018603199720382692
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,256,8,2,128,1,fp8,fp8,0,0.018611200153827667
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,256,8,4,128,1,float16,float16,0,0.016601599752902985
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,256,8,4,128,1,float16,fp8,0,0.01857440024614334
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,256,8,8,128,1,float16,float16,0,0.01653600037097931
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,256,8,8,128,1,float16,fp8,0,0.016465599834918975
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,256,8,8,128,1,fp8,fp8,0,0.016542400419712066
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,512,8,1,128,1,float16,fp8,0,0.016595199704170227
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,256,8,1,128,1,float16,fp8,0,0.014579200744628906
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,256,8,8,128,1,fp8,fp8,0,0.05139840245246887
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,256,8,1,128,1,fp8,fp8,0,0.014545600116252898
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,256,8,2,128,1,float16,float16,0,0.014470399916172027
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,256,8,2,128,1,float16,fp8,0,0.014627200365066529
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,256,8,2,128,1,fp8,fp8,0,0.014672000706195832
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,256,8,4,128,1,float16,float16,0,0.014587199687957764
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,256,8,4,128,1,float16,fp8,0,0.014641599357128143
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,256,8,4,128,1,fp8,fp8,0,0.014553600549697876
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,512,8,1,128,1,float16,fp8,0,0.016571199893951415
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,256,8,8,128,1,float16,fp8,0,0.014555199444293976
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,256,8,8,128,1,fp8,fp8,0,0.014470399916172027
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,256,8,1,128,1,float16,float16,0,0.013040000200271606
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,256,8,1,128,1,float16,fp8,0,0.014521600306034088
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,256,8,1,128,1,fp8,fp8,0,0.014574399590492249
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,256,8,8,128,1,float16,float16,0,0.03107360005378723
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,256,8,2,128,1,float16,float16,0,0.012612800300121307
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,256,8,2,128,1,float16,fp8,0,0.014871999621391296
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,256,8,4,128,1,float16,float16,0,0.014531199634075165
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,256,8,4,128,1,float16,fp8,0,0.014552000164985656
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,256,8,4,128,1,fp8,fp8,0,0.014497600495815277
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,256,8,8,128,1,float16,float16,0,0.012803199887275695
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,256,8,8,128,1,float16,fp8,0,0.013236799836158752
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,256,8,8,128,1,fp8,fp8,0,0.012580800056457519
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,256,8,2,128,1,float16,float16,0,0.023839999735355378
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,256,8,1,128,1,float16,float16,0,0.012585599720478059
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,256,8,1,128,1,float16,fp8,0,0.012590399384498597
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,256,8,1,128,1,fp8,fp8,0,0.012835200130939483
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,256,8,2,128,1,float16,float16,0,0.012468799948692322
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,256,8,2,128,1,float16,fp8,0,0.01250240057706833
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,256,8,2,128,1,fp8,fp8,0,0.012604799866676331
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,256,8,4,128,1,float16,float16,0,0.012516799569129943
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,256,8,4,128,1,fp8,fp8,0,0.012540799379348756
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,256,8,4,128,1,float16,fp8,0,0.01324319988489151
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,256,8,8,128,1,float16,float16,0,0.012539200484752655
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,256,8,8,128,1,float16,fp8,0,0.012529599666595458
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,256,8,8,128,1,fp8,fp8,0,0.01282079964876175
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,256,8,1,128,1,float16,fp8,0,0.012491200119256973
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,256,8,1,128,1,fp8,fp8,0,0.012462399899959564
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,256,8,2,128,1,float16,float16,0,0.012489599734544754
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,256,8,2,128,1,float16,fp8,0,0.012510399520397186
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,256,8,2,128,1,fp8,fp8,0,0.012596799433231354
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,256,8,4,128,1,float16,float16,0,0.012520000338554382
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,256,8,4,128,1,float16,fp8,0,0.012545600533485413
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,256,8,4,128,1,fp8,fp8,0,0.012755200266838074
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,128,8,1,128,1,float16,float16,0,0.07822080254554749
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,128,8,1,128,1,float16,fp8,0,0.09791679978370667
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,128,8,1,128,1,fp8,fp8,0,0.09810400009155273
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,128,8,2,128,1,float16,float16,0,0.07836639881134033
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,128,8,2,128,1,fp8,fp8,0,0.09842079877853394
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,128,8,2,128,1,float16,fp8,0,0.09838240146636963
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,128,8,4,128,1,float16,float16,0,0.08683040142059326
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,128,8,4,128,1,float16,fp8,0,0.09856320023536683
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,128,8,8,128,1,float16,float16,0,0.057873600721359254
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,128,8,4,128,1,fp8,fp8,0,0.09852640032768249
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,128,8,8,128,1,float16,fp8,0,0.05345600247383118
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,128,8,8,128,1,fp8,fp8,0,0.05343679785728454
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,128,8,1,128,1,float16,float16,0,0.043224000930786134
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,256,8,4,128,1,fp8,fp8,0,0.018569600582122803
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,128,8,1,128,1,float16,fp8,0,0.05148000121116638
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,128,8,1,128,1,fp8,fp8,0,0.05173919796943664
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,128,8,2,128,1,float16,float16,0,0.043263998627662656
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,128,8,2,128,1,float16,fp8,0,0.051419198513031006
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,128,8,2,128,1,fp8,fp8,0,0.051500797271728516
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,128,8,4,128,1,float16,fp8,0,0.05183839797973633
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,128,8,4,128,1,fp8,fp8,0,0.05155680179595947
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,128,8,8,128,1,float16,float16,0,0.03478400111198425
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,128,8,8,128,1,float16,fp8,0,0.030976000428199767
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,128,8,8,128,1,fp8,fp8,0,0.030979201197624207
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,128,8,1,128,1,float16,fp8,0,0.030910399556159974
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,128,8,1,128,1,fp8,fp8,0,0.030926400423049928
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,128,8,2,128,1,float16,float16,0,0.026700800657272337
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,128,8,2,128,1,float16,fp8,0,0.03094879984855652
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,128,8,2,128,1,fp8,fp8,0,0.030958399176597595
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,128,8,4,128,1,float16,float16,0,0.0287200003862381
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,256,8,8,128,1,float16,float16,0,0.014507199823856353
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,128,8,4,128,1,float16,fp8,0,0.03094559907913208
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,128,8,4,128,1,fp8,fp8,0,0.030956798791885377
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,128,8,8,128,1,float16,float16,0,0.020638400316238405
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,128,8,8,128,1,float16,fp8,0,0.018719999492168425
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,128,8,1,128,1,float16,float16,0,0.01656160056591034
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,128,8,8,128,1,fp8,fp8,0,0.0187376007437706
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,128,8,1,128,1,float16,fp8,0,0.01868640035390854
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,128,8,1,128,1,fp8,fp8,0,0.018691200017929076
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,128,8,2,128,1,float16,float16,0,0.016631999611854555
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,256,8,2,128,1,fp8,fp8,0,0.01451520025730133
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,128,8,2,128,1,float16,fp8,0,0.01857919991016388
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,128,8,2,128,1,fp8,fp8,0,0.018694399297237395
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,128,8,4,128,1,float16,fp8,0,0.018673600256443025
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,128,8,4,128,1,fp8,fp8,0,0.018699200451374055
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,128,8,4,128,1,float16,float16,0,0.0186271995306015
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,128,8,8,128,1,float16,float16,0,0.013289600610733032
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,128,8,8,128,1,fp8,fp8,0,0.012670400738716125
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,128,8,1,128,1,float16,float16,0,0.012454400211572647
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,128,8,8,128,1,float16,fp8,0,0.012540799379348756
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,128,8,1,128,1,float16,fp8,0,0.012899200618267059
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,128,8,1,128,1,fp8,fp8,0,0.012515200674533844
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,128,8,2,128,1,float16,float16,0,0.01252799928188324
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,128,8,2,128,1,float16,fp8,0,0.012483199685811996
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,128,8,2,128,1,fp8,fp8,0,0.01255200058221817
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,128,8,4,128,1,float16,fp8,0,0.012535999715328216
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,128,8,4,128,1,fp8,fp8,0,0.012755200266838074
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,128,8,8,128,1,float16,float16,0,0.012403199821710587
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,128,8,8,128,1,float16,fp8,0,0.012455999851226807
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,128,8,8,128,1,fp8,fp8,0,0.012432000041007996
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,128,8,1,128,1,float16,float16,0,0.010496000200510025
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,128,8,1,128,1,float16,fp8,0,0.012441600114107132
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,128,8,1,128,1,fp8,fp8,0,0.01234399974346161
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,128,8,2,128,1,float16,float16,0,0.010558400303125381
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,128,8,2,128,1,float16,fp8,0,0.0124208003282547
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,128,8,2,128,1,fp8,fp8,0,0.012452799826860428
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,128,8,4,128,1,float16,fp8,0,0.012415999919176102
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,128,8,4,128,1,fp8,fp8,0,0.012465599924325943
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,128,8,8,128,1,float16,float16,0,0.010491199791431427
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,128,8,8,128,1,float16,fp8,0,0.01048159971833229
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,128,8,8,128,1,fp8,fp8,0,0.010507199913263321
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,128,8,1,128,1,float16,float16,0,0.010411199927330018
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,128,8,1,128,1,float16,fp8,0,0.010460799932479859
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,128,8,1,128,1,fp8,fp8,0,0.010449600219726563
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,128,8,4,128,1,float16,float16,0,0.04912959933280945
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,256,8,1,128,1,float16,float16,0,0.014504000544548035
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,128,8,2,128,1,float16,fp8,0,0.010443200170993806
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,128,8,2,128,1,fp8,fp8,0,0.010422399640083313
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,128,8,4,128,1,float16,float16,0,0.010449600219726563
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,128,8,1,128,1,float16,float16,0,0.026825600862503053
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,128,8,4,128,1,float16,fp8,0,0.010475199669599533
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,128,8,4,128,1,fp8,fp8,0,0.0104592002928257
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,128,8,8,128,1,float16,fp8,0,0.010494399815797806
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,128,8,8,128,1,fp8,fp8,0,0.010446400195360184
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,128,8,1,128,1,float16,float16,0,0.010401599854230881
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,128,8,1,128,1,float16,fp8,0,0.010494399815797806
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,128,8,1,128,1,fp8,fp8,0,0.010425599664449692
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,128,8,2,128,1,float16,float16,0,0.010419200360774993
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,128,8,2,128,1,float16,fp8,0,0.010401599854230881
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,128,8,2,128,1,fp8,fp8,0,0.010422399640083313
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,128,8,4,128,1,float16,float16,0,0.01034879982471466
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,128,8,4,128,1,float16,fp8,0,0.010451199859380722
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,128,8,4,128,1,fp8,fp8,0,0.010465600341558457
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,128,8,8,128,1,float16,float16,0,0.010422399640083313
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,128,8,8,128,1,float16,fp8,0,0.010395199805498124
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,128,8,1,128,1,float16,float16,0,0.010425599664449692
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,128,8,8,128,1,fp8,fp8,0,0.0105103999376297
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,128,8,1,128,1,float16,fp8,0,0.01040479987859726
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,128,8,1,128,1,fp8,fp8,0,0.010471999645233154
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,128,8,2,128,1,float16,float16,0,0.010390400141477584
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,128,8,2,128,1,float16,fp8,0,0.010473600029945374
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,128,8,2,128,1,fp8,fp8,0,0.010446400195360184
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,128,8,4,128,1,float16,float16,0,0.01048159971833229
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,128,8,4,128,1,fp8,fp8,0,0.010447999835014344
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,64,8,1,128,1,float16,float16,0,0.06346719861030578
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,64,8,1,128,1,float16,fp8,0,0.07094879746437073
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,64,8,1,128,1,fp8,fp8,0,0.07145280241966248
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,64,8,2,128,1,float16,float16,0,0.06515200138092041
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,128,8,4,128,1,float16,float16,0,0.0125231996178627
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,64,8,2,128,1,float16,fp8,0,0.07156640291213989
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,64,8,2,128,1,fp8,fp8,0,0.07118880152702331
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,64,8,4,128,1,float16,float16,0,0.0696560025215149
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,64,8,4,128,1,float16,fp8,0,0.07191839814186096
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,256,8,1,128,1,float16,float16,0,0.01250080019235611
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,64,8,4,128,1,fp8,fp8,0,0.07220640182495117
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,64,8,8,128,1,float16,float16,0,0.043971198797225955
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,64,8,8,128,1,float16,fp8,0,0.03909760117530823
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,64,8,8,128,1,fp8,fp8,0,0.03925440013408661
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,64,8,1,128,1,float16,float16,0,0.036248001456260684
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,64,8,1,128,1,float16,fp8,0,0.03912799954414368
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,64,8,1,128,1,fp8,fp8,0,0.03918879926204681
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,64,8,2,128,1,float16,float16,0,0.0367904007434845
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,64,8,2,128,1,float16,fp8,0,0.03916960060596466
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,64,8,2,128,1,fp8,fp8,0,0.039164799451828006
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,64,8,4,128,1,float16,fp8,0,0.039134401082992556
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,128,8,4,128,1,float16,float16,0,0.010566399991512298
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,64,8,4,128,1,fp8,fp8,0,0.03911519944667816
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,64,8,8,128,1,float16,float16,0,0.02605760097503662
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,64,8,8,128,1,float16,fp8,0,0.024784000217914583
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,64,8,8,128,1,fp8,fp8,0,0.02473440021276474
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,64,8,1,128,1,float16,fp8,0,0.02476319968700409
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,64,8,2,128,1,float16,float16,0,0.022681599855422972
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,64,8,2,128,1,float16,fp8,0,0.02481119930744171
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,64,8,2,128,1,fp8,fp8,0,0.024726399779319765
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,64,8,4,128,1,float16,float16,0,0.02277279943227768
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,64,8,4,128,1,float16,fp8,0,0.024742400646209715
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,64,8,4,128,1,fp8,fp8,0,0.02486560046672821
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,64,8,8,128,1,float16,float16,0,0.016516800224781036
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,64,8,8,128,1,float16,fp8,0,0.01659359931945801
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,64,8,8,128,1,fp8,fp8,0,0.016551999747753142
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,64,8,1,128,1,float16,float16,0,0.014601600170135499
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,64,8,1,128,1,float16,fp8,0,0.016616000235080718
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,128,8,2,128,1,float16,float16,0,0.01040479987859726
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,64,8,2,128,1,float16,float16,0,0.01462240070104599
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,64,8,2,128,1,float16,fp8,0,0.016551999747753142
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,128,8,8,128,1,float16,float16,0,0.010452800244092942
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,64,8,2,128,1,fp8,fp8,0,0.016531200706958772
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,64,8,4,128,1,float16,float16,0,0.015612800419330598
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,64,8,4,128,1,float16,fp8,0,0.016574400663375854
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,64,8,4,128,1,fp8,fp8,0,0.016540800034999848
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,64,8,8,128,1,float16,float16,0,0.012503999471664428
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,64,8,8,128,1,float16,fp8,0,0.011307200044393539
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,64,8,8,128,1,fp8,fp8,0,0.011163199692964554
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,64,8,1,128,1,float16,float16,0,0.01048479974269867
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,64,8,1,128,1,float16,fp8,0,0.010585600137710571
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,64,8,2,128,1,float16,float16,0,0.010440000146627427
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,64,8,2,128,1,float16,fp8,0,0.010993599891662598
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,64,8,2,128,1,fp8,fp8,0,0.011059200018644333
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,64,8,4,128,1,float16,float16,0,0.010520000010728836
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,64,8,4,128,1,float16,fp8,0,0.011193600296974183
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,64,8,4,128,1,fp8,fp8,0,0.011392000317573547
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,64,8,8,128,1,float16,float16,0,0.010463999956846238
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,64,8,8,128,1,float16,fp8,0,0.010465600341558457
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,64,8,8,128,1,fp8,fp8,0,0.01048159971833229
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,64,8,1,128,1,float16,float16,0,0.010422399640083313
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,64,8,1,128,1,float16,fp8,0,0.010467199981212616
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,64,8,1,128,1,fp8,fp8,0,0.010435199737548828
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,64,8,4,128,1,float16,float16,0,0.038756799697875974
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,64,8,2,128,1,float16,float16,0,0.010443200170993806
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,64,8,2,128,1,float16,fp8,0,0.010412800312042236
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,64,8,1,128,1,float16,float16,0,0.02274080067873001
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,64,8,1,128,1,fp8,fp8,0,0.024766400456428528
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,64,8,4,128,1,float16,fp8,0,0.0104592002928257
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,64,8,4,128,1,fp8,fp8,0,0.010491199791431427
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,64,8,8,128,1,float16,float16,0,0.01043199971318245
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,64,8,8,128,1,float16,fp8,0,0.010452800244092942
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,64,8,8,128,1,fp8,fp8,0,0.010454399883747101
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,64,8,1,128,1,float16,float16,0,0.010440000146627427
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,64,8,1,128,1,float16,fp8,0,0.010438399761915207
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,64,8,2,128,1,float16,float16,0,0.010451199859380722
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,64,8,2,128,1,float16,fp8,0,0.010351999849081039
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,64,8,2,128,1,fp8,fp8,0,0.010486400127410889
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,64,8,4,128,1,float16,float16,0,0.010499200224876404
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,64,8,4,128,1,float16,fp8,0,0.01048479974269867
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,64,8,1,128,1,fp8,fp8,0,0.016604800522327424
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,64,8,4,128,1,fp8,fp8,0,0.010446400195360184
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,64,8,8,128,1,float16,float16,0,0.010460799932479859
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,64,8,8,128,1,float16,fp8,0,0.010420800000429154
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,64,8,8,128,1,fp8,fp8,0,0.01053600013256073
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,64,8,1,128,1,float16,float16,0,0.01045600026845932
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,64,8,1,128,1,float16,fp8,0,0.010468800365924836
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,64,8,1,128,1,fp8,fp8,0,0.010795199871063232
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,64,8,2,128,1,float16,float16,0,0.010363200306892395
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,64,8,2,128,1,float16,fp8,0,0.010367999970912933
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,64,8,2,128,1,fp8,fp8,0,0.010411199927330018
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,64,8,4,128,1,float16,fp8,0,0.010486400127410889
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,64,8,4,128,1,fp8,fp8,0,0.010371199995279311
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,64,8,1,128,1,fp8,fp8,0,0.011212799698114395
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,64,8,8,128,1,float16,float16,0,0.010916800051927567
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,64,8,8,128,1,float16,fp8,0,0.010372799634933472
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,64,8,8,128,1,fp8,fp8,0,0.010471999645233154
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,64,8,1,128,1,float16,float16,0,0.010422399640083313
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,64,8,1,128,1,float16,fp8,0,0.01026879996061325
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,64,8,1,128,1,fp8,fp8,0,0.010417599976062775
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,64,8,2,128,1,float16,float16,0,0.010326399654150008
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,64,8,2,128,1,float16,fp8,0,0.010360000282526016
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,64,8,2,128,1,fp8,fp8,0,0.010467199981212616
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,64,8,4,128,1,float16,float16,0,0.01037919968366623
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,64,8,4,128,1,float16,fp8,0,0.010366400331258773
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,64,8,4,128,1,fp8,fp8,0,0.01037919968366623
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,32,8,1,128,1,float16,float16,0,0.05650879740715027
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,32,8,1,128,1,float16,fp8,0,0.05912479758262634
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,32,8,1,128,1,fp8,fp8,0,0.05909919738769531
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,32,8,2,128,1,float16,float16,0,0.056683200597763064
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,32,8,2,128,1,float16,fp8,0,0.05952960252761841
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,32,8,2,128,1,fp8,fp8,0,0.05912479758262634
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,32,8,4,128,1,float16,float16,0,0.05925760269165039
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,32,8,4,128,1,float16,fp8,0,0.059462398290634155
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,32,8,8,128,1,float16,float16,0,0.03580160140991211
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,64,8,2,128,1,fp8,fp8,0,0.010425599664449692
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,64,8,4,128,1,float16,float16,0,0.010447999835014344
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,32,8,8,128,1,fp8,fp8,0,0.033169600367546084
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,32,8,1,128,1,float16,float16,0,0.032979199290275575
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,32,8,1,128,1,float16,fp8,0,0.03296320140361786
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,32,8,1,128,1,fp8,fp8,0,0.03298400044441223
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,32,8,2,128,1,float16,float16,0,0.032995200157165526
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,32,8,2,128,1,float16,fp8,0,0.033030399680137636
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,32,8,2,128,1,fp8,fp8,0,0.03299840092658997
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,32,8,4,128,1,float16,float16,0,0.033913600444793704
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,32,8,4,128,1,float16,fp8,0,0.033022400736808774
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,32,8,4,128,1,fp8,fp8,0,0.03304159939289093
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,64,8,1,128,1,fp8,fp8,0,0.01045600026845932
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,32,8,8,128,1,float16,float16,0,0.022728000581264497
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,32,8,8,128,1,float16,fp8,0,0.020950399339199066
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,32,8,8,128,1,fp8,fp8,0,0.020715199410915375
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,32,8,1,128,1,float16,float16,0,0.020742399990558623
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,32,8,1,128,1,float16,fp8,0,0.020791999995708466
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,32,8,1,128,1,fp8,fp8,0,0.020745599269866945
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,32,8,2,128,1,float16,float16,0,0.02113119959831238
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,32,8,2,128,1,float16,fp8,0,0.020708799362182617
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,32,8,2,128,1,fp8,fp8,0,0.02129279971122742
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,32,8,4,128,1,float16,float16,0,0.02136480063199997
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,32,8,4,128,1,float16,fp8,0,0.020716799795627593
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,32,8,4,128,1,fp8,fp8,0,0.021164800226688384
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,32,8,8,128,1,float16,float16,0,0.014585599303245544
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,32,8,8,128,1,float16,fp8,0,0.014575999975204468
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,32,8,8,128,1,fp8,fp8,0,0.014545600116252898
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,32,8,1,128,1,float16,float16,0,0.014571200311183929
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,32,8,1,128,1,float16,fp8,0,0.014528000354766845
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,32,8,1,128,1,fp8,fp8,0,0.014563199877738953
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,32,8,2,128,1,float16,float16,0,0.014584000408649444
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,32,8,2,128,1,fp8,fp8,0,0.01451680064201355
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,32,8,2,128,1,float16,fp8,0,0.014595200121402741
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,32,8,4,128,1,float16,float16,0,0.014529600739479065
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,32,8,4,128,1,float16,fp8,0,0.014563199877738953
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,32,8,4,128,1,fp8,fp8,0,0.014574399590492249
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,32,8,8,128,1,float16,fp8,0,0.010473600029945374
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,32,8,8,128,1,float16,float16,0,0.010694400221109391
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,32,8,1,128,1,float16,float16,0,0.010420800000429154
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,32,8,1,128,1,float16,fp8,0,0.010471999645233154
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,64,8,4,128,1,float16,float16,0,0.010366400331258773
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,32,8,1,128,1,fp8,fp8,0,0.010436800122261048
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,32,8,2,128,1,float16,float16,0,0.010486400127410889
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,32,8,2,128,1,float16,fp8,0,0.01045759990811348
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,32,8,2,128,1,fp8,fp8,0,0.010420800000429154
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,32,8,4,128,1,float16,fp8,0,0.01043199971318245
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,32,8,4,128,1,fp8,fp8,0,0.010542400181293488
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,32,8,8,128,1,float16,float16,0,0.010454399883747101
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,32,8,8,128,1,fp8,fp8,0,0.0104592002928257
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,32,8,8,128,1,float16,fp8,0,0.010494399815797806
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,32,8,1,128,1,float16,float16,0,0.010468800365924836
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,32,8,1,128,1,float16,fp8,0,0.010436800122261048
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,128,8,4,128,1,float16,fp8,0,0.01048320010304451
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,32,8,2,128,1,float16,float16,0,0.010446400195360184
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,32,8,1,128,1,fp8,fp8,0,0.010468800365924836
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,32,8,2,128,1,fp8,fp8,0,0.010446400195360184
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,32,8,4,128,1,float16,float16,0,0.010427200049161912
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,32,8,4,128,1,float16,fp8,0,0.010422399640083313
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,32,8,4,128,1,fp8,fp8,0,0.01043360009789467
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,32,8,8,128,1,float16,float16,0,0.010435199737548828
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,32,8,8,128,1,float16,fp8,0,0.010132800042629241
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,32,8,8,128,1,fp8,fp8,0,0.010443200170993806
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,32,8,1,128,1,float16,float16,0,0.00963200032711029
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,32,8,1,128,1,float16,fp8,0,0.010420800000429154
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,32,8,1,128,1,fp8,fp8,0,0.009223999828100205
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,32,8,2,128,1,float16,float16,0,0.010195200145244599
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,32,8,2,128,1,fp8,fp8,0,0.010311999917030334
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,32,8,4,128,1,float16,float16,0,0.010497599840164185
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,32,8,4,128,1,float16,fp8,0,0.010065600275993347
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,32,8,4,128,1,fp8,fp8,0,0.05912160277366638
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,32,8,8,128,1,float16,fp8,0,0.03375360071659088
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,32,8,8,128,1,float16,float16,0,0.010406400263309478
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,32,8,8,128,1,float16,fp8,0,0.010446400195360184
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,32,8,8,128,1,fp8,fp8,0,0.009769599884748459
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,32,8,1,128,1,float16,float16,0,0.00947680026292801
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,32,8,1,128,1,float16,fp8,0,0.010401599854230881
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,32,8,1,128,1,fp8,fp8,0,0.00846560001373291
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,32,8,2,128,1,float16,float16,0,0.010372799634933472
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,32,8,2,128,1,float16,fp8,0,0.009257599711418152
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,32,8,2,128,1,fp8,fp8,0,0.010401599854230881
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,32,8,4,128,1,float16,float16,0,0.009272000193595887
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,32,8,4,128,1,float16,fp8,0,0.010367999970912933
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,32,8,4,128,1,fp8,fp8,0,0.009374400228261947
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,32,8,8,128,1,float16,fp8,0,0.008502399921417237
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,32,8,8,128,1,fp8,fp8,0,0.008472000062465668
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,32,8,1,128,1,float16,float16,0,0.00846560001373291
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,32,8,1,128,1,float16,fp8,0,0.009414400160312652
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,32,8,1,128,1,fp8,fp8,0,0.009468799829483033
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,32,8,2,128,1,float16,float16,0,0.009492799639701843
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,32,8,8,128,1,fp8,fp8,0,0.010521599650382995
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,32,8,2,128,1,float16,fp8,0,0.009777600318193436
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,32,8,2,128,1,fp8,fp8,0,0.00843840017914772
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,32,8,4,128,1,float16,float16,0,0.009884800016880035
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,32,8,4,128,1,float16,fp8,0,0.008441600203514098
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,32,8,4,128,1,fp8,fp8,0,0.009438399970531464
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,32,8,4,128,1,float16,float16,0,0.010505600273609162
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,16,8,1,128,1,float16,float16,0,0.05571519732475281
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,16,8,1,128,1,float16,fp8,0,0.05443360209465027
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,16,8,2,128,1,float16,float16,0,0.05591840147972107
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,16,8,2,128,1,float16,fp8,0,0.05472319722175598
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,16,8,2,128,1,fp8,fp8,0,0.05429760217666626
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,16,8,4,128,1,float16,float16,0,0.0568943977355957
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,16,8,4,128,1,fp8,fp8,0,0.05450080037117004
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,32,8,2,128,1,float16,fp8,0,0.010476800054311753
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,16,8,8,128,1,float16,float16,0,0.03336000144481659
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,16,8,8,128,1,float16,fp8,0,0.03094879984855652
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,16,8,8,128,1,fp8,fp8,0,0.030905601382255555
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,16,8,1,128,1,float16,float16,0,0.03210560083389282
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,16,8,1,128,1,float16,fp8,0,0.03097119927406311
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,16,8,1,128,1,fp8,fp8,0,0.03097760081291199
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,16,8,2,128,1,float16,float16,0,0.0321152001619339
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,16,8,2,128,1,float16,fp8,0,0.031001600623130798
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,16,8,4,128,1,float16,float16,0,0.032892799377441405
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,16,8,2,128,1,fp8,fp8,0,0.030902400612831116
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,16,8,4,128,1,float16,fp8,0,0.03094240128993988
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,16,8,4,128,1,fp8,fp8,0,0.030988800525665283
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,32,8,4,128,1,fp8,fp8,0,0.010427200049161912
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,16,8,8,128,1,float16,float16,0,0.020735999941825865
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,16,8,8,128,1,float16,fp8,0,0.020606400072574617
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,16,8,8,128,1,fp8,fp8,0,0.020681600272655486
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,16,8,1,128,1,float16,float16,0,0.02065120041370392
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,16,8,1,128,1,float16,fp8,0,0.02036159932613373
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,16,8,1,128,1,fp8,fp8,0,0.02067999988794327
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,16,8,2,128,1,float16,float16,0,0.020641599595546723
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,16,8,2,128,1,float16,fp8,0,0.0206496000289917
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,16,8,4,128,1,float16,float16,0,0.020644800364971162
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,16,8,4,128,1,float16,fp8,0,0.02067199945449829
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,16,8,8,128,1,float16,float16,0,0.014484800398349762
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,16,8,4,128,1,fp8,fp8,0,0.02070239931344986
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,16,8,8,128,1,float16,fp8,0,0.013767999410629273
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,16,8,8,128,1,fp8,fp8,0,0.014116799831390381
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,32,8,8,128,1,float16,float16,0,0.010380800068378448
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,16,8,1,128,1,float16,float16,0,0.014312000572681427
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,16,8,1,128,1,float16,fp8,0,0.013617600500583648
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,16,8,2,128,1,float16,float16,0,0.01390880048274994
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,16,8,2,128,1,float16,fp8,0,0.01372160017490387
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,16,8,2,128,1,fp8,fp8,0,0.014185599982738495
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,16,8,4,128,1,float16,float16,0,0.014419199526309967
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,16,8,4,128,1,float16,fp8,0,0.014484800398349762
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,16,8,8,128,1,float16,float16,0,0.01040000021457672
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,16,8,8,128,1,float16,fp8,0,0.01040479987859726
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,16,8,8,128,1,fp8,fp8,0,0.010364799946546554
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,16,8,1,128,1,float16,float16,0,0.010487999767065048
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,16,8,1,128,1,float16,fp8,0,0.010398399829864503
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,16,8,1,128,1,fp8,fp8,0,0.010393600165843963
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,16,8,2,128,1,float16,float16,0,0.0104032002389431
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,16,8,1,128,1,fp8,fp8,0,0.054016000032424925
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,16,8,2,128,1,float16,fp8,0,0.0104592002928257
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,16,8,2,128,1,fp8,fp8,0,0.010497599840164185
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,16,8,4,128,1,float16,fp8,0,0.010463999956846238
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,16,8,4,128,1,float16,fp8,0,0.05452640056610107
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,16,8,4,128,1,fp8,fp8,0,0.010496000200510025
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,16,8,8,128,1,float16,float16,0,0.01045600026845932
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,16,8,8,128,1,float16,fp8,0,0.010385599732398988
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,16,8,8,128,1,fp8,fp8,0,0.010476800054311753
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,16,8,1,128,1,float16,float16,0,0.010475199669599533
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,16,8,1,128,1,float16,fp8,0,0.010463999956846238
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,16,8,1,128,1,fp8,fp8,0,0.010503999888896942
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,16,8,2,128,1,float16,float16,0,0.010441599786281586
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,16,8,2,128,1,float16,fp8,0,0.010465600341558457
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,16,8,2,128,1,fp8,fp8,0,0.01043360009789467
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,16,8,4,128,1,float16,float16,0,0.01048159971833229
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,16,8,4,128,1,float16,fp8,0,0.010438399761915207
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,32,8,2,128,1,float16,fp8,0,0.010041599720716476
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16,8,8,128,1,float16,float16,0,0.01040479987859726
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16,8,8,128,1,float16,fp8,0,0.009120000153779983
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16,8,8,128,1,fp8,fp8,0,0.010473600029945374
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16,8,1,128,1,float16,float16,0,0.009815999865531921
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16,8,1,128,1,float16,fp8,0,0.01048159971833229
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16,8,1,128,1,fp8,fp8,0,0.00841279998421669
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16,8,2,128,1,float16,float16,0,0.00992320030927658
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16,8,2,128,1,float16,fp8,0,0.00941760018467903
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16,8,2,128,1,fp8,fp8,0,0.009060800075531006
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16,8,4,128,1,float16,float16,0,0.010443200170993806
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16,8,4,128,1,float16,fp8,0,0.008908800035715102
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16,8,4,128,1,fp8,fp8,0,0.010447999835014344
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16,8,8,128,1,float16,float16,0,0.009015999734401703
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16,8,8,128,1,float16,fp8,0,0.00984319970011711
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16,8,8,128,1,fp8,fp8,0,0.00851999968290329
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,16,8,2,128,1,fp8,fp8,0,0.020688000321388244
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16,8,1,128,1,float16,float16,0,0.010471999645233154
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16,8,1,128,1,float16,fp8,0,0.00843520015478134
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16,8,2,128,1,float16,float16,0,0.010435199737548828
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16,8,2,128,1,fp8,fp8,0,0.010041599720716476
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16,8,4,128,1,float16,float16,0,0.01043199971318245
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16,8,4,128,1,float16,fp8,0,0.00952640026807785
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,16,8,1,128,1,fp8,fp8,0,0.014473600685596466
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16,8,4,128,1,fp8,fp8,0,0.008408000320196151
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16,8,8,128,1,float16,float16,0,0.010391999781131745
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16,8,8,128,1,float16,fp8,0,0.008479999750852585
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,16,8,4,128,1,fp8,fp8,0,0.014305600523948669
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16,8,8,128,1,fp8,fp8,0,0.008441600203514098
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16,8,1,128,1,float16,float16,0,0.010385599732398988
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16,8,1,128,1,float16,fp8,0,0.008428800106048583
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16,8,1,128,1,fp8,fp8,0,0.008422400057315826
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16,8,2,128,1,float16,float16,0,0.00851840004324913
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16,8,2,128,1,fp8,fp8,0,0.008497600257396699
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16,8,2,128,1,float16,fp8,0,0.00864799991250038
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16,8,4,128,1,float16,float16,0,0.008484800159931184
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16,8,4,128,1,fp8,fp8,0,0.00910400003194809
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,1,8,1,128,1,float16,fp8,0,0.050160002708435056
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,1,8,1,128,1,fp8,fp8,0,0.049902400374412535
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,1,8,2,128,1,float16,float16,0,0.05395359992980957
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,1,8,2,128,1,float16,fp8,0,0.04987359941005707
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,1,8,2,128,1,fp8,fp8,0,0.05003520250320435
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,1,8,4,128,1,float16,float16,0,0.05458400249481201
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,1,8,4,128,1,float16,fp8,0,0.050183999538421634
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,1,8,4,128,1,fp8,fp8,0,0.05007359981536865
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,1,8,8,128,1,float16,float16,0,0.031318399310112
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,16,8,4,128,1,float16,float16,0,0.01040479987859726
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,1,8,8,128,1,float16,fp8,0,0.02887359857559204
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,1,8,8,128,1,fp8,fp8,0,0.02884480059146881
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,1,8,1,128,1,float16,float16,0,0.030899199843406677
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,1,8,1,128,1,float16,fp8,0,0.028896000981330872
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,1,8,1,128,1,fp8,fp8,0,0.02887679934501648
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,1,8,2,128,1,float16,float16,0,0.030988800525665283
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,1,8,2,128,1,float16,fp8,0,0.02898559868335724
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,1,8,2,128,1,fp8,fp8,0,0.028921601176261903
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,1,8,4,128,1,float16,float16,0,0.030873599648475646
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,1,8,4,128,1,float16,fp8,0,0.028865599632263185
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,1,8,4,128,1,fp8,fp8,0,0.028803199529647827
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1,8,8,128,1,float16,float16,0,0.020468799769878386
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1,8,8,128,1,float16,fp8,0,0.018644799292087556
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1,8,8,128,1,fp8,fp8,0,0.018680000305175783
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1,8,1,128,1,float16,float16,0,0.020136000216007234
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1,8,1,128,1,float16,fp8,0,0.018639999628067016
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,16,8,4,128,1,fp8,fp8,0,0.010471999645233154
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1,8,1,128,1,fp8,fp8,0,0.0186256006360054
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1,8,2,128,1,float16,float16,0,0.020972800254821778
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1,8,2,128,1,float16,fp8,0,0.018632000684738158
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1,8,4,128,1,float16,float16,0,0.02067999988794327
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1,8,2,128,1,fp8,fp8,0,0.018676799535751343
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1,8,4,128,1,fp8,fp8,0,0.01863040030002594
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1,8,4,128,1,float16,fp8,0,0.018671999871730804
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1,8,8,128,1,float16,float16,0,0.014563199877738953
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1,8,8,128,1,float16,fp8,0,0.012593600153923034
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1,8,8,128,1,fp8,fp8,0,0.012622399628162384
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1,8,1,128,1,float16,float16,0,0.014496000111103058
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1,8,1,128,1,float16,fp8,0,0.012931199371814727
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1,8,2,128,1,float16,float16,0,0.01448799967765808
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1,8,2,128,1,float16,fp8,0,0.012923200428485871
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1,8,2,128,1,fp8,fp8,0,0.012503999471664428
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1,8,4,128,1,float16,float16,0,0.014473600685596466
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1,8,4,128,1,float16,fp8,0,0.012545600533485413
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1,8,4,128,1,fp8,fp8,0,0.013014400005340576
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16,8,1,128,1,fp8,fp8,0,0.009436800330877303
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1,8,8,128,1,fp8,fp8,0,0.010395199805498124
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1,8,8,128,1,float16,fp8,0,0.0104592002928257
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1,8,1,128,1,float16,float16,0,0.010476800054311753
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1,8,1,128,1,float16,fp8,0,0.010462400317192078
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1,8,1,128,1,fp8,fp8,0,0.010427200049161912
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1,8,2,128,1,float16,float16,0,0.010473600029945374
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1,8,2,128,1,float16,fp8,0,0.010401599854230881
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1,8,2,128,1,fp8,fp8,0,0.010424000024795533
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1,8,4,128,1,float16,float16,0,0.010443200170993806
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1,8,4,128,1,float16,fp8,0,0.010428799688816071
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1,8,4,128,1,fp8,fp8,0,0.010422399640083313
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1,8,8,128,1,float16,float16,0,0.010865599662065507
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1,8,8,128,1,float16,fp8,0,0.01043360009789467
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1,8,8,128,1,fp8,fp8,0,0.009419199824333192
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16,8,4,128,1,float16,fp8,0,0.008479999750852585
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1,8,1,128,1,float16,float16,0,0.010425599664449692
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1,8,1,128,1,float16,fp8,0,0.009515199810266495
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1,8,1,128,1,fp8,fp8,0,0.01045600026845932
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,1,8,1,128,1,float16,float16,0,0.05390080213546753
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1,8,2,128,1,float16,fp8,0,0.010372799634933472
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1,8,2,128,1,fp8,fp8,0,0.01045759990811348
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1,8,4,128,1,float16,float16,0,0.010416000336408614
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1,8,4,128,1,float16,fp8,0,0.010444799810647965
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1,8,4,128,1,fp8,fp8,0,0.010502400249242783
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1,8,8,128,1,float16,float16,0,0.010353600233793258
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1,8,8,128,1,float16,fp8,0,0.010225600004196167
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1,8,8,128,1,fp8,fp8,0,0.010462400317192078
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1,8,1,128,1,float16,float16,0,0.010388799756765366
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1,8,1,128,1,float16,fp8,0,0.009545599669218063
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1,8,1,128,1,fp8,fp8,0,0.008665599673986436
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1,8,2,128,1,float16,float16,0,0.010355199873447418
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1,8,2,128,1,float16,fp8,0,0.008473599702119828
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1,8,2,128,1,fp8,fp8,0,0.00902400016784668
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1,8,4,128,1,float16,float16,0,0.010387200117111205
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1,8,4,128,1,float16,fp8,0,0.008419200032949447
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1,8,4,128,1,fp8,fp8,0,0.008894400298595428
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1,8,8,128,1,float16,fp8,0,0.008481600135564805
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1,8,8,128,1,fp8,fp8,0,0.008399999886751174
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1,8,1,128,1,float16,fp8,0,0.00843520015478134
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1,8,1,128,1,fp8,fp8,0,0.00846880003809929
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1,8,2,128,1,float16,float16,0,0.008641599863767623
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1,8,2,128,1,float16,fp8,0,0.008484800159931184
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1,8,2,128,1,fp8,fp8,0,0.009455999732017517
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1,8,4,128,1,float16,float16,0,0.010382399708032609
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1,8,4,128,1,float16,fp8,0,0.009148799628019334
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1,8,4,128,1,fp8,fp8,0,0.00945120006799698
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1,8,8,128,1,float16,float16,0,0.010446400195360184
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1,8,8,128,1,float16,fp8,0,0.009156800061464309
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1,8,1,128,1,fp8,fp8,0,0.012859199941158295
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1,8,8,128,1,fp8,fp8,0,0.009145600348711013
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1,8,1,128,1,float16,float16,0,0.008416000008583068
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1,8,1,128,1,float16,fp8,0,0.008540800213813782
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1,8,1,128,1,fp8,fp8,0,0.008425600081682205
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1,8,8,128,1,float16,float16,0,0.010407999902963639
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1,8,2,128,1,float16,fp8,0,0.00844319984316826
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16,8,2,128,1,float16,fp8,0,0.008425600081682205
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1,8,4,128,1,float16,float16,0,0.008414400368928909
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1,8,4,128,1,float16,fp8,0,0.008399999886751174
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1,8,4,128,1,fp8,fp8,0,0.009484799951314926
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1,8,2,128,1,float16,float16,0,0.010396800190210342
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1,8,8,128,1,float16,float16,0,0.009428799897432328
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,16384,4,1,128,1,float16,fp8,0,1.5787792205810547
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1,8,1,128,1,float16,float16,0,0.008414400368928909
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,16384,4,2,128,1,float16,float16,0,1.7499216079711915
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16384,4,4,128,1,float16,float16,0,0.9335391998291016
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,16384,4,2,128,1,float16,fp8,0,1.581007957458496
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1,8,2,128,1,float16,float16,0,0.008500800281763077
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1,8,2,128,1,fp8,fp8,0,0.010215999931097031
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16384,4,4,128,1,fp8,fp8,0,0.9106032371520996
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16384,4,4,128,1,float16,fp8,0,1.0093184471130372
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16384,4,1,128,1,float16,float16,0,0.9111023902893066
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16384,4,1,128,1,float16,fp8,0,0.8536831855773925
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16384,4,1,128,1,fp8,fp8,0,0.8538512229919434
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16384,4,2,128,1,float16,fp8,0,0.8853887557983399
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16384,4,4,128,1,float16,float16,0,0.5230303764343261
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16384,4,2,128,1,fp8,fp8,0,0.9728223800659179
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,16384,4,1,128,1,float16,float16,0,1.7113359451293946
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,16384,4,1,128,1,fp8,fp8,0,1.5823776245117187
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16384,4,4,128,1,float16,fp8,0,0.49707999229431155
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16384,4,4,128,1,fp8,fp8,0,0.4923215866088867
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16384,4,1,128,1,float16,fp8,0,0.4880815982818604
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16384,4,1,128,1,float16,float16,0,0.524455976486206
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16384,4,1,128,1,fp8,fp8,0,0.4876863956451416
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16384,4,2,128,1,float16,float16,0,0.5148543834686279
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16384,4,2,128,1,float16,fp8,0,0.4897183895111084
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16384,4,4,128,1,float16,fp8,0,0.30790879726409914
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16384,4,4,128,1,float16,float16,0,0.3286911964416504
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16384,4,4,128,1,fp8,fp8,0,0.31059200763702394
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16384,4,2,128,1,fp8,fp8,0,0.4894400119781494
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16384,4,1,128,1,float16,float16,0,0.31480960845947265
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16384,4,1,128,1,float16,fp8,0,0.30762081146240233
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16384,4,1,128,1,fp8,fp8,0,0.3125488042831421
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16384,4,2,128,1,float16,float16,0,0.3183023929595947
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16384,4,2,128,1,float16,fp8,0,0.30921120643615724
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,16384,4,2,128,1,fp8,fp8,0,1.579259204864502
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16384,4,2,128,1,fp8,fp8,0,0.3082592010498047
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,12288,4,1,128,1,float16,float16,0,1.0142512321472168
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,12288,4,1,128,1,float16,fp8,0,0.9770336151123047
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,12288,4,1,128,1,fp8,fp8,0,0.9589887619018554
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,12288,4,2,128,1,float16,fp8,0,0.9586576461791992
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,12288,4,2,128,1,float16,float16,0,1.0124256134033203
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,12288,4,2,128,1,fp8,fp8,0,0.9588879585266114
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,12288,4,4,128,1,float16,fp8,0,0.5287712097167969
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,12288,4,4,128,1,fp8,fp8,0,0.5290304183959961
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,12288,4,1,128,1,float16,float16,0,0.5445824146270752
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,12288,4,1,128,1,float16,fp8,0,0.5265408039093018
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,12288,4,1,128,1,fp8,fp8,0,0.527185583114624
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,12288,4,2,128,1,float16,float16,0,0.5499152183532715
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,12288,4,4,128,1,float16,float16,0,0.32397439479827883
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,12288,4,4,128,1,float16,fp8,0,0.309716796875
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,12288,4,2,128,1,float16,fp8,0,0.527284812927246
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,12288,4,2,128,1,fp8,fp8,0,0.5276559829711914
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,12288,4,4,128,1,fp8,fp8,0,0.3087248086929321
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,12288,4,1,128,1,float16,float16,0,0.31269919872283936
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,12288,4,1,128,1,float16,fp8,0,0.3084847927093506
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,12288,4,1,128,1,fp8,fp8,0,0.31795520782470704
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,12288,4,2,128,1,float16,float16,0,0.3135472059249878
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,12288,4,4,128,1,float16,float16,0,0.2064176082611084
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,12288,4,2,128,1,fp8,fp8,0,0.30887680053710936
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,12288,4,4,128,1,fp8,fp8,0,0.20273280143737793
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,12288,4,4,128,1,float16,fp8,0,0.19880959987640381
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,12288,4,1,128,1,float16,float16,0,0.20233280658721925
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,12288,4,1,128,1,float16,fp8,0,0.19852639436721803
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,12288,4,1,128,1,fp8,fp8,0,0.19876159429550172
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,12288,4,2,128,1,float16,float16,0,0.20437281131744384
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,12288,4,2,128,1,float16,fp8,0,0.1980847954750061
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,12288,4,2,128,1,fp8,fp8,0,0.1984063982963562
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,10240,4,1,128,1,float16,float16,0,0.7290480136871338
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,10240,4,1,128,1,float16,fp8,0,0.7042416095733642
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,10240,4,1,128,1,fp8,fp8,0,0.70447678565979
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,10240,4,2,128,1,float16,fp8,0,0.7041664123535156
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,10240,4,2,128,1,float16,float16,0,0.7300159931182861
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,10240,4,4,128,1,float16,fp8,0,0.39547200202941896
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,10240,4,4,128,1,fp8,fp8,0,0.39432640075683595
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,10240,4,1,128,1,float16,float16,0,0.3959791898727417
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,10240,4,2,128,1,fp8,fp8,0,0.7038368225097656
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,10240,4,1,128,1,float16,fp8,0,0.39241440296173097
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,10240,4,1,128,1,fp8,fp8,0,0.3916847944259644
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,10240,4,2,128,1,float16,float16,0,0.4129295825958252
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,10240,4,2,128,1,float16,fp8,0,0.3932032108306885
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,12288,4,4,128,1,float16,float16,0,0.5667903900146485
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,10240,4,4,128,1,float16,float16,0,0.24866878986358643
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,10240,4,2,128,1,fp8,fp8,0,0.39380319118499757
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,10240,4,4,128,1,float16,fp8,0,0.250543999671936
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,10240,4,4,128,1,fp8,fp8,0,0.2410207986831665
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,10240,4,1,128,1,float16,float16,0,0.23979361057281495
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,10240,4,1,128,1,float16,fp8,0,0.2404047966003418
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,10240,4,1,128,1,fp8,fp8,0,0.24094080924987793
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,10240,4,2,128,1,float16,float16,0,0.24975841045379638
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,10240,4,2,128,1,float16,fp8,0,0.239355206489563
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,10240,4,2,128,1,fp8,fp8,0,0.24044160842895507
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,10240,4,4,128,1,float16,fp8,0,0.14684640169143676
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,10240,4,4,128,1,float16,float16,0,0.15105439424514772
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,10240,4,4,128,1,fp8,fp8,0,0.15308159589767456
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,10240,4,1,128,1,float16,float16,0,0.1479408025741577
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,10240,4,1,128,1,float16,fp8,0,0.14751839637756348
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,10240,4,1,128,1,fp8,fp8,0,0.14631839990615844
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,10240,4,2,128,1,float16,float16,0,0.1480463981628418
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,10240,4,2,128,1,float16,fp8,0,0.1529855966567993
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,12288,4,2,128,1,float16,fp8,0,0.3095360040664673
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,8192,4,1,128,1,float16,float16,0,0.9246928215026855
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,8192,4,1,128,1,float16,fp8,0,0.9064191818237305
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,8192,4,1,128,1,fp8,fp8,0,0.9062944412231445
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,8192,4,2,128,1,float16,float16,0,0.9311951637268067
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,8192,4,2,128,1,float16,fp8,0,0.909398365020752
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,8192,4,4,128,1,float16,fp8,0,0.4885136127471924
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,8192,4,4,128,1,fp8,fp8,0,0.5287568092346191
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,8192,4,1,128,1,float16,float16,0,0.4892064094543457
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,10240,4,4,128,1,float16,float16,0,0.4117584228515625
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,8192,4,2,128,1,fp8,fp8,0,1.0480527877807617
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,8192,4,1,128,1,float16,fp8,0,0.4872735977172852
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,8192,4,1,128,1,fp8,fp8,0,0.5175695896148682
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,8192,4,2,128,1,float16,float16,0,0.4929168224334717
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,8192,4,2,128,1,float16,fp8,0,0.48899040222167967
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,8192,4,4,128,1,float16,float16,0,0.2873615980148315
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16384,4,2,128,1,float16,float16,0,0.9139311790466309
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,8192,4,4,128,1,float16,fp8,0,0.2839263916015625
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,8192,4,2,128,1,fp8,fp8,0,0.5191711902618408
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,8192,4,4,128,1,fp8,fp8,0,0.28073279857635497
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,8192,4,1,128,1,float16,float16,0,0.2731136083602905
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,8192,4,1,128,1,float16,fp8,0,0.2795664072036743
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,8192,4,2,128,1,float16,float16,0,0.2829279899597168
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,8192,4,1,128,1,fp8,fp8,0,0.3008239984512329
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,8192,4,2,128,1,float16,fp8,0,0.27961599826812744
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,8192,4,2,128,1,fp8,fp8,0,0.2786848068237305
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,8192,4,4,128,1,float16,float16,0,0.1806704044342041
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,8192,4,4,128,1,float16,fp8,0,0.18249759674072266
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,8192,4,4,128,1,fp8,fp8,0,0.18256959915161133
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,8192,4,1,128,1,float16,float16,0,0.17396160364151
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,8192,4,1,128,1,fp8,fp8,0,0.1761520028114319
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,8192,4,1,128,1,float16,fp8,0,0.17501440048217773
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,8192,4,2,128,1,float16,float16,0,0.17971999645233155
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,8192,4,2,128,1,float16,fp8,0,0.18241759538650512
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,10240,4,2,128,1,fp8,fp8,0,0.14665440320968628
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,8192,4,2,128,1,fp8,fp8,0,0.17518399953842162
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,8192,4,4,128,1,float16,float16,0,0.11343040466308593
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,8192,4,4,128,1,float16,fp8,0,0.11118719577789307
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,8192,4,1,128,1,float16,float16,0,0.11561759710311889
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,8192,4,1,128,1,fp8,fp8,0,0.11100319623947144
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,8192,4,2,128,1,float16,float16,0,0.11139039993286133
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,8192,4,2,128,1,float16,fp8,0,0.11059999465942383
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,8192,4,1,128,1,float16,fp8,0,0.11098560094833373
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,8192,4,2,128,1,fp8,fp8,0,0.11524800062179566
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,6144,4,1,128,1,float16,float16,0,0.5461679935455322
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,6144,4,1,128,1,fp8,fp8,0,0.5652080059051514
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,6144,4,1,128,1,float16,fp8,0,0.5656064033508301
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,6144,4,2,128,1,float16,float16,0,0.5557104110717773
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,6144,4,2,128,1,float16,fp8,0,0.5874320030212402
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,6144,4,4,128,1,float16,float16,0,0.3151711940765381
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,6144,4,4,128,1,fp8,fp8,0,0.3114304065704346
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,6144,4,1,128,1,float16,float16,0,0.30036799907684325
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,8192,4,4,128,1,float16,float16,0,0.5114192008972168
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,6144,4,2,128,1,fp8,fp8,0,0.5669104099273682
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,6144,4,1,128,1,float16,fp8,0,0.31054880619049074
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,6144,4,1,128,1,fp8,fp8,0,0.3095423936843872
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,6144,4,2,128,1,float16,float16,0,0.2995919942855835
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,6144,4,2,128,1,float16,fp8,0,0.30996639728546144
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,6144,4,2,128,1,fp8,fp8,0,0.3112368106842041
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,6144,4,4,128,1,float16,float16,0,0.18262399435043336
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,6144,4,4,128,1,float16,fp8,0,0.18124159574508666
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,6144,4,4,128,1,fp8,fp8,0,0.1823040008544922
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,6144,4,1,128,1,float16,float16,0,0.17243200540542603
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,6144,4,1,128,1,float16,fp8,0,0.18123359680175782
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,6144,4,1,128,1,fp8,fp8,0,0.18044480085372924
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,6144,4,2,128,1,float16,float16,0,0.17345919609069824
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,6144,4,2,128,1,float16,fp8,0,0.18101760149002075
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,6144,4,2,128,1,fp8,fp8,0,0.1805727958679199
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,6144,4,4,128,1,float16,float16,0,0.11741440296173096
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,6144,4,4,128,1,float16,fp8,0,0.11551519632339477
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,6144,4,4,128,1,fp8,fp8,0,0.11539360284805297
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,6144,4,1,128,1,float16,float16,0,0.11358079910278321
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,6144,4,1,128,1,fp8,fp8,0,0.11557279825210572
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,6144,4,2,128,1,float16,float16,0,0.11449439525604248
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,6144,4,2,128,1,float16,fp8,0,0.11527999639511108
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,6144,4,2,128,1,fp8,fp8,0,0.11584479808807373
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,6144,4,4,128,1,float16,float16,0,0.0871295988559723
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,6144,4,4,128,1,float16,fp8,0,0.0843168020248413
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,6144,4,4,128,1,fp8,fp8,0,0.08618720173835755
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,6144,4,1,128,1,float16,float16,0,0.08536480069160461
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,6144,4,1,128,1,float16,fp8,0,0.08482239842414856
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,6144,4,1,128,1,fp8,fp8,0,0.08417119979858398
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,6144,4,2,128,1,float16,float16,0,0.08557760119438171
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,6144,4,2,128,1,float16,fp8,0,0.08512639999389648
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,6144,4,2,128,1,fp8,fp8,0,0.08596000075340271
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,8192,4,4,128,1,fp8,fp8,0,0.11486239433288574
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,4096,4,1,128,1,float16,float16,0,0.5152991771697998
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,4096,4,2,128,1,float16,float16,0,0.5189375877380371
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,4096,4,1,128,1,float16,fp8,0,0.5641392230987549
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,4096,4,1,128,1,fp8,fp8,0,0.5643199920654297
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,4096,4,4,128,1,float16,float16,0,0.2973376035690308
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,4096,4,2,128,1,float16,fp8,0,0.5659647941589355
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,4096,4,4,128,1,float16,fp8,0,0.3032495975494385
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,4096,4,4,128,1,fp8,fp8,0,0.30364480018615725
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,4096,4,2,128,1,fp8,fp8,0,0.5645199775695801
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,4096,4,1,128,1,float16,float16,0,0.2729824066162109
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,4096,4,1,128,1,float16,fp8,0,0.3033983945846558
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,4096,4,2,128,1,float16,float16,0,0.27681119441986085
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,4096,4,2,128,1,float16,fp8,0,0.30239999294281006
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,4096,4,2,128,1,fp8,fp8,0,0.3025216102600098
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,4096,4,4,128,1,float16,float16,0,0.17172160148620605
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,6144,4,4,128,1,float16,fp8,0,0.3116192102432251
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,4096,4,4,128,1,float16,fp8,0,0.17310880422592162
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,4096,4,1,128,1,float16,float16,0,0.15695199966430665
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,4096,4,4,128,1,fp8,fp8,0,0.17294880151748657
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,4096,4,1,128,1,float16,fp8,0,0.1703920006752014
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,4096,4,1,128,1,fp8,fp8,0,0.1737056016921997
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,4096,4,2,128,1,float16,float16,0,0.16100800037384033
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,4096,4,2,128,1,float16,fp8,0,0.1715872049331665
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,4096,4,4,128,1,float16,fp8,0,0.10585440397262573
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,4096,4,2,128,1,fp8,fp8,0,0.1714303970336914
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,4096,4,1,128,1,float16,float16,0,0.10070879459381103
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,4096,4,4,128,1,fp8,fp8,0,0.10722719430923462
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,4096,4,1,128,1,float16,fp8,0,0.10628960132598878
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,4096,4,1,128,1,fp8,fp8,0,0.1054527997970581
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,4096,4,2,128,1,float16,float16,0,0.10066399574279786
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,4096,4,2,128,1,float16,fp8,0,0.10636639595031738
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,4096,4,2,128,1,fp8,fp8,0,0.10647039413452149
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,4096,4,4,128,1,float16,float16,0,0.06541439890861511
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,4096,4,4,128,1,fp8,fp8,0,0.06577439904212952
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,4096,4,1,128,1,float16,float16,0,0.06369280219078063
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,4096,4,1,128,1,float16,fp8,0,0.06565759778022766
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,4096,4,1,128,1,fp8,fp8,0,0.06580960154533386
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,4096,4,2,128,1,float16,float16,0,0.06366879940032959
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,4096,4,2,128,1,float16,fp8,0,0.0657423973083496
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,4096,4,2,128,1,fp8,fp8,0,0.06584320068359376
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,4096,4,4,128,1,float16,float16,0,0.060068798065185544
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,4096,4,4,128,1,float16,fp8,0,0.059849601984024045
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,4096,4,1,128,1,float16,float16,0,0.05965920090675354
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,4096,4,4,128,1,fp8,fp8,0,0.059708797931671144
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,4096,4,1,128,1,fp8,fp8,0,0.05963199734687805
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,4096,4,2,128,1,float16,float16,0,0.05963039994239807
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,6144,4,1,128,1,float16,fp8,0,0.11628320217132568
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,4096,4,2,128,1,fp8,fp8,0,0.05974879860877991
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,3072,4,1,128,1,float16,float16,0,0.3158735990524292
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,3072,4,1,128,1,float16,fp8,0,0.36579360961914065
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,3072,4,1,128,1,fp8,fp8,0,0.3691168069839478
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,3072,4,2,128,1,float16,float16,0,0.3173952102661133
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,3072,4,2,128,1,float16,fp8,0,0.368505597114563
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,3072,4,2,128,1,fp8,fp8,0,0.36694719791412356
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,3072,4,4,128,1,float16,float16,0,0.19051680564880372
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,4096,4,1,128,1,fp8,fp8,0,0.3020303964614868
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,3072,4,4,128,1,float16,fp8,0,0.20068159103393554
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,3072,4,4,128,1,fp8,fp8,0,0.201580810546875
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,3072,4,1,128,1,float16,float16,0,0.17345600128173827
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,3072,4,1,128,1,float16,fp8,0,0.19928640127182007
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,3072,4,1,128,1,fp8,fp8,0,0.19956640005111695
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,3072,4,2,128,1,float16,float16,0,0.17501599788665773
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,4096,4,4,128,1,float16,float16,0,0.10611679553985595
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,3072,4,2,128,1,float16,fp8,0,0.19986239671707154
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,3072,4,2,128,1,fp8,fp8,0,0.20006239414215088
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,3072,4,4,128,1,float16,float16,0,0.11350879669189454
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,3072,4,4,128,1,float16,fp8,0,0.11572799682617188
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,3072,4,1,128,1,float16,float16,0,0.10382720232009887
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,4096,4,4,128,1,float16,fp8,0,0.06578400135040283
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,3072,4,1,128,1,float16,fp8,0,0.115665602684021
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,3072,4,1,128,1,fp8,fp8,0,0.11605600118637086
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,3072,4,2,128,1,float16,float16,0,0.1050976037979126
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,3072,4,2,128,1,float16,fp8,0,0.11561280488967896
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,3072,4,4,128,1,fp8,fp8,0,0.07215840220451356
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,3072,4,1,128,1,float16,float16,0,0.06795200109481811
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,4096,4,1,128,1,float16,fp8,0,0.05960800051689148
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,3072,4,1,128,1,float16,fp8,0,0.07204319834709168
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,3072,4,1,128,1,fp8,fp8,0,0.07274399995803833
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,4096,4,2,128,1,float16,fp8,0,0.05958240032196045
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,3072,4,2,128,1,float16,fp8,0,0.07292799949645996
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,3072,4,2,128,1,fp8,fp8,0,0.07221279740333557
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,3072,4,4,128,1,float16,float16,0,0.05144799947738647
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,3072,4,4,128,1,float16,fp8,0,0.05151839852333069
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,3072,4,4,128,1,fp8,fp8,0,0.051446402072906496
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,3072,4,1,128,1,float16,float16,0,0.04945279955863953
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,3072,4,1,128,1,float16,fp8,0,0.05146399736404419
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,3072,4,1,128,1,fp8,fp8,0,0.05143839716911316
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,3072,4,2,128,1,float16,float16,0,0.04956640005111694
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,3072,4,2,128,1,float16,fp8,0,0.05146080255508423
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,3072,4,2,128,1,fp8,fp8,0,0.0514959990978241
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,3072,4,4,128,1,float16,float16,0,0.0492031991481781
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,3072,4,4,128,1,float16,fp8,0,0.04731679856777191
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,3072,4,4,128,1,fp8,fp8,0,0.0473471999168396
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,3072,4,1,128,1,float16,float16,0,0.04757919907569885
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,3072,4,1,128,1,float16,fp8,0,0.04726879894733429
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,3072,4,1,128,1,fp8,fp8,0,0.047336000204086306
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,3072,4,2,128,1,float16,fp8,0,0.0473471999168396
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,3072,4,2,128,1,fp8,fp8,0,0.047332799434661864
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,2048,4,1,128,1,float16,float16,0,0.31062400341033936
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,2048,4,1,128,1,float16,fp8,0,0.38962559700012206
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,2048,4,1,128,1,fp8,fp8,0,0.3920144081115723
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,2048,4,2,128,1,float16,float16,0,0.31814239025115965
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,3072,4,4,128,1,fp8,fp8,0,0.11589280366897584
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,2048,4,2,128,1,float16,fp8,0,0.3902496099472046
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,2048,4,2,128,1,fp8,fp8,0,0.39077279567718504
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,2048,4,4,128,1,float16,float16,0,0.1919360041618347
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,3072,4,2,128,1,fp8,fp8,0,0.11516799926757812
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,3072,4,4,128,1,float16,fp8,0,0.07261599898338318
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,2048,4,4,128,1,fp8,fp8,0,0.20887839794158936
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,2048,4,1,128,1,float16,float16,0,0.1692863941192627
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,2048,4,1,128,1,float16,fp8,0,0.20695359706878663
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,2048,4,2,128,1,float16,float16,0,0.17041440010070802
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,2048,4,2,128,1,float16,fp8,0,0.209006404876709
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,2048,4,4,128,1,float16,float16,0,0.11034879684448243
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,3072,4,2,128,1,float16,float16,0,0.06793760061264038
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,2048,4,2,128,1,fp8,fp8,0,0.2075711965560913
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,2048,4,4,128,1,float16,fp8,0,0.11795040369033813
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,2048,4,1,128,1,float16,float16,0,0.09825440049171448
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,2048,4,1,128,1,float16,fp8,0,0.11532319784164428
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,2048,4,1,128,1,fp8,fp8,0,0.11567519903182984
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,2048,4,2,128,1,float16,float16,0,0.09987040162086487
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,2048,4,2,128,1,float16,fp8,0,0.11555839776992798
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,2048,4,4,128,1,float16,float16,0,0.06795520186424256
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,2048,4,2,128,1,fp8,fp8,0,0.1157696008682251
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,2048,4,4,128,1,float16,fp8,0,0.06967359781265259
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,2048,4,4,128,1,fp8,fp8,0,0.06987360119819641
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,2048,4,1,128,1,float16,float16,0,0.06153920292854309
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,2048,4,1,128,1,float16,fp8,0,0.06934720277786255
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,2048,4,1,128,1,fp8,fp8,0,0.06991040110588073
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,2048,4,2,128,1,float16,float16,0,0.0616703987121582
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,2048,4,2,128,1,float16,fp8,0,0.06986399888992309
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,3072,4,2,128,1,float16,float16,0,0.047651201486587524
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,2048,4,2,128,1,fp8,fp8,0,0.0694159984588623
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,2048,4,4,128,1,float16,float16,0,0.04111199975013733
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,2048,4,4,128,1,float16,fp8,0,0.043275201320648195
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,2048,4,4,128,1,fp8,fp8,0,0.04275040030479431
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,2048,4,1,128,1,float16,fp8,0,0.04318720102310181
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,2048,4,1,128,1,fp8,fp8,0,0.04306080043315887
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,2048,4,2,128,1,float16,float16,0,0.0391184002161026
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,2048,4,2,128,1,float16,fp8,0,0.043137601017951964
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,2048,4,2,128,1,fp8,fp8,0,0.04315199851989746
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,2048,4,4,128,1,float16,float16,0,0.03707520067691803
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,2048,4,4,128,1,float16,fp8,0,0.03709760010242462
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,2048,4,4,128,1,fp8,fp8,0,0.03711360096931458
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,2048,4,1,128,1,float16,float16,0,0.03519999980926514
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,2048,4,1,128,1,float16,fp8,0,0.03707840144634247
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,2048,4,1,128,1,fp8,fp8,0,0.03711679875850678
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,2048,4,2,128,1,float16,float16,0,0.035150399804115294
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,2048,4,2,128,1,float16,fp8,0,0.037092798948287965
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,2048,4,2,128,1,fp8,fp8,0,0.0370959997177124
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,2048,4,4,128,1,float16,float16,0,0.035016000270843506
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,2048,4,4,128,1,float16,fp8,0,0.03503519892692566
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,2048,4,4,128,1,fp8,fp8,0,0.03500959873199463
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,2048,4,1,128,1,float16,float16,0,0.035041600465774536
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,2048,4,1,128,1,float16,fp8,0,0.03499839901924133
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,2048,4,1,128,1,fp8,fp8,0,0.035087999701499936
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,2048,4,2,128,1,float16,float16,0,0.03504480123519897
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,3072,4,4,128,1,float16,float16,0,0.07167360186576843
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,2048,4,2,128,1,float16,fp8,0,0.03505280017852783
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,2048,4,2,128,1,fp8,fp8,0,0.035016000270843506
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1536,4,1,128,1,float16,float16,0,0.20277280807495118
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,2048,4,1,128,1,fp8,fp8,0,0.2080159902572632
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1536,4,1,128,1,float16,fp8,0,0.26415200233459474
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1536,4,1,128,1,fp8,fp8,0,0.26503520011901854
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1536,4,2,128,1,float16,float16,0,0.20551040172576904
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1536,4,2,128,1,float16,fp8,0,0.2665231943130493
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1536,4,4,128,1,float16,float16,0,0.12977440357208253
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,2048,4,4,128,1,fp8,fp8,0,0.1173359990119934
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1536,4,2,128,1,fp8,fp8,0,0.2644495964050293
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1536,4,4,128,1,fp8,fp8,0,0.14494719505310058
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1536,4,1,128,1,float16,fp8,0,0.1424239993095398
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1536,4,1,128,1,fp8,fp8,0,0.14244480133056642
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1536,4,2,128,1,float16,float16,0,0.114956796169281
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1536,4,2,128,1,float16,fp8,0,0.14318560361862182
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1536,4,4,128,1,float16,float16,0,0.0771664023399353
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1536,4,4,128,1,float16,fp8,0,0.08118240237236023
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1536,4,1,128,1,float16,float16,0,0.06681119799613952
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1536,4,1,128,1,float16,fp8,0,0.08113279938697815
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1536,4,4,128,1,fp8,fp8,0,0.08152160048484802
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,2048,4,1,128,1,float16,float16,0,0.03904480040073395
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1536,4,1,128,1,fp8,fp8,0,0.08066400289535522
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1536,4,2,128,1,float16,float16,0,0.06811839938163758
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1536,4,2,128,1,float16,fp8,0,0.08052160143852234
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1536,4,2,128,1,fp8,fp8,0,0.08073599934577942
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1536,4,4,128,1,float16,float16,0,0.04729759991168976
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1536,4,4,128,1,float16,fp8,0,0.049414399266242984
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1536,4,1,128,1,float16,float16,0,0.04323680102825165
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1536,4,4,128,1,fp8,fp8,0,0.04984799921512604
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1536,4,1,128,1,float16,fp8,0,0.04937599897384644
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1536,4,1,128,1,fp8,fp8,0,0.04939680099487305
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1536,4,2,128,1,float16,float16,0,0.04334399998188019
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1536,4,2,128,1,float16,fp8,0,0.04941920042037964
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1536,4,2,128,1,fp8,fp8,0,0.04938080012798309
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1536,4,4,128,1,float16,float16,0,0.03296799957752228
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1536,4,4,128,1,float16,fp8,0,0.03386560082435608
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1536,4,4,128,1,fp8,fp8,0,0.03399839997291565
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1536,4,1,128,1,float16,float16,0,0.030953601002693176
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1536,4,1,128,1,float16,fp8,0,0.03426080048084259
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1536,4,1,128,1,fp8,fp8,0,0.033537599444389346
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1536,4,2,128,1,float16,float16,0,0.03091840147972107
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1536,4,2,128,1,float16,fp8,0,0.03476319909095764
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1536,4,2,128,1,fp8,fp8,0,0.03406560122966766
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1536,4,4,128,1,float16,float16,0,0.030556800961494445
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1536,4,4,128,1,float16,fp8,0,0.030935999751091004
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1536,4,4,128,1,fp8,fp8,0,0.030796799063682555
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1536,4,1,128,1,float16,float16,0,0.02889760136604309
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1536,4,1,128,1,fp8,fp8,0,0.030833598971366883
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1536,4,2,128,1,float16,float16,0,0.02889440059661865
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1536,4,2,128,1,float16,fp8,0,0.030857598781585692
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1536,4,4,128,1,float16,float16,0,0.028918400406837463
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1536,4,4,128,1,float16,fp8,0,0.028863999247550964
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,2048,4,4,128,1,float16,fp8,0,0.20893280506134032
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1536,4,1,128,1,float16,float16,0,0.02887679934501648
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1536,4,1,128,1,float16,fp8,0,0.02887359857559204
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1536,4,1,128,1,fp8,fp8,0,0.02887679934501648
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1536,4,2,128,1,float16,float16,0,0.028838399052619933
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1536,4,2,128,1,float16,fp8,0,0.028915199637413024
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1536,4,2,128,1,fp8,fp8,0,0.028867200016975403
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,1024,4,1,128,1,float16,float16,0,0.21476640701293945
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1536,4,4,128,1,float16,fp8,0,0.14590239524841309
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,1024,4,1,128,1,float16,fp8,0,0.3018687963485718
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1536,4,1,128,1,float16,float16,0,0.11362719535827637
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,1024,4,1,128,1,fp8,fp8,0,0.30342559814453124
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,1024,4,2,128,1,float16,float16,0,0.21848640441894532
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,1024,4,2,128,1,float16,fp8,0,0.3026992082595825
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1536,4,2,128,1,fp8,fp8,0,0.1434991955757141
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1024,4,4,128,1,float16,float16,0,0.13922560214996338
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1024,4,4,128,1,float16,fp8,0,0.1615839958190918
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1024,4,1,128,1,float16,float16,0,0.11685760021209717
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1024,4,4,128,1,fp8,fp8,0,0.16059839725494385
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1024,4,1,128,1,float16,fp8,0,0.16047040224075318
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1024,4,2,128,1,float16,float16,0,0.11816799640655518
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1024,4,1,128,1,fp8,fp8,0,0.15940480232238768
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1024,4,2,128,1,float16,fp8,0,0.1603808045387268
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1024,4,4,128,1,float16,float16,0,0.0788320004940033
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1024,4,4,128,1,float16,fp8,0,0.08901919722557068
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1024,4,4,128,1,fp8,fp8,0,0.08847519755363464
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1024,4,1,128,1,float16,float16,0,0.06721760034561157
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1024,4,1,128,1,float16,fp8,0,0.08638079762458802
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1024,4,1,128,1,fp8,fp8,0,0.08749279975891114
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1024,4,2,128,1,float16,float16,0,0.06855040192604064
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1024,4,2,128,1,float16,fp8,0,0.08756800293922425
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1024,4,4,128,1,float16,float16,0,0.04746400117874146
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1024,4,2,128,1,fp8,fp8,0,0.0870639979839325
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1024,4,4,128,1,float16,fp8,0,0.051551997661590576
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1024,4,4,128,1,fp8,fp8,0,0.0514735996723175
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1536,4,1,128,1,float16,fp8,0,0.030899199843406677
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1024,4,1,128,1,float16,fp8,0,0.051420801877975465
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1536,4,2,128,1,fp8,fp8,0,0.03089759945869446
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1024,4,1,128,1,fp8,fp8,0,0.05141760110855102
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1024,4,2,128,1,float16,float16,0,0.04213280081748962
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1024,4,2,128,1,float16,fp8,0,0.05141760110855102
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1024,4,4,128,1,float16,float16,0,0.028835201263427736
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1024,4,4,128,1,float16,fp8,0,0.030883198976516722
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1024,4,4,128,1,fp8,fp8,0,0.03094879984855652
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1024,4,1,128,1,float16,float16,0,0.026859200000762938
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1024,4,1,128,1,float16,fp8,0,0.030963200330734252
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1024,4,1,128,1,fp8,fp8,0,0.030852800607681273
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1024,4,2,128,1,float16,float16,0,0.02686559855937958
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1024,4,2,128,1,float16,fp8,0,0.030870398879051207
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1024,4,2,128,1,fp8,fp8,0,0.03096640110015869
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1024,4,4,128,1,float16,float16,0,0.024724799394607543
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1024,4,4,128,1,float16,fp8,0,0.026561599969863892
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1024,4,4,128,1,fp8,fp8,0,0.02585119903087616
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1024,4,1,128,1,float16,float16,0,0.024113599956035615
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1024,4,1,128,1,float16,fp8,0,0.026063999533653258
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1024,4,1,128,1,fp8,fp8,0,0.024825599789619446
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1024,4,2,128,1,float16,float16,0,0.02401600033044815
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1024,4,2,128,1,float16,fp8,0,0.02486560046672821
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1024,4,2,128,1,fp8,fp8,0,0.026763200759887695
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1024,4,4,128,1,float16,float16,0,0.022787199914455415
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1024,4,4,128,1,fp8,fp8,0,0.023107199370861052
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1024,4,1,128,1,float16,float16,0,0.022763200104236603
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1024,4,1,128,1,float16,fp8,0,0.02298559993505478
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1024,4,1,128,1,fp8,fp8,0,0.022759999334812164
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1024,4,2,128,1,float16,fp8,0,0.022884799540042876
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1024,4,2,128,1,fp8,fp8,0,0.02284799963235855
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1024,4,4,128,1,float16,float16,0,0.022785599529743194
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1024,4,4,128,1,float16,fp8,0,0.022756800055503845
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1024,4,2,128,1,fp8,fp8,0,0.16013280153274537
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1024,4,4,128,1,fp8,fp8,0,0.022776000201702118
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1024,4,1,128,1,float16,float16,0,0.02263839989900589
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1024,4,1,128,1,float16,fp8,0,0.022771200537681578
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1024,4,1,128,1,fp8,fp8,0,0.0227183997631073
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1024,4,2,128,1,float16,float16,0,0.02266719937324524
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1024,4,2,128,1,float16,fp8,0,0.02268799990415573
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1024,4,2,128,1,fp8,fp8,0,0.022672000527381896
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,512,4,1,128,1,float16,float16,0,0.16469919681549072
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,512,4,1,128,1,float16,fp8,0,0.2570159912109375
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,512,4,1,128,1,fp8,fp8,0,0.25851199626922605
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1024,4,1,128,1,float16,float16,0,0.04213759899139404
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,512,4,2,128,1,float16,float16,0,0.1684191942214966
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,512,4,4,128,1,float16,float16,0,0.11271359920501708
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1536,4,4,128,1,fp8,fp8,0,0.028937599062919615
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1024,4,2,128,1,fp8,fp8,0,0.051419198513031006
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,512,4,2,128,1,fp8,fp8,0,0.2588160037994385
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,512,4,4,128,1,float16,fp8,0,0.1366927981376648
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,512,4,4,128,1,fp8,fp8,0,0.13732800483703614
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,512,4,1,128,1,float16,float16,0,0.09166399836540222
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,512,4,1,128,1,fp8,fp8,0,0.13552320003509521
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,512,4,2,128,1,float16,fp8,0,0.13578720092773439
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,512,4,2,128,1,float16,float16,0,0.09337440133094788
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,512,4,2,128,1,fp8,fp8,0,0.13578720092773439
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,512,4,4,128,1,float16,float16,0,0.06375359892845153
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,512,4,4,128,1,float16,fp8,0,0.07552000284194946
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,512,4,4,128,1,fp8,fp8,0,0.07416960000991821
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,512,4,1,128,1,float16,float16,0,0.05203679800033569
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,512,4,1,128,1,float16,fp8,0,0.07369120121002197
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,512,4,2,128,1,float16,float16,0,0.05375360250473023
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,512,4,1,128,1,fp8,fp8,0,0.07264800071716308
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,512,4,2,128,1,float16,fp8,0,0.07401919960975648
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,512,4,2,128,1,fp8,fp8,0,0.07203680276870728
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1024,4,4,128,1,float16,fp8,0,0.02306240051984787
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,512,4,4,128,1,float16,float16,0,0.038108798861503604
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,1024,4,2,128,1,fp8,fp8,0,0.30236799716949464
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,512,4,4,128,1,fp8,fp8,0,0.0428272008895874
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,512,4,1,128,1,float16,fp8,0,0.04263519942760467
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,512,4,1,128,1,float16,float16,0,0.03267680108547211
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1024,4,2,128,1,float16,float16,0,0.022755199670791627
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,512,4,1,128,1,fp8,fp8,0,0.04232319891452789
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,512,4,2,128,1,float16,fp8,0,0.04311839938163757
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,512,4,2,128,1,fp8,fp8,0,0.04237920045852661
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,512,4,4,128,1,float16,float16,0,0.022681599855422972
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,512,4,4,128,1,float16,fp8,0,0.02484800070524216
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,512,4,4,128,1,fp8,fp8,0,0.024825599789619446
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,512,4,1,128,1,float16,float16,0,0.020691199600696562
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,512,4,1,128,1,float16,fp8,0,0.02483679950237274
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,512,4,1,128,1,fp8,fp8,0,0.024831999838352204
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,512,4,2,128,1,float16,fp8,0,0.024835200607776643
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,512,4,2,128,1,fp8,fp8,0,0.024803200364112855
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,512,4,4,128,1,float16,float16,0,0.018692800402641298
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,512,4,4,128,1,float16,fp8,0,0.020735999941825865
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,512,4,4,128,1,fp8,fp8,0,0.02067999988794327
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,512,4,1,128,1,float16,float16,0,0.018622399866580965
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,512,4,1,128,1,float16,fp8,0,0.020703999698162077
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,512,4,1,128,1,fp8,fp8,0,0.020710399746894835
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,512,4,2,128,1,float16,float16,0,0.017892800271511078
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,512,4,2,128,1,fp8,fp8,0,0.020688000321388244
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,512,4,2,128,1,float16,fp8,0,0.02067359983921051
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,512,4,4,128,1,float16,float16,0,0.016620799899101257
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,512,4,4,128,1,float16,fp8,0,0.0186256006360054
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,512,4,1,128,1,float16,float16,0,0.01656319946050644
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,512,4,4,128,1,fp8,fp8,0,0.018651199340820313
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,512,4,1,128,1,float16,fp8,0,0.018624000251293182
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,512,4,1,128,1,fp8,fp8,0,0.01863519996404648
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,512,4,2,128,1,float16,float16,0,0.016659200191497803
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,512,4,2,128,1,float16,fp8,0,0.018620799481868743
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,512,4,2,128,1,fp8,fp8,0,0.017479999363422392
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,512,4,4,128,1,float16,float16,0,0.016607999801635742
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,512,4,4,128,1,float16,fp8,0,0.016564799845218657
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,512,4,4,128,1,fp8,fp8,0,0.016595199704170227
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,512,4,1,128,1,float16,float16,0,0.01656160056591034
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,512,4,1,128,1,float16,fp8,0,0.01659200042486191
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,512,4,1,128,1,fp8,fp8,0,0.01658879965543747
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,512,4,2,128,1,float16,float16,0,0.01658560037612915
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,512,4,2,128,1,float16,fp8,0,0.25789918899536135
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,512,4,2,128,1,float16,fp8,0,0.01663520038127899
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,512,4,2,128,1,fp8,fp8,0,0.016612799465656282
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,512,4,4,128,1,float16,float16,0,0.016551999747753142
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,512,4,4,128,1,float16,fp8,0,0.016574400663375854
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,512,4,4,128,1,fp8,fp8,0,0.016582399606704712
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,512,4,1,128,1,float16,float16,0,0.016527999937534333
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,512,4,1,128,1,float16,fp8,0,0.016596800088882445
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,512,4,1,128,1,fp8,fp8,0,0.016539199650287627
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,512,4,2,128,1,float16,float16,0,0.01650879979133606
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,512,4,2,128,1,float16,fp8,0,0.016603200137615202
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,512,4,2,128,1,fp8,fp8,0,0.016627199947834015
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,256,4,1,128,1,float16,float16,0,0.07631679773330688
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,256,4,1,128,1,float16,fp8,0,0.1200368046760559
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,256,4,2,128,1,float16,float16,0,0.07770559787750245
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,256,4,1,128,1,fp8,fp8,0,0.11968159675598145
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,256,4,2,128,1,float16,fp8,0,0.12096960544586181
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,256,4,2,128,1,fp8,fp8,0,0.1208575963973999
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,256,4,4,128,1,float16,float16,0,0.05563039779663086
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,256,4,4,128,1,float16,fp8,0,0.06779360175132751
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,256,4,1,128,1,float16,float16,0,0.04432480037212372
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,256,4,4,128,1,fp8,fp8,0,0.06782559752464294
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,256,4,1,128,1,float16,fp8,0,0.06579679846763611
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,256,4,1,128,1,fp8,fp8,0,0.06573439836502075
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,512,4,4,128,1,float16,fp8,0,0.043252798914909366
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,256,4,2,128,1,float16,fp8,0,0.06572800278663635
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,512,4,2,128,1,float16,float16,0,0.03266400098800659
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,256,4,2,128,1,fp8,fp8,0,0.0657584011554718
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,256,4,4,128,1,float16,fp8,0,0.03706560134887695
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,256,4,1,128,1,float16,float16,0,0.024801599979400634
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,256,4,4,128,1,fp8,fp8,0,0.03702079951763153
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,512,4,2,128,1,float16,float16,0,0.020532800257205962
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,256,4,1,128,1,fp8,fp8,0,0.03711999952793121
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,256,4,2,128,1,float16,float16,0,0.026471999287605286
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,256,4,2,128,1,float16,fp8,0,0.03707999885082245
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,256,4,2,128,1,fp8,fp8,0,0.0370608001947403
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,256,4,4,128,1,float16,fp8,0,0.022729599475860597
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,256,4,1,128,1,float16,float16,0,0.016599999368190767
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,256,4,1,128,1,float16,fp8,0,0.02269120067358017
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,256,4,1,128,1,fp8,fp8,0,0.022700800001621245
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,256,4,2,128,1,float16,float16,0,0.016630400717258454
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,256,4,2,128,1,float16,fp8,0,0.022753599286079406
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,256,4,2,128,1,fp8,fp8,0,0.022697600722312927
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,256,4,4,128,1,float16,float16,0,0.016599999368190767
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,256,4,4,128,1,float16,fp8,0,0.016675199568271636
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,256,4,4,128,1,fp8,fp8,0,0.016606399416923524
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,256,4,1,128,1,float16,float16,0,0.014567999541759491
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,256,4,1,128,1,float16,fp8,0,0.016638399660587312
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,256,4,1,128,1,fp8,fp8,0,0.01663520038127899
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,256,4,2,128,1,float16,float16,0,0.014572800695896148
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,512,4,1,128,1,float16,fp8,0,0.13665599822998048
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,256,4,2,128,1,float16,fp8,0,0.016731199622154237
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,256,4,2,128,1,fp8,fp8,0,0.016867199540138246
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,256,4,4,128,1,float16,float16,0,0.014595200121402741
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,256,4,4,128,1,float16,fp8,0,0.014608000218868256
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,256,4,1,128,1,float16,float16,0,0.014556799829006196
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,256,4,4,128,1,fp8,fp8,0,0.014614400267601014
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,256,4,1,128,1,float16,fp8,0,0.01454399973154068
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,256,4,1,128,1,fp8,fp8,0,0.014535999298095703
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,256,4,2,128,1,float16,float16,0,0.014382399618625641
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,256,4,2,128,1,fp8,fp8,0,0.014659200608730317
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,256,4,4,128,1,float16,float16,0,0.01313759982585907
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,256,4,4,128,1,float16,fp8,0,0.014563199877738953
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,256,4,4,128,1,fp8,fp8,0,0.014473600685596466
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,256,4,1,128,1,float16,float16,0,0.012931199371814727
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,256,4,1,128,1,float16,fp8,0,0.014486399292945863
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,256,4,1,128,1,fp8,fp8,0,0.0144896000623703
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,256,4,2,128,1,float16,float16,0,0.012464000284671784
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,256,4,2,128,1,float16,float16,0,0.04673919975757599
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,256,4,2,128,1,float16,fp8,0,0.014556799829006196
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,256,4,2,128,1,fp8,fp8,0,0.014486399292945863
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,256,4,4,128,1,float16,float16,0,0.012782399356365205
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,256,4,4,128,1,float16,fp8,0,0.012654399871826172
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,256,4,4,128,1,fp8,fp8,0,0.013995200395584106
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,256,4,4,128,1,float16,float16,0,0.031676799058914185
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,256,4,1,128,1,float16,float16,0,0.012534399330615998
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,256,4,1,128,1,float16,fp8,0,0.013697600364685059
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,256,4,1,128,1,fp8,fp8,0,0.012495999783277511
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,256,4,2,128,1,float16,fp8,0,0.013857600092887879
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,256,4,2,128,1,fp8,fp8,0,0.012531200051307678
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,256,4,2,128,1,float16,float16,0,0.012539200484752655
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,256,4,4,128,1,float16,float16,0,0.01276639997959137
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,256,4,4,128,1,float16,fp8,0,0.012505599856376648
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,256,4,4,128,1,fp8,fp8,0,0.012540799379348756
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,256,4,4,128,1,float16,float16,0,0.01871519982814789
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,256,4,1,128,1,float16,fp8,0,0.012529599666595458
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,256,4,1,128,1,fp8,fp8,0,0.012481600046157837
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,256,4,4,128,1,fp8,fp8,0,0.02279199957847595
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,256,4,1,128,1,float16,float16,0,0.012742400169372559
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,256,4,2,128,1,float16,fp8,0,0.012505599856376648
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,256,4,2,128,1,fp8,fp8,0,0.012529599666595458
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,128,4,1,128,1,float16,float16,0,0.047502401471138
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,128,4,1,128,1,float16,fp8,0,0.06921759843826295
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,128,4,2,128,1,float16,float16,0,0.049267199635505673
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,128,4,1,128,1,fp8,fp8,0,0.0690559983253479
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,128,4,2,128,1,float16,fp8,0,0.06976159811019897
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,128,4,4,128,1,float16,float16,0,0.034955200552940366
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,128,4,4,128,1,float16,fp8,0,0.0395552009344101
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,128,4,2,128,1,fp8,fp8,0,0.0691536009311676
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,128,4,1,128,1,float16,float16,0,0.028889599442481994
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,128,4,1,128,1,fp8,fp8,0,0.03912160098552704
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,128,4,2,128,1,float16,float16,0,0.02892799973487854
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,128,4,2,128,1,float16,fp8,0,0.03918400108814239
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,128,4,2,128,1,fp8,fp8,0,0.03915199935436249
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,128,4,4,128,1,float16,float16,0,0.02070239931344986
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,128,4,4,128,1,float16,fp8,0,0.022782400250434875
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,128,4,4,128,1,fp8,fp8,0,0.022732800245285033
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,128,4,1,128,1,float16,float16,0,0.018580800294876097
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,128,4,1,128,1,float16,fp8,0,0.02274080067873001
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,128,4,1,128,1,fp8,fp8,0,0.02284960001707077
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,128,4,2,128,1,float16,fp8,0,0.022758400440216063
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,128,4,2,128,1,fp8,fp8,0,0.022787199914455415
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,128,4,4,128,1,float16,float16,0,0.013435199856758118
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,128,4,4,128,1,float16,fp8,0,0.014564800262451171
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,128,4,4,128,1,fp8,fp8,0,0.014580799639225006
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,128,4,1,128,1,float16,float16,0,0.012508800625801087
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,128,4,1,128,1,float16,fp8,0,0.014572800695896148
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,256,4,2,128,1,float16,fp8,0,0.014511999487876893
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,128,4,1,128,1,fp8,fp8,0,0.01465120017528534
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,128,4,2,128,1,float16,float16,0,0.012455999851226807
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,128,4,2,128,1,float16,fp8,0,0.014561599493026734
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,128,4,2,128,1,fp8,fp8,0,0.01451359987258911
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,128,4,4,128,1,float16,float16,0,0.012428800016641617
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,128,4,4,128,1,float16,fp8,0,0.012470400333404541
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,128,4,4,128,1,fp8,fp8,0,0.01252799928188324
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,128,4,1,128,1,float16,fp8,0,0.012531200051307678
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,128,4,1,128,1,fp8,fp8,0,0.012505599856376648
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,128,4,2,128,1,float16,float16,0,0.010553599894046783
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,128,4,2,128,1,float16,fp8,0,0.01255200058221817
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,128,4,2,128,1,fp8,fp8,0,0.012507200241088867
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,256,4,1,128,1,float16,fp8,0,0.03712159991264343
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,128,4,4,128,1,float16,float16,0,0.010465600341558457
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,128,4,4,128,1,float16,fp8,0,0.010531199723482132
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,128,4,4,128,1,fp8,fp8,0,0.011142399907112122
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,128,4,1,128,1,float16,float16,0,0.010523200035095215
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,256,4,2,128,1,float16,float16,0,0.012513600289821625
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,128,4,1,128,1,fp8,fp8,0,0.010779199749231338
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,128,4,2,128,1,float16,fp8,0,0.010796800255775452
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,128,4,2,128,1,fp8,fp8,0,0.01185920014977455
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,128,4,4,128,1,float16,float16,0,0.010489600151777268
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,128,4,4,128,1,float16,fp8,0,0.010500799864530563
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,128,4,4,128,1,fp8,fp8,0,0.010542400181293488
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,128,4,1,128,1,float16,float16,0,0.01040000021457672
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,128,4,1,128,1,float16,fp8,0,0.010470400005578995
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,128,4,4,128,1,fp8,fp8,0,0.03910239934921265
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,128,4,1,128,1,fp8,fp8,0,0.010412800312042236
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,128,4,1,128,1,float16,fp8,0,0.03915199935436249
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,128,4,2,128,1,float16,fp8,0,0.0104592002928257
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,128,4,4,128,1,float16,float16,0,0.01045759990811348
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,128,4,4,128,1,float16,fp8,0,0.010463999956846238
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,128,4,4,128,1,fp8,fp8,0,0.010401599854230881
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,128,4,1,128,1,float16,float16,0,0.010425599664449692
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,128,4,1,128,1,float16,fp8,0,0.010417599976062775
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,128,4,2,128,1,float16,float16,0,0.01037440001964569
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,128,4,2,128,1,float16,fp8,0,0.010451199859380722
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,128,4,2,128,1,float16,float16,0,0.01846559941768646
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,128,4,2,128,1,fp8,fp8,0,0.010401599854230881
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,128,4,4,128,1,float16,float16,0,0.010454399883747101
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,128,4,4,128,1,float16,fp8,0,0.0104032002389431
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,128,4,4,128,1,fp8,fp8,0,0.010471999645233154
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,128,4,1,128,1,float16,float16,0,0.010467199981212616
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,128,4,1,128,1,float16,fp8,0,0.010486400127410889
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,128,4,1,128,1,fp8,fp8,0,0.010401599854230881
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,128,4,2,128,1,float16,float16,0,0.01048320010304451
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,128,4,2,128,1,float16,fp8,0,0.010467199981212616
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,128,4,2,128,1,fp8,fp8,0,0.010494399815797806
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,64,4,1,128,1,float16,float16,0,0.03830400109291077
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,64,4,1,128,1,float16,fp8,0,0.04732640087604523
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,64,4,1,128,1,fp8,fp8,0,0.047286400198936464
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,64,4,2,128,1,float16,float16,0,0.03895840048789978
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,64,4,2,128,1,float16,fp8,0,0.047356799244880676
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,64,4,2,128,1,fp8,fp8,0,0.047332799434661864
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,64,4,4,128,1,float16,float16,0,0.02499680072069168
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,64,4,4,128,1,float16,fp8,0,0.028806400299072266
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,128,4,1,128,1,float16,float16,0,0.010622400045394897
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,64,4,4,128,1,fp8,fp8,0,0.0288239985704422
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,64,4,1,128,1,float16,float16,0,0.02454880028963089
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,64,4,1,128,1,float16,fp8,0,0.028947201371192933
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,64,4,1,128,1,fp8,fp8,0,0.02881920039653778
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,64,4,2,128,1,float16,float16,0,0.02438720017671585
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,64,4,2,128,1,float16,fp8,0,0.028860801458358766
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,64,4,2,128,1,fp8,fp8,0,0.028812798857688903
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,64,4,4,128,1,float16,float16,0,0.016569599509239197
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,64,4,4,128,1,float16,fp8,0,0.01857600063085556
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,64,4,4,128,1,fp8,fp8,0,0.018590399622917177
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,64,4,1,128,1,float16,float16,0,0.016476799547672272
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,64,4,1,128,1,float16,fp8,0,0.01857440024614334
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,64,4,1,128,1,fp8,fp8,0,0.018572799861431122
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,128,4,1,128,1,float16,fp8,0,0.010540799796581268
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,64,4,2,128,1,float16,float16,0,0.015505599975585937
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,64,4,2,128,1,float16,fp8,0,0.01860959976911545
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,64,4,4,128,1,float16,float16,0,0.012433599680662155
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,128,4,2,128,1,float16,float16,0,0.010435199737548828
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,64,4,2,128,1,fp8,fp8,0,0.0186831995844841
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,64,4,4,128,1,fp8,fp8,0,0.012432000041007996
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,64,4,1,128,1,float16,float16,0,0.011870399862527848
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,64,4,1,128,1,float16,fp8,0,0.01255359947681427
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,64,4,1,128,1,fp8,fp8,0,0.012460800260305405
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,64,4,2,128,1,float16,float16,0,0.012438400089740754
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,64,4,2,128,1,float16,fp8,0,0.01255200058221817
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,64,4,2,128,1,fp8,fp8,0,0.012535999715328216
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,64,4,4,128,1,float16,float16,0,0.010444799810647965
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,64,4,4,128,1,float16,fp8,0,0.010599999874830245
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,128,4,2,128,1,float16,float16,0,0.010475199669599533
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,128,4,2,128,1,fp8,fp8,0,0.010496000200510025
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,64,4,1,128,1,float16,fp8,0,0.010487999767065048
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,64,4,1,128,1,fp8,fp8,0,0.010659199953079224
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,64,4,1,128,1,float16,float16,0,0.0104032002389431
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,64,4,2,128,1,float16,float16,0,0.010424000024795533
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,64,4,4,128,1,float16,float16,0,0.010438399761915207
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,64,4,2,128,1,fp8,fp8,0,0.0105103999376297
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,64,4,4,128,1,fp8,fp8,0,0.01045759990811348
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,64,4,1,128,1,float16,float16,0,0.010411199927330018
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,64,4,1,128,1,float16,fp8,0,0.010460799932479859
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,64,4,1,128,1,fp8,fp8,0,0.01043040007352829
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,64,4,2,128,1,float16,fp8,0,0.010440000146627427
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,64,4,2,128,1,fp8,fp8,0,0.010452800244092942
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,64,4,4,128,1,float16,float16,0,0.010353600233793258
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,64,4,4,128,1,float16,fp8,0,0.010417599976062775
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,64,4,4,128,1,fp8,fp8,0,0.01040000021457672
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,64,4,1,128,1,float16,float16,0,0.010417599976062775
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,64,4,1,128,1,float16,fp8,0,0.010393600165843963
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,64,4,1,128,1,fp8,fp8,0,0.010422399640083313
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,64,4,2,128,1,float16,float16,0,0.010422399640083313
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,64,4,2,128,1,float16,fp8,0,0.010424000024795533
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,64,4,2,128,1,fp8,fp8,0,0.01045600026845932
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,64,4,4,128,1,float16,float16,0,0.01048159971833229
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,64,4,4,128,1,float16,fp8,0,0.01037919968366623
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,64,4,4,128,1,fp8,fp8,0,0.010428799688816071
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,64,4,1,128,1,float16,float16,0,0.010412800312042236
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,64,4,1,128,1,float16,fp8,0,0.010427200049161912
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,64,4,1,128,1,fp8,fp8,0,0.010276799649000167
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,64,4,2,128,1,float16,float16,0,0.010468800365924836
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,64,4,2,128,1,float16,fp8,0,0.01045600026845932
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,64,4,2,128,1,fp8,fp8,0,0.01048479974269867
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,64,4,4,128,1,float16,float16,0,0.010425599664449692
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,64,4,4,128,1,float16,fp8,0,0.010409600287675857
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,64,4,4,128,1,fp8,fp8,0,0.010411199927330018
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,64,4,4,128,1,float16,fp8,0,0.012488000094890594
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,64,4,1,128,1,float16,float16,0,0.00984639972448349
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,64,4,1,128,1,float16,fp8,0,0.010443200170993806
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,64,4,1,128,1,fp8,fp8,0,0.00950080007314682
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,64,4,2,128,1,float16,float16,0,0.01040000021457672
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,64,4,2,128,1,float16,fp8,0,0.009556800127029419
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,64,4,4,128,1,fp8,fp8,0,0.010417599976062775
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,64,4,2,128,1,fp8,fp8,0,0.010417599976062775
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,32,4,1,128,1,float16,float16,0,0.03357760012149811
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,64,4,2,128,1,float16,fp8,0,0.010492800176143647
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,32,4,1,128,1,fp8,fp8,0,0.037088000774383546
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,32,4,2,128,1,float16,float16,0,0.03329919874668121
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,64,4,4,128,1,float16,fp8,0,0.010414399951696397
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,32,4,4,128,1,float16,float16,0,0.022734400629997254
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,32,4,2,128,1,float16,fp8,0,0.03712159991264343
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,32,4,2,128,1,fp8,fp8,0,0.037108799815177916
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,32,4,4,128,1,float16,fp8,0,0.023491199314594268
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,32,4,4,128,1,fp8,fp8,0,0.022785599529743194
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,32,4,1,128,1,float16,float16,0,0.021388800442218782
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,64,4,2,128,1,float16,float16,0,0.010460799932479859
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,32,4,1,128,1,float16,fp8,0,0.02276960015296936
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,32,4,1,128,1,fp8,fp8,0,0.02279199957847595
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,32,4,2,128,1,float16,float16,0,0.020689600706100465
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,32,4,2,128,1,float16,fp8,0,0.022776000201702118
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,32,4,4,128,1,float16,float16,0,0.014612799882888794
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,32,4,4,128,1,float16,fp8,0,0.01456640064716339
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,32,4,1,128,1,float16,float16,0,0.014580799639225006
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,32,4,1,128,1,float16,fp8,0,0.015041600167751312
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,32,4,1,128,1,fp8,fp8,0,0.014985600113868713
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,32,4,2,128,1,float16,float16,0,0.014542399346828461
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,32,4,2,128,1,float16,fp8,0,0.014678399264812469
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,32,4,4,128,1,float16,float16,0,0.010438399761915207
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,32,4,4,128,1,float16,fp8,0,0.010471999645233154
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,32,4,4,128,1,fp8,fp8,0,0.01055999994277954
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,32,4,1,128,1,float16,float16,0,0.010567999631166457
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,32,4,1,128,1,float16,fp8,0,0.010487999767065048
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,32,4,1,128,1,fp8,fp8,0,0.010628800094127654
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,32,4,2,128,1,float16,float16,0,0.01048159971833229
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,32,4,2,128,1,float16,fp8,0,0.010491199791431427
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,32,4,2,128,1,fp8,fp8,0,0.010502400249242783
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,32,4,4,128,1,float16,float16,0,0.010494399815797806
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,32,4,4,128,1,float16,fp8,0,0.010489600151777268
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,32,4,4,128,1,fp8,fp8,0,0.010480000078678131
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,32,4,1,128,1,float16,float16,0,0.010446400195360184
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,32,4,1,128,1,float16,fp8,0,0.010505600273609162
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,32,4,1,128,1,fp8,fp8,0,0.010492800176143647
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,32,4,2,128,1,float16,fp8,0,0.010478399693965912
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,32,4,2,128,1,fp8,fp8,0,0.010487999767065048
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,32,4,4,128,1,float16,float16,0,0.01045600026845932
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,32,4,4,128,1,float16,fp8,0,0.010396800190210342
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,32,4,4,128,1,fp8,fp8,0,0.01037919968366623
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,32,4,1,128,1,float16,float16,0,0.010462400317192078
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,32,4,1,128,1,float16,fp8,0,0.010468800365924836
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,32,4,1,128,1,fp8,fp8,0,0.01043360009789467
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,32,4,2,128,1,float16,float16,0,0.010384000092744827
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,32,4,2,128,1,float16,fp8,0,0.010487999767065048
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,32,4,2,128,1,fp8,fp8,0,0.0104592002928257
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,32,4,4,128,1,float16,float16,0,0.010380800068378448
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,32,4,4,128,1,float16,fp8,0,0.009379199892282485
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,32,4,4,128,1,fp8,fp8,0,0.009352000057697296
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,32,4,1,128,1,float16,float16,0,0.009430400282144546
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,32,4,1,128,1,float16,fp8,0,0.03705439865589142
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,32,4,1,128,1,float16,fp8,0,0.008950400352478027
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,32,4,1,128,1,fp8,fp8,0,0.00870719999074936
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,32,4,2,128,1,float16,float16,0,0.00942559987306595
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,32,4,2,128,1,float16,fp8,0,0.010398399829864503
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,32,4,2,128,1,fp8,fp8,0,0.009441599994897843
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,128,4,1,128,1,fp8,fp8,0,0.010435199737548828
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,32,4,4,128,1,float16,float16,0,0.01043360009789467
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,32,4,4,128,1,float16,fp8,0,0.009052799642086029
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,32,4,1,128,1,float16,float16,0,0.010375999659299851
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,32,4,2,128,1,fp8,fp8,0,0.02332639992237091
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,32,4,1,128,1,float16,fp8,0,0.00859839990735054
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,32,4,4,128,1,fp8,fp8,0,0.008502399921417237
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,32,4,1,128,1,fp8,fp8,0,0.009356799721717834
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,32,4,2,128,1,float16,float16,0,0.010401599854230881
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,32,4,2,128,1,float16,fp8,0,0.010380800068378448
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,32,4,2,128,1,fp8,fp8,0,0.00857279971241951
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,32,4,4,128,1,float16,fp8,0,0.009726399928331375
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,32,4,4,128,1,fp8,fp8,0,0.010375999659299851
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,32,4,4,128,1,float16,float16,0,0.008908800035715102
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,32,4,1,128,1,float16,fp8,0,0.009758400171995163
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,32,4,2,128,1,fp8,fp8,0,0.01565439999103546
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,32,4,2,128,1,float16,float16,0,0.008561599999666214
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,32,4,2,128,1,float16,fp8,0,0.008401600271463394
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,32,4,2,128,1,fp8,fp8,0,0.009286399930715561
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,16,4,1,128,1,float16,fp8,0,0.033169600367546084
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,16,4,1,128,1,fp8,fp8,0,0.03299039900302887
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,16,4,2,128,1,float16,float16,0,0.03287520110607147
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,16,4,2,128,1,float16,fp8,0,0.033025598526000975
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,16,4,4,128,1,float16,float16,0,0.02081120014190674
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,16,4,2,128,1,fp8,fp8,0,0.033102399110794066
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,16,4,4,128,1,float16,fp8,0,0.021065600216388702
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,16,4,4,128,1,fp8,fp8,0,0.020662400126457214
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,16,4,1,128,1,float16,fp8,0,0.02072480022907257
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,16,4,1,128,1,fp8,fp8,0,0.020694400370121
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,16,4,2,128,1,float16,float16,0,0.020660799741744996
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,16,4,2,128,1,float16,fp8,0,0.02075839936733246
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,16,4,2,128,1,fp8,fp8,0,0.020703999698162077
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,16,4,4,128,1,float16,float16,0,0.014535999298095703
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,16,4,4,128,1,float16,fp8,0,0.014502400159835815
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,16,4,4,128,1,fp8,fp8,0,0.014580799639225006
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,16,4,1,128,1,float16,fp8,0,0.014537599682807923
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,16,4,1,128,1,fp8,fp8,0,0.014505599439144135
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,16,4,2,128,1,float16,float16,0,0.014419199526309967
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,16,4,2,128,1,float16,fp8,0,0.014502400159835815
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,16,4,2,128,1,fp8,fp8,0,0.014521600306034088
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,16,4,4,128,1,float16,float16,0,0.010428799688816071
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,16,4,4,128,1,float16,fp8,0,0.010468800365924836
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,16,4,4,128,1,fp8,fp8,0,0.010436800122261048
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,16,4,1,128,1,float16,float16,0,0.010447999835014344
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,16,4,1,128,1,float16,fp8,0,0.010475199669599533
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,16,4,1,128,1,fp8,fp8,0,0.01048320010304451
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,16,4,2,128,1,float16,float16,0,0.010478399693965912
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,16,4,2,128,1,float16,fp8,0,0.01045600026845932
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,16,4,2,128,1,fp8,fp8,0,0.010462400317192078
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,16,4,4,128,1,float16,float16,0,0.01043199971318245
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,16,4,4,128,1,float16,fp8,0,0.010470400005578995
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,16,4,4,128,1,fp8,fp8,0,0.010487999767065048
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,16,4,1,128,1,float16,float16,0,0.010419200360774993
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,32,4,4,128,1,fp8,fp8,0,0.014689600467681885
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,16,4,1,128,1,float16,fp8,0,0.010452800244092942
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,16,4,2,128,1,float16,float16,0,0.010409600287675857
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,16,4,1,128,1,fp8,fp8,0,0.010388799756765366
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,32,4,1,128,1,float16,float16,0,0.00841120034456253
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,16,4,2,128,1,float16,fp8,0,0.010422399640083313
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,16,4,2,128,1,fp8,fp8,0,0.010377600044012069
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,16,4,4,128,1,float16,float16,0,0.009404800087213516
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,16,4,4,128,1,float16,fp8,0,0.00939679965376854
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,16,4,4,128,1,fp8,fp8,0,0.010355199873447418
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,16,4,1,128,1,float16,float16,0,0.009736000001430512
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,16,4,1,128,1,float16,float16,0,0.033024001121521
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,16,4,1,128,1,float16,fp8,0,0.00905120000243187
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,16,4,1,128,1,fp8,fp8,0,0.010422399640083313
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,16,4,2,128,1,float16,fp8,0,0.010401599854230881
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16,4,4,128,1,float16,float16,0,0.009780800342559815
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,16,4,1,128,1,float16,float16,0,0.020692799985408784
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16,4,4,128,1,fp8,fp8,0,0.010388799756765366
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16,4,1,128,1,float16,float16,0,0.010385599732398988
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,32,4,2,128,1,float16,float16,0,0.0104592002928257
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16,4,1,128,1,float16,fp8,0,0.009550400078296661
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16,4,1,128,1,fp8,fp8,0,0.010169599950313569
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16,4,2,128,1,float16,float16,0,0.010447999835014344
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,16,4,1,128,1,float16,float16,0,0.014472000300884247
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16,4,2,128,1,fp8,fp8,0,0.010777600109577179
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16,4,2,128,1,float16,fp8,0,0.008504000306129456
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16,4,4,128,1,float16,float16,0,0.009681600332260131
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16,4,4,128,1,float16,fp8,0,0.009033600240945816
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16,4,4,128,1,fp8,fp8,0,0.00846719965338707
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16,4,1,128,1,float16,float16,0,0.009571199864149093
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16,4,1,128,1,float16,fp8,0,0.008436799794435502
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16,4,1,128,1,fp8,fp8,0,0.008399999886751174
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16,4,2,128,1,float16,float16,0,0.008427199721336365
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16,4,2,128,1,fp8,fp8,0,0.008481600135564805
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16,4,4,128,1,float16,float16,0,0.008617600053548813
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16,4,4,128,1,float16,fp8,0,0.00841279998421669
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16,4,4,128,1,fp8,fp8,0,0.008379200100898742
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16,4,1,128,1,float16,float16,0,0.008454400300979614
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16,4,1,128,1,float16,fp8,0,0.008459199965000153
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16,4,1,128,1,fp8,fp8,0,0.009275200217962265
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16,4,2,128,1,float16,float16,0,0.008459199965000153
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16,4,2,128,1,fp8,fp8,0,0.008372800052165985
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,1,4,1,128,1,float16,float16,0,0.030963200330734252
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,1,4,1,128,1,float16,fp8,0,0.028908801078796387
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,1,4,1,128,1,fp8,fp8,0,0.029019200801849367
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,1,4,2,128,1,float16,float16,0,0.0312175989151001
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,1,4,2,128,1,float16,fp8,0,0.02905600070953369
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,1,4,2,128,1,fp8,fp8,0,0.029028800129890443
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,1,4,4,128,1,float16,float16,0,0.02064319998025894
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,1,4,4,128,1,float16,fp8,0,0.018713599443435668
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,1,4,4,128,1,fp8,fp8,0,0.019126400351524353
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,1,4,1,128,1,float16,float16,0,0.02062080055475235
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,1,4,1,128,1,float16,fp8,0,0.018598400056362152
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,1,4,1,128,1,fp8,fp8,0,0.01871519982814789
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,32,4,1,128,1,fp8,fp8,0,0.010175999999046326
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,1,4,2,128,1,float16,float16,0,0.02022400051355362
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,1,4,2,128,1,float16,fp8,0,0.018641600012779237
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,1,4,2,128,1,fp8,fp8,0,0.018614399433135986
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1,4,4,128,1,float16,float16,0,0.014596800506114959
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,16,4,2,128,1,fp8,fp8,0,0.009622400254011154
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1,4,4,128,1,float16,fp8,0,0.012582400441169738
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1,4,4,128,1,fp8,fp8,0,0.013368000090122224
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1,4,1,128,1,float16,float16,0,0.01456640064716339
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1,4,1,128,1,float16,fp8,0,0.01255200058221817
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16,4,4,128,1,float16,fp8,0,0.009081599861383438
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1,4,1,128,1,fp8,fp8,0,0.012611199915409089
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1,4,2,128,1,float16,float16,0,0.01443839967250824
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1,4,4,128,1,float16,float16,0,0.010566399991512298
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1,4,4,128,1,float16,fp8,0,0.010438399761915207
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1,4,4,128,1,fp8,fp8,0,0.01048159971833229
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1,4,1,128,1,float16,float16,0,0.010465600341558457
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1,4,1,128,1,float16,fp8,0,0.010471999645233154
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1,4,1,128,1,fp8,fp8,0,0.010486400127410889
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1,4,2,128,1,float16,float16,0,0.010476800054311753
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1,4,2,128,1,float16,fp8,0,0.010454399883747101
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1,4,4,128,1,float16,float16,0,0.010424000024795533
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1,4,2,128,1,fp8,fp8,0,0.010500799864530563
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1,4,4,128,1,float16,fp8,0,0.01043040007352829
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1,4,4,128,1,fp8,fp8,0,0.010470400005578995
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1,4,1,128,1,float16,float16,0,0.0104592002928257
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1,4,1,128,1,float16,fp8,0,0.010451199859380722
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16,4,2,128,1,float16,fp8,0,0.008644799888134002
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1,4,1,128,1,fp8,fp8,0,0.010465600341558457
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1,4,2,128,1,float16,float16,0,0.010486400127410889
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1,4,2,128,1,float16,fp8,0,0.01043199971318245
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1,4,2,128,1,fp8,fp8,0,0.00945120006799698
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1,4,4,128,1,float16,fp8,0,0.00888800024986267
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1,4,4,128,1,fp8,fp8,0,0.008499199897050858
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1,4,1,128,1,float16,float16,0,0.010435199737548828
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1,4,1,128,1,float16,fp8,0,0.008428800106048583
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16,4,2,128,1,float16,fp8,0,0.008737599849700928
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1,4,1,128,1,fp8,fp8,0,0.008504000306129456
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1,4,2,128,1,float16,float16,0,0.008423999696969987
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1,4,2,128,1,fp8,fp8,0,0.009419199824333192
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1,4,2,128,1,float16,fp8,0,0.008564800024032593
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1,4,4,128,1,float16,float16,0,0.010380800068378448
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1,4,4,128,1,float16,fp8,0,0.008872000128030777
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1,4,4,128,1,fp8,fp8,0,0.008425600081682205
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1,4,1,128,1,float16,float16,0,0.009140799939632415
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1,4,1,128,1,float16,fp8,0,0.009337600320577621
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1,4,1,128,1,fp8,fp8,0,0.008459199965000153
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1,4,2,128,1,float16,float16,0,0.009404800087213516
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1,4,2,128,1,float16,fp8,0,0.008631999790668487
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1,4,2,128,1,fp8,fp8,0,0.008433599770069123
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1,4,4,128,1,float16,float16,0,0.009065599739551544
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1,4,4,128,1,float16,fp8,0,0.008612799644470214
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1,4,4,128,1,fp8,fp8,0,0.008425600081682205
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1,4,1,128,1,float16,float16,0,0.008398400247097015
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1,4,1,128,1,float16,fp8,0,0.010190399736166
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1,4,1,128,1,fp8,fp8,0,0.008491200208663941
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1,4,2,128,1,float16,float16,0,0.00843999981880188
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1,4,2,128,1,float16,fp8,0,0.008408000320196151
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1,4,4,128,1,float16,float16,0,0.008369600027799606
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1,4,4,128,1,float16,fp8,0,0.008427199721336365
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1,4,4,128,1,fp8,fp8,0,0.008417599648237229
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1,4,1,128,1,float16,float16,0,0.008502399921417237
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1,4,1,128,1,float16,fp8,0,0.008430399745702744
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1,4,2,128,1,float16,float16,0,0.008406399935483932
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1,4,2,128,1,float16,fp8,0,0.008755200356245042
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1,4,2,128,1,fp8,fp8,0,0.00846560001373291
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,16,4,2,128,1,float16,float16,0,0.009419199824333192
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1,4,2,128,1,float16,fp8,0,0.012555199861526489
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1,4,2,128,1,fp8,fp8,0,0.012537600100040435
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,16384,2,1,128,1,float16,float16,0,0.924403190612793
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,16384,2,1,128,1,float16,fp8,0,0.9260335922241211
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16384,2,2,128,1,float16,float16,0,0.5208928108215332
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16384,2,2,128,1,float16,fp8,0,0.5404208183288575
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,16384,2,1,128,1,fp8,fp8,0,0.9266752243041992
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1,4,4,128,1,float16,float16,0,0.010441599786281586
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16384,2,2,128,1,fp8,fp8,0,0.5236703872680664
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16384,2,1,128,1,float16,float16,0,0.5172768115997315
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16384,2,1,128,1,float16,fp8,0,0.5395167827606201
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16384,2,2,128,1,float16,float16,0,0.3212863922119141
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16384,2,1,128,1,fp8,fp8,0,0.5254608154296875
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16384,2,2,128,1,float16,fp8,0,0.3250688076019287
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16384,2,2,128,1,fp8,fp8,0,0.32523040771484374
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16384,2,1,128,1,float16,float16,0,0.3287152051925659
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16384,2,1,128,1,float16,fp8,0,0.324616003036499
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16384,2,2,128,1,float16,float16,0,0.20769119262695312
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16384,2,2,128,1,float16,fp8,0,0.20786240100860595
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16384,2,2,128,1,fp8,fp8,0,0.21278879642486573
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1,4,2,128,1,fp8,fp8,0,0.010428799688816071
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16384,2,1,128,1,float16,float16,0,0.2067903995513916
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16384,2,1,128,1,float16,fp8,0,0.20755040645599365
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16384,2,1,128,1,fp8,fp8,0,0.206931209564209
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1,4,1,128,1,fp8,fp8,0,0.008425600081682205
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,12288,2,1,128,1,float16,float16,0,0.56080322265625
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,12288,2,2,128,1,float16,fp8,0,0.33540000915527346
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,12288,2,1,128,1,float16,fp8,0,0.5803055763244629
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,12288,2,2,128,1,fp8,fp8,0,0.33696959018707273
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,12288,2,1,128,1,fp8,fp8,0,0.5797103881835938
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,12288,2,1,128,1,float16,float16,0,0.32068800926208496
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,12288,2,1,128,1,float16,fp8,0,0.33477120399475097
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,12288,2,2,128,1,float16,fp8,0,0.21144320964813232
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,12288,2,1,128,1,fp8,fp8,0,0.3356800079345703
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,12288,2,2,128,1,float16,float16,0,0.20699520111083985
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,12288,2,2,128,1,fp8,fp8,0,0.21139039993286132
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,12288,2,1,128,1,float16,float16,0,0.20594239234924316
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,12288,2,1,128,1,float16,fp8,0,0.21072640419006347
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,12288,2,1,128,1,fp8,fp8,0,0.210809588432312
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,12288,2,2,128,1,float16,float16,0,0.1580415964126587
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,12288,2,2,128,1,float16,fp8,0,0.15820800065994262
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,12288,2,2,128,1,fp8,fp8,0,0.15887999534606934
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,12288,2,1,128,1,float16,float16,0,0.15675359964370728
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,12288,2,1,128,1,float16,fp8,0,0.15738879442214965
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,12288,2,1,128,1,fp8,fp8,0,0.15855679512023926
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,10240,2,2,128,1,float16,float16,0,0.2493056058883667
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,10240,2,2,128,1,float16,fp8,0,0.26214559078216554
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,10240,2,1,128,1,float16,float16,0,0.4068592071533203
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,10240,2,1,128,1,float16,fp8,0,0.43662080764770506
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,10240,2,1,128,1,fp8,fp8,0,0.4370016098022461
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,10240,2,2,128,1,fp8,fp8,0,0.26251199245452883
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,10240,2,1,128,1,float16,float16,0,0.24513120651245118
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,10240,2,1,128,1,float16,fp8,0,0.2625056028366089
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,10240,2,2,128,1,float16,float16,0,0.15000640153884887
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,10240,2,2,128,1,float16,fp8,0,0.15820480585098268
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,10240,2,1,128,1,fp8,fp8,0,0.26103041172027586
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,10240,2,1,128,1,float16,float16,0,0.15129439830780028
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,10240,2,2,128,1,fp8,fp8,0,0.15820000171661378
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,10240,2,1,128,1,fp8,fp8,0,0.15777440071105958
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,10240,2,2,128,1,float16,fp8,0,0.1336591958999634
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,10240,2,2,128,1,fp8,fp8,0,0.133788800239563
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,10240,2,1,128,1,float16,float16,0,0.13200960159301758
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,10240,2,1,128,1,float16,fp8,0,0.13422399759292603
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,10240,2,1,128,1,fp8,fp8,0,0.13357280492782592
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16384,2,1,128,1,fp8,fp8,0,0.32462880611419676
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,8192,2,2,128,1,float16,float16,0,0.28531999588012696
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,8192,2,1,128,1,float16,float16,0,0.5061791896820068
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,8192,2,1,128,1,float16,fp8,0,0.5590240001678467
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,8192,2,1,128,1,fp8,fp8,0,0.5576799869537353
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,8192,2,2,128,1,float16,fp8,0,0.31522719860076903
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,8192,2,1,128,1,float16,float16,0,0.2845952033996582
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,8192,2,2,128,1,fp8,fp8,0,0.31473920345306394
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,8192,2,2,128,1,float16,float16,0,0.18023840188980103
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,8192,2,1,128,1,float16,fp8,0,0.3136159896850586
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,8192,2,1,128,1,fp8,fp8,0,0.3127392053604126
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,12288,2,2,128,1,float16,float16,0,0.3238687992095947
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,8192,2,2,128,1,fp8,fp8,0,0.19226880073547364
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,8192,2,1,128,1,float16,float16,0,0.17848000526428223
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,8192,2,2,128,1,float16,float16,0,0.11258879899978638
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,8192,2,2,128,1,float16,fp8,0,0.1194208025932312
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,8192,2,1,128,1,fp8,fp8,0,0.1919808030128479
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,8192,2,2,128,1,fp8,fp8,0,0.1193120002746582
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,8192,2,1,128,1,float16,float16,0,0.11328639984130859
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,8192,2,1,128,1,float16,fp8,0,0.11901439428329467
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,8192,2,1,128,1,fp8,fp8,0,0.11936160326004028
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,8192,2,2,128,1,float16,float16,0,0.10859040021896363
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,8192,2,2,128,1,float16,fp8,0,0.10878239870071411
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,8192,2,2,128,1,fp8,fp8,0,0.10930559635162354
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,8192,2,1,128,1,float16,float16,0,0.10926079750061035
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,8192,2,1,128,1,float16,fp8,0,0.10877439975738526
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,8192,2,1,128,1,fp8,fp8,0,0.108787202835083
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,6144,2,1,128,1,float16,float16,0,0.3092639923095703
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,6144,2,2,128,1,float16,float16,0,0.18184800148010255
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,6144,2,1,128,1,float16,fp8,0,0.36205120086669923
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,6144,2,1,128,1,fp8,fp8,0,0.36403679847717285
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,6144,2,2,128,1,float16,fp8,0,0.20789918899536133
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,6144,2,2,128,1,fp8,fp8,0,0.20590879917144775
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,6144,2,1,128,1,float16,float16,0,0.1805583953857422
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,6144,2,1,128,1,float16,fp8,0,0.20566720962524415
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,6144,2,1,128,1,fp8,fp8,0,0.20519840717315674
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,6144,2,2,128,1,float16,float16,0,0.11797280311584472
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,6144,2,2,128,1,float16,fp8,0,0.12833600044250487
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,6144,2,1,128,1,float16,float16,0,0.11677119731903077
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,6144,2,2,128,1,fp8,fp8,0,0.12811199426651002
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,10240,2,1,128,1,float16,fp8,0,0.1575103998184204
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,6144,2,1,128,1,float16,fp8,0,0.12820160388946533
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,10240,2,2,128,1,float16,float16,0,0.132804799079895
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,6144,2,2,128,1,float16,float16,0,0.08718879818916321
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,6144,2,2,128,1,float16,fp8,0,0.091348797082901
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,6144,2,2,128,1,fp8,fp8,0,0.0918287992477417
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,6144,2,1,128,1,float16,float16,0,0.0871392011642456
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,6144,2,1,128,1,float16,fp8,0,0.09212960004806518
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,6144,2,1,128,1,fp8,fp8,0,0.09187359809875488
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,6144,2,2,128,1,float16,fp8,0,0.0842415988445282
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,6144,2,2,128,1,fp8,fp8,0,0.08425440192222595
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,6144,2,1,128,1,float16,float16,0,0.08366720080375671
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,6144,2,2,128,1,float16,float16,0,0.08415679931640625
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,6144,2,1,128,1,float16,fp8,0,0.08419520258903504
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,4096,2,1,128,1,float16,float16,0,0.29235520362854006
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,8192,2,2,128,1,float16,fp8,0,0.1922976016998291
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,4096,2,1,128,1,float16,fp8,0,0.37179200649261473
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,4096,2,1,128,1,fp8,fp8,0,0.3711184024810791
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,8192,2,1,128,1,float16,fp8,0,0.19132159948348998
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,4096,2,2,128,1,float16,fp8,0,0.20633440017700194
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,4096,2,2,128,1,float16,float16,0,0.1689247965812683
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,4096,2,2,128,1,fp8,fp8,0,0.2066416025161743
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,4096,2,1,128,1,float16,fp8,0,0.20447359085083008
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,4096,2,2,128,1,float16,fp8,0,0.12337759733200074
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,4096,2,2,128,1,fp8,fp8,0,0.12328319549560547
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,4096,2,1,128,1,float16,float16,0,0.10510560274124145
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,4096,2,1,128,1,float16,fp8,0,0.12308160066604615
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,4096,2,1,128,1,fp8,fp8,0,0.12342239618301391
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,4096,2,2,128,1,float16,float16,0,0.06540639996528626
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,4096,2,2,128,1,float16,fp8,0,0.07396000027656555
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,4096,2,2,128,1,fp8,fp8,0,0.07387359738349915
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,4096,2,1,128,1,float16,float16,0,0.06570720076560974
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,4096,2,1,128,1,float16,fp8,0,0.07388479709625244
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,4096,2,1,128,1,fp8,fp8,0,0.07400320172309875
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,4096,2,2,128,1,float16,float16,0,0.05983520150184631
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,4096,2,2,128,1,float16,fp8,0,0.0640720009803772
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,4096,2,1,128,1,float16,float16,0,0.06158080101013184
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,4096,2,1,128,1,float16,fp8,0,0.06375679969787598
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,4096,2,1,128,1,fp8,fp8,0,0.06376000046730042
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,6144,2,1,128,1,fp8,fp8,0,0.12835839986801148
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,4096,2,2,128,1,float16,float16,0,0.05922399759292603
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,4096,2,2,128,1,float16,fp8,0,0.059571200609207155
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,4096,2,2,128,1,fp8,fp8,0,0.05963199734687805
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,4096,2,1,128,1,float16,float16,0,0.05953599810600281
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,4096,2,1,128,1,float16,fp8,0,0.059543997049331665
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,4096,2,1,128,1,fp8,fp8,0,0.05957760214805603
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,3072,2,1,128,1,float16,float16,0,0.18824479579925538
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,3072,2,1,128,1,float16,fp8,0,0.2520816087722778
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,6144,2,1,128,1,fp8,fp8,0,0.08420479893684388
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,3072,2,2,128,1,float16,float16,0,0.11380800008773803
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,3072,2,2,128,1,float16,fp8,0,0.14121919870376587
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,3072,2,1,128,1,fp8,fp8,0,0.25119519233703613
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,3072,2,2,128,1,fp8,fp8,0,0.14126559495925903
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,3072,2,1,128,1,float16,float16,0,0.11149439811706544
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,3072,2,1,128,1,float16,fp8,0,0.14124640226364135
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,3072,2,2,128,1,float16,float16,0,0.07200959920883179
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,3072,2,2,128,1,float16,fp8,0,0.08598399758338929
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,3072,2,1,128,1,fp8,fp8,0,0.1414896011352539
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,3072,2,2,128,1,fp8,fp8,0,0.08550879955291749
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,3072,2,1,128,1,float16,float16,0,0.07190240025520325
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,3072,2,1,128,1,float16,fp8,0,0.08605440258979798
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,4096,2,1,128,1,float16,float16,0,0.16787359714508057
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,3072,2,2,128,1,float16,fp8,0,0.05757759809494019
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,4096,2,2,128,1,float16,float16,0,0.10740799903869629
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,3072,2,1,128,1,float16,float16,0,0.051425600051879884
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,3072,2,1,128,1,float16,fp8,0,0.05756639838218689
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,4096,2,1,128,1,fp8,fp8,0,0.20526399612426757
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,3072,2,2,128,1,float16,float16,0,0.04762240052223206
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,3072,2,2,128,1,float16,fp8,0,0.05129439830780029
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,3072,2,2,128,1,fp8,fp8,0,0.05140320062637329
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,3072,2,1,128,1,float16,float16,0,0.04751999974250794
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,3072,2,1,128,1,fp8,fp8,0,0.05138559937477112
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,3072,2,1,128,1,float16,fp8,0,0.050449597835540774
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,3072,2,2,128,1,float16,float16,0,0.04720160067081451
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,3072,2,2,128,1,float16,fp8,0,0.047284799814224246
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,3072,2,2,128,1,fp8,fp8,0,0.04736959934234619
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,3072,2,1,128,1,float16,float16,0,0.047331199049949646
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,3072,2,1,128,1,float16,fp8,0,0.04739519953727722
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,2048,2,1,128,1,float16,float16,0,0.18847520351409913
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,2048,2,1,128,1,float16,fp8,0,0.2770751953125
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,2048,2,1,128,1,fp8,fp8,0,0.2771680116653442
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,2048,2,2,128,1,float16,float16,0,0.10968159437179566
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,2048,2,2,128,1,float16,fp8,0,0.15058079957962037
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,2048,2,2,128,1,fp8,fp8,0,0.15178719758987427
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,2048,2,1,128,1,float16,float16,0,0.1080191969871521
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,2048,2,1,128,1,float16,fp8,0,0.15015840530395508
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,2048,2,2,128,1,float16,float16,0,0.06779040098190307
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,2048,2,1,128,1,fp8,fp8,0,0.14948480129241942
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,2048,2,2,128,1,float16,fp8,0,0.08624479770660401
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,2048,2,2,128,1,fp8,fp8,0,0.08620160222053527
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,2048,2,1,128,1,float16,float16,0,0.06591039896011353
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,2048,2,1,128,1,float16,fp8,0,0.08624479770660401
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,2048,2,1,128,1,fp8,fp8,0,0.08615999817848205
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,2048,2,2,128,1,float16,float16,0,0.04116959869861603
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,2048,2,2,128,1,float16,fp8,0,0.05142880082130432
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,3072,2,2,128,1,float16,float16,0,0.05145599842071533
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,2048,2,2,128,1,fp8,fp8,0,0.05146080255508423
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,2048,2,1,128,1,float16,float16,0,0.04119200110435486
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,3072,2,2,128,1,fp8,fp8,0,0.05760480165481567
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,2048,2,1,128,1,float16,fp8,0,0.05142239928245544
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,2048,2,2,128,1,float16,float16,0,0.037031999230384825
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,2048,2,2,128,1,float16,fp8,0,0.041171199083328246
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,2048,2,2,128,1,fp8,fp8,0,0.041142401099205014
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,2048,2,1,128,1,float16,float16,0,0.03710240125656128
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,2048,2,1,128,1,float16,fp8,0,0.041131201386451724
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,2048,2,1,128,1,fp8,fp8,0,0.04121440052986145
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,2048,2,2,128,1,float16,float16,0,0.034985598921775815
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,2048,2,2,128,1,float16,fp8,0,0.03714079856872558
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,2048,2,2,128,1,fp8,fp8,0,0.03700000047683716
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,2048,2,1,128,1,float16,float16,0,0.03503359854221344
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,3072,2,1,128,1,fp8,fp8,0,0.04732480049133301
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,2048,2,1,128,1,float16,fp8,0,0.03703359961509704
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,2048,2,1,128,1,fp8,fp8,0,0.0370959997177124
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,2048,2,2,128,1,float16,float16,0,0.035067200660705566
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,2048,2,2,128,1,float16,fp8,0,0.0350271999835968
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,2048,2,1,128,1,float16,float16,0,0.03504959940910339
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,2048,2,1,128,1,float16,fp8,0,0.03496159911155701
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,2048,2,2,128,1,fp8,fp8,0,0.034990400075912476
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,2048,2,1,128,1,fp8,fp8,0,0.035078400373458864
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1536,2,1,128,1,float16,float16,0,0.1271664023399353
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,4096,2,2,128,1,fp8,fp8,0,0.06372159719467163
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1536,2,1,128,1,float16,fp8,0,0.19317439794540406
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1536,2,2,128,1,float16,float16,0,0.07660800218582153
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1536,2,2,128,1,float16,fp8,0,0.10652320384979248
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1536,2,2,128,1,fp8,fp8,0,0.10627360343933105
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1536,2,1,128,1,fp8,fp8,0,0.19392000436782836
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1536,2,1,128,1,float16,float16,0,0.07442560195922851
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1536,2,2,128,1,float16,float16,0,0.0476063996553421
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1536,2,1,128,1,fp8,fp8,0,0.10544159412384033
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1536,2,1,128,1,float16,fp8,0,0.10642080307006836
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1536,2,2,128,1,float16,fp8,0,0.06270239949226379
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1536,2,2,128,1,fp8,fp8,0,0.06298080086708069
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1536,2,1,128,1,float16,float16,0,0.047363200783729555
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1536,2,1,128,1,float16,fp8,0,0.06240479946136475
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1536,2,1,128,1,fp8,fp8,0,0.06240479946136475
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1536,2,2,128,1,float16,float16,0,0.033036801218986514
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1536,2,2,128,1,float16,fp8,0,0.04059840142726898
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1536,2,2,128,1,fp8,fp8,0,0.04108479917049408
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1536,2,1,128,1,float16,float16,0,0.03300319910049439
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1536,2,1,128,1,float16,fp8,0,0.04118559956550598
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1536,2,1,128,1,fp8,fp8,0,0.04026240110397339
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1536,2,2,128,1,float16,fp8,0,0.03302879929542542
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1536,2,2,128,1,fp8,fp8,0,0.032996800541877744
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1536,2,1,128,1,float16,float16,0,0.029284799098968507
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1536,2,1,128,1,float16,fp8,0,0.033022400736808774
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1536,2,1,128,1,fp8,fp8,0,0.03306559920310974
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1536,2,2,128,1,float16,float16,0,0.028891199827194215
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1536,2,2,128,1,float16,fp8,0,0.02996160089969635
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1536,2,2,128,1,fp8,fp8,0,0.03046880066394806
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1536,2,1,128,1,float16,float16,0,0.02884640097618103
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1536,2,1,128,1,float16,fp8,0,0.029363200068473816
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1536,2,1,128,1,fp8,fp8,0,0.029732799530029295
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1536,2,2,128,1,float16,float16,0,0.02884320020675659
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1536,2,2,128,1,float16,fp8,0,0.028790399432182312
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1536,2,2,128,1,fp8,fp8,0,0.028881600499153136
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1536,2,1,128,1,float16,fp8,0,0.028859201073646545
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1536,2,1,128,1,fp8,fp8,0,0.02887359857559204
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,1024,2,1,128,1,float16,float16,0,0.13400479555130004
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,1024,2,1,128,1,float16,fp8,0,0.22643840312957764
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1024,2,2,128,1,float16,float16,0,0.07829599976539611
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,1024,2,1,128,1,fp8,fp8,0,0.2275439977645874
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,3072,2,1,128,1,fp8,fp8,0,0.05760480165481567
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1024,2,2,128,1,float16,fp8,0,0.12240799665451049
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1024,2,2,128,1,fp8,fp8,0,0.12311359643936157
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1024,2,1,128,1,float16,float16,0,0.07765120267868042
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1024,2,1,128,1,fp8,fp8,0,0.12100319862365723
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1024,2,2,128,1,float16,float16,0,0.048574399948120114
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1024,2,1,128,1,float16,fp8,0,0.12075519561767578
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1024,2,2,128,1,float16,fp8,0,0.06825119853019715
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1024,2,1,128,1,float16,float16,0,0.04641599953174591
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1024,2,1,128,1,float16,fp8,0,0.06797279715538025
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1024,2,1,128,1,fp8,fp8,0,0.06792640089988708
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1024,2,2,128,1,float16,fp8,0,0.039164799451828006
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1024,2,2,128,1,float16,float16,0,0.02886880040168762
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1024,2,2,128,1,fp8,fp8,0,0.03917439877986908
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1024,2,1,128,1,float16,float16,0,0.028496000170707702
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1024,2,1,128,1,float16,fp8,0,0.03917919993400574
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1024,2,2,128,1,float16,float16,0,0.02478879988193512
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1024,2,1,128,1,fp8,fp8,0,0.03916319906711578
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1024,2,2,128,1,float16,fp8,0,0.02978079915046692
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1024,2,2,128,1,fp8,fp8,0,0.03091840147972107
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1024,2,1,128,1,float16,float16,0,0.02480800002813339
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1024,2,1,128,1,float16,fp8,0,0.02913439869880676
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1024,2,2,128,1,float16,float16,0,0.022763200104236603
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1024,2,2,128,1,float16,fp8,0,0.02534720003604889
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1024,2,2,128,1,fp8,fp8,0,0.024830399453639983
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1024,2,1,128,1,float16,float16,0,0.022776000201702118
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1024,2,1,128,1,float16,fp8,0,0.0248416006565094
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1024,2,2,128,1,float16,float16,0,0.022767999768257143
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1536,2,2,128,1,float16,float16,0,0.030862399935722352
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1024,2,2,128,1,float16,fp8,0,0.022776000201702118
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1024,2,2,128,1,fp8,fp8,0,0.02330880016088486
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1024,2,1,128,1,float16,float16,0,0.022694399952888487
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1024,2,1,128,1,float16,fp8,0,0.0226623997092247
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1536,2,1,128,1,float16,float16,0,0.028887999057769776
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1024,2,1,128,1,fp8,fp8,0,0.022785599529743194
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1024,2,2,128,1,float16,float16,0,0.02266079932451248
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1024,2,2,128,1,float16,fp8,0,0.022676800191402436
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1024,2,2,128,1,fp8,fp8,0,0.022758400440216063
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,3072,2,1,128,1,fp8,fp8,0,0.08597599864006042
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1024,2,1,128,1,float16,fp8,0,0.022771200537681578
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1024,2,1,128,1,fp8,fp8,0,0.022742399573326112
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,2048,2,1,128,1,fp8,fp8,0,0.05138880014419556
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,512,2,1,128,1,float16,float16,0,0.10920319557189942
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,512,2,1,128,1,float16,fp8,0,0.2033519983291626
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,512,2,2,128,1,float16,float16,0,0.06384000182151794
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,512,2,2,128,1,float16,fp8,0,0.10868959426879883
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,512,2,1,128,1,fp8,fp8,0,0.2047152042388916
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,512,2,2,128,1,fp8,fp8,0,0.1083840012550354
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,512,2,1,128,1,float16,float16,0,0.06177440285682678
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1024,2,2,128,1,fp8,fp8,0,0.06802719831466675
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,512,2,1,128,1,float16,fp8,0,0.10705280303955078
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,512,2,2,128,1,float16,float16,0,0.03853600025177002
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,512,2,2,128,1,float16,fp8,0,0.05964959859848022
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,512,2,1,128,1,float16,float16,0,0.036585599184036255
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,512,2,2,128,1,fp8,fp8,0,0.05969600081443786
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,512,2,2,128,1,float16,float16,0,0.0227743998169899
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,512,2,1,128,1,float16,fp8,0,0.059654402732849124
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,512,2,2,128,1,float16,fp8,0,0.03364799916744232
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,512,2,2,128,1,fp8,fp8,0,0.03304480016231537
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,512,2,1,128,1,float16,float16,0,0.02268960028886795
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,512,2,1,128,1,float16,fp8,0,0.03442560136318207
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,512,2,2,128,1,float16,float16,0,0.018719999492168425
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1024,2,1,128,1,fp8,fp8,0,0.030934399366378783
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,512,2,2,128,1,float16,fp8,0,0.024817599356174468
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,512,2,2,128,1,fp8,fp8,0,0.024833600223064422
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,512,2,1,128,1,float16,float16,0,0.018665599822998046
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1024,2,1,128,1,fp8,fp8,0,0.02513119876384735
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,512,2,1,128,1,float16,fp8,0,0.0248416006565094
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,512,2,2,128,1,float16,float16,0,0.016976000368595125
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,512,2,2,128,1,float16,fp8,0,0.019438399374485014
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,512,2,2,128,1,fp8,fp8,0,0.020623999834060668
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,512,2,1,128,1,float16,float16,0,0.016582399606704712
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,512,2,1,128,1,float16,fp8,0,0.020630399882793426
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,512,2,1,128,1,fp8,fp8,0,0.019156800210475923
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,512,2,2,128,1,float16,float16,0,0.01656640022993088
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,512,2,2,128,1,float16,fp8,0,0.016966399550437928
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,512,2,2,128,1,fp8,fp8,0,0.01858240067958832
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,512,2,1,128,1,float16,float16,0,0.016599999368190767
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,512,2,1,128,1,float16,fp8,0,0.016867199540138246
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,512,2,2,128,1,float16,float16,0,0.016569599509239197
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,512,2,1,128,1,fp8,fp8,0,0.01685120016336441
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,512,2,2,128,1,float16,fp8,0,0.01656319946050644
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,512,2,2,128,1,fp8,fp8,0,0.016612799465656282
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,512,2,1,128,1,float16,float16,0,0.016547200083732606
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,512,2,1,128,1,float16,fp8,0,0.01666080057621002
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,512,2,2,128,1,float16,float16,0,0.016612799465656282
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,512,2,1,128,1,fp8,fp8,0,0.016638399660587312
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,512,2,2,128,1,float16,fp8,0,0.016599999368190767
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,512,2,2,128,1,fp8,fp8,0,0.016628800332546233
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,512,2,1,128,1,float16,float16,0,0.016577599942684172
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,512,2,1,128,1,float16,fp8,0,0.016579200327396394
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,512,2,1,128,1,fp8,fp8,0,0.016612799465656282
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,256,2,1,128,1,float16,float16,0,0.053553599119186404
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,256,2,1,128,1,float16,fp8,0,0.10017759799957275
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,256,2,1,128,1,fp8,fp8,0,0.09864479899406434
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,256,2,2,128,1,float16,float16,0,0.03128800094127655
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,256,2,2,128,1,float16,fp8,0,0.05347359776496887
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,256,2,1,128,1,float16,float16,0,0.03031040132045746
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,256,2,2,128,1,fp8,fp8,0,0.053508800268173215
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,256,2,1,128,1,float16,fp8,0,0.05353119969367981
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,256,2,2,128,1,float16,float16,0,0.018694399297237395
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,256,2,1,128,1,fp8,fp8,0,0.05349439978599548
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,256,2,1,128,1,float16,float16,0,0.018587200343608855
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,256,2,2,128,1,float16,fp8,0,0.03094879984855652
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,256,2,2,128,1,fp8,fp8,0,0.03097119927406311
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,256,2,1,128,1,float16,fp8,0,0.030899199843406677
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,256,2,1,128,1,fp8,fp8,0,0.03094879984855652
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,256,2,2,128,1,float16,fp8,0,0.02271360009908676
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,256,2,2,128,1,fp8,fp8,0,0.020904000103473663
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,512,2,1,128,1,fp8,fp8,0,0.10665600299835205
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,256,2,1,128,1,float16,float16,0,0.01652639955282211
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,256,2,1,128,1,float16,fp8,0,0.020793600380420683
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,256,2,1,128,1,fp8,fp8,0,0.020716799795627593
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,256,2,2,128,1,float16,float16,0,0.014587199687957764
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,256,2,2,128,1,float16,fp8,0,0.016663999855518342
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,256,2,2,128,1,fp8,fp8,0,0.016612799465656282
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,256,2,1,128,1,float16,float16,0,0.014588800072669984
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,512,2,1,128,1,fp8,fp8,0,0.05956799983978271
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,256,2,1,128,1,float16,fp8,0,0.016568000614643096
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,256,2,2,128,1,float16,float16,0,0.01345600038766861
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,256,2,2,128,1,float16,fp8,0,0.014611199498176575
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,256,2,1,128,1,float16,float16,0,0.012883199751377106
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,256,2,1,128,1,fp8,fp8,0,0.014508800208568573
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,256,2,1,128,1,float16,fp8,0,0.014555199444293976
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,512,2,1,128,1,fp8,fp8,0,0.033843201398849485
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,256,2,2,128,1,float16,float16,0,0.012544000148773193
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,256,2,2,128,1,float16,fp8,0,0.014504000544548035
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,256,2,1,128,1,float16,float16,0,0.01266240030527115
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,256,2,1,128,1,float16,fp8,0,0.014440000057220459
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,256,2,2,128,1,fp8,fp8,0,0.014511999487876893
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,256,2,1,128,1,fp8,fp8,0,0.014497600495815277
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,256,2,2,128,1,float16,float16,0,0.01276639997959137
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,256,2,2,128,1,float16,fp8,0,0.013513599336147309
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,256,2,2,128,1,fp8,fp8,0,0.013568000495433807
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,256,2,1,128,1,float16,float16,0,0.0125231996178627
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,256,2,1,128,1,float16,fp8,0,0.01318880021572113
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,256,2,1,128,1,fp8,fp8,0,0.013055999577045441
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,256,2,2,128,1,float16,float16,0,0.012537600100040435
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,256,2,2,128,1,float16,fp8,0,0.012516799569129943
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,256,2,2,128,1,fp8,fp8,0,0.012579199671745301
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,256,2,1,128,1,float16,float16,0,0.01252640038728714
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,256,2,1,128,1,float16,fp8,0,0.012566399574279786
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,256,2,1,128,1,fp8,fp8,0,0.012494400143623352
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,128,2,1,128,1,float16,float16,0,0.032971200346946714
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,128,2,1,128,1,float16,fp8,0,0.055504000186920165
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,128,2,1,128,1,fp8,fp8,0,0.05545600056648255
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,128,2,2,128,1,float16,float16,0,0.020695999264717102
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,128,2,2,128,1,float16,fp8,0,0.03099200129508972
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,128,2,2,128,1,fp8,fp8,0,0.03146080076694489
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,128,2,1,128,1,float16,float16,0,0.020139199495315552
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,128,2,1,128,1,float16,fp8,0,0.03140000104904175
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,128,2,1,128,1,fp8,fp8,0,0.03101919889450073
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,128,2,2,128,1,float16,float16,0,0.014483200013637542
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,128,2,2,128,1,float16,fp8,0,0.01900160014629364
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1024,2,1,128,1,float16,float16,0,0.022654399275779724
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,128,2,1,128,1,float16,float16,0,0.014467200636863709
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,128,2,2,128,1,fp8,fp8,0,0.01919520050287247
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,128,2,1,128,1,float16,fp8,0,0.018692800402641298
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,128,2,1,128,1,fp8,fp8,0,0.01902880072593689
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,128,2,2,128,1,float16,float16,0,0.01250240057706833
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,128,2,2,128,1,float16,fp8,0,0.014601600170135499
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,128,2,2,128,1,fp8,fp8,0,0.014582400023937226
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,128,2,1,128,1,float16,float16,0,0.01250240057706833
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,128,2,1,128,1,float16,fp8,0,0.014577600359916686
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,128,2,2,128,1,float16,float16,0,0.010480000078678131
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,128,2,2,128,1,float16,fp8,0,0.012508800625801087
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,128,2,1,128,1,fp8,fp8,0,0.014601600170135499
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,128,2,2,128,1,fp8,fp8,0,0.012532800436019897
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,128,2,1,128,1,float16,float16,0,0.010476800054311753
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,128,2,1,128,1,float16,fp8,0,0.01252480000257492
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,128,2,1,128,1,fp8,fp8,0,0.012457600235939026
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,128,2,2,128,1,float16,float16,0,0.010398399829864503
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,128,2,2,128,1,float16,fp8,0,0.011348800361156463
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,128,2,2,128,1,fp8,fp8,0,0.01050880029797554
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,128,2,1,128,1,float16,float16,0,0.010468800365924836
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,128,2,1,128,1,float16,fp8,0,0.010932800173759461
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,128,2,1,128,1,fp8,fp8,0,0.010487999767065048
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,128,2,2,128,1,float16,float16,0,0.0104592002928257
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,128,2,2,128,1,float16,fp8,0,0.01050880029797554
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,128,2,2,128,1,fp8,fp8,0,0.010475199669599533
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,128,2,1,128,1,float16,fp8,0,0.010420800000429154
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,128,2,1,128,1,fp8,fp8,0,0.010478399693965912
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,128,2,2,128,1,float16,float16,0,0.010351999849081039
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,128,2,2,128,1,float16,fp8,0,0.010502400249242783
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,128,2,2,128,1,fp8,fp8,0,0.010440000146627427
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,256,2,1,128,1,fp8,fp8,0,0.01669279932975769
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,128,2,1,128,1,float16,float16,0,0.01040000021457672
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,128,2,1,128,1,float16,fp8,0,0.010364799946546554
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,256,2,2,128,1,fp8,fp8,0,0.014575999975204468
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,128,2,2,128,1,float16,fp8,0,0.010409600287675857
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,128,2,1,128,1,float16,float16,0,0.010388799756765366
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,128,2,2,128,1,fp8,fp8,0,0.0104592002928257
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,128,2,1,128,1,fp8,fp8,0,0.010486400127410889
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,64,2,1,128,1,float16,float16,0,0.02627040147781372
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,512,2,1,128,1,fp8,fp8,0,0.02480800002813339
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,64,2,1,128,1,float16,fp8,0,0.03708640038967133
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,64,2,2,128,1,float16,float16,0,0.016620799899101257
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,64,2,1,128,1,fp8,fp8,0,0.03710080087184906
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,64,2,2,128,1,float16,fp8,0,0.022728000581264497
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,64,2,2,128,1,fp8,fp8,0,0.022785599529743194
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,64,2,1,128,1,float16,float16,0,0.016604800522327424
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,64,2,1,128,1,float16,fp8,0,0.02276960015296936
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,64,2,1,128,1,fp8,fp8,0,0.02272319942712784
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,64,2,2,128,1,float16,float16,0,0.012441600114107132
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,64,2,2,128,1,float16,fp8,0,0.014563199877738953
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,64,2,2,128,1,fp8,fp8,0,0.014590400457382201
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,64,2,1,128,1,float16,float16,0,0.012478400021791458
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,64,2,1,128,1,float16,fp8,0,0.014608000218868256
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,64,2,1,128,1,fp8,fp8,0,0.01454399973154068
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,64,2,2,128,1,float16,float16,0,0.010480000078678131
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,64,2,2,128,1,float16,fp8,0,0.012520000338554382
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,64,2,2,128,1,fp8,fp8,0,0.012564800679683685
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,64,2,1,128,1,float16,float16,0,0.010422399640083313
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,64,2,1,128,1,float16,fp8,0,0.012513600289821625
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,64,2,1,128,1,fp8,fp8,0,0.012483199685811996
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,64,2,2,128,1,float16,float16,0,0.010518400371074677
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,64,2,2,128,1,float16,fp8,0,0.010438399761915207
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,64,2,2,128,1,fp8,fp8,0,0.0105103999376297
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,64,2,1,128,1,float16,float16,0,0.010417599976062775
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,64,2,1,128,1,float16,fp8,0,0.010471999645233154
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,64,2,2,128,1,float16,float16,0,0.010473600029945374
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,64,2,1,128,1,fp8,fp8,0,0.010441599786281586
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,64,2,2,128,1,float16,fp8,0,0.010454399883747101
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,64,2,2,128,1,fp8,fp8,0,0.01045600026845932
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,64,2,1,128,1,float16,float16,0,0.010440000146627427
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,64,2,1,128,1,float16,fp8,0,0.010444799810647965
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,64,2,1,128,1,fp8,fp8,0,0.010494399815797806
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,64,2,2,128,1,float16,float16,0,0.010532800108194351
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,256,2,2,128,1,float16,float16,0,0.016515199840068818
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,64,2,2,128,1,fp8,fp8,0,0.010396800190210342
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,64,2,1,128,1,float16,float16,0,0.010360000282526016
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,64,2,1,128,1,float16,fp8,0,0.010417599976062775
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,64,2,1,128,1,fp8,fp8,0,0.010393600165843963
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,64,2,2,128,1,float16,float16,0,0.009387200325727462
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,64,2,2,128,1,float16,fp8,0,0.009672000259160995
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,64,2,2,128,1,fp8,fp8,0,0.009566400200128555
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,64,2,1,128,1,float16,float16,0,0.009430400282144546
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,64,2,1,128,1,float16,fp8,0,0.009438399970531464
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,64,2,1,128,1,fp8,fp8,0,0.009123200178146362
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,128,2,2,128,1,float16,float16,0,0.010436800122261048
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,64,2,2,128,1,float16,fp8,0,0.010414399951696397
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,64,2,2,128,1,fp8,fp8,0,0.010473600029945374
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,64,2,1,128,1,float16,float16,0,0.010388799756765366
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,128,2,1,128,1,float16,fp8,0,0.010387200117111205
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,64,2,1,128,1,float16,fp8,0,0.010486400127410889
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,64,2,1,128,1,fp8,fp8,0,0.010468800365924836
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,32,2,1,128,1,fp8,fp8,0,0.027110400795936584
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,32,2,1,128,1,float16,fp8,0,0.027166399359703063
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,32,2,2,128,1,float16,float16,0,0.014668799936771393
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,32,2,2,128,1,float16,fp8,0,0.018249599635601042
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,32,2,2,128,1,fp8,fp8,0,0.017003199458122252
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,32,2,1,128,1,float16,float16,0,0.014657600224018097
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,32,2,1,128,1,float16,fp8,0,0.016832000017166136
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,32,2,1,128,1,fp8,fp8,0,0.01706079989671707
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,32,2,2,128,1,float16,float16,0,0.011209599673748016
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,32,2,2,128,1,float16,fp8,0,0.012511999905109405
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,32,2,2,128,1,fp8,fp8,0,0.012545600533485413
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,32,2,1,128,1,float16,float16,0,0.010564800351858139
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,32,2,1,128,1,float16,fp8,0,0.012520000338554382
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,32,2,1,128,1,fp8,fp8,0,0.01257600039243698
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,32,2,2,128,1,float16,float16,0,0.010468800365924836
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,32,2,2,128,1,float16,fp8,0,0.010542400181293488
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,32,2,2,128,1,fp8,fp8,0,0.010475199669599533
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,32,2,1,128,1,float16,float16,0,0.010460799932479859
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,32,2,1,128,1,float16,fp8,0,0.010492800176143647
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,32,2,1,128,1,fp8,fp8,0,0.010486400127410889
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,32,2,2,128,1,float16,float16,0,0.010281600058078766
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,32,2,2,128,1,float16,fp8,0,0.010447999835014344
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,32,2,2,128,1,fp8,fp8,0,0.010467199981212616
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,32,2,1,128,1,float16,float16,0,0.010409600287675857
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,32,2,1,128,1,fp8,fp8,0,0.010507199913263321
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,32,2,1,128,1,float16,fp8,0,0.010414399951696397
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,32,2,2,128,1,float16,float16,0,0.010476800054311753
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,32,2,2,128,1,float16,fp8,0,0.010470400005578995
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,32,2,2,128,1,fp8,fp8,0,0.009691199660301209
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,32,2,1,128,1,float16,float16,0,0.010391999781131745
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,32,2,1,128,1,float16,fp8,0,0.009364800155162811
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,32,2,1,128,1,fp8,fp8,0,0.010375999659299851
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,32,2,2,128,1,float16,fp8,0,0.010465600341558457
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,32,2,2,128,1,float16,float16,0,0.009350399672985076
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,32,2,2,128,1,fp8,fp8,0,0.008748800307512284
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,32,2,1,128,1,float16,float16,0,0.009355200082063675
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,32,2,1,128,1,float16,fp8,0,0.008510400354862214
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,32,2,1,128,1,fp8,fp8,0,0.008767999708652496
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,64,2,2,128,1,float16,fp8,0,0.010454399883747101
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,32,2,2,128,1,float16,float16,0,0.00843520015478134
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,32,2,2,128,1,float16,fp8,0,0.008446399867534638
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,32,2,1,128,1,float16,float16,0,0.008428800106048583
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,32,2,1,128,1,float16,fp8,0,0.00846880003809929
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,128,2,1,128,1,float16,float16,0,0.010446400195360184
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,32,2,2,128,1,float16,float16,0,0.00843840017914772
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,32,2,2,128,1,fp8,fp8,0,0.010494399815797806
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,32,2,1,128,1,float16,float16,0,0.010475199669599533
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,32,2,1,128,1,fp8,fp8,0,0.009471999853849411
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,64,2,2,128,1,float16,float16,0,0.010420800000429154
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,16,2,1,128,1,float16,float16,0,0.02073120027780533
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,16,2,1,128,1,float16,fp8,0,0.022761599719524385
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,16,2,2,128,1,float16,float16,0,0.0144896000623703
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,16,2,2,128,1,float16,fp8,0,0.014716799557209014
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,32,2,1,128,1,float16,float16,0,0.022702400386333466
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,16,2,2,128,1,fp8,fp8,0,0.014580799639225006
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,16,2,1,128,1,float16,float16,0,0.01451359987258911
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,16,2,1,128,1,float16,fp8,0,0.014528000354766845
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,16,2,1,128,1,fp8,fp8,0,0.01480640023946762
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,16,2,2,128,1,float16,float16,0,0.010390400141477584
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,16,2,2,128,1,float16,fp8,0,0.010897599905729295
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,16,2,2,128,1,fp8,fp8,0,0.010524799674749374
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,16,2,1,128,1,float16,float16,0,0.010369600355625152
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,16,2,1,128,1,float16,fp8,0,0.010489600151777268
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,16,2,1,128,1,fp8,fp8,0,0.010428799688816071
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,16,2,2,128,1,float16,float16,0,0.010412800312042236
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,16,2,2,128,1,float16,fp8,0,0.010412800312042236
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,16,2,2,128,1,fp8,fp8,0,0.010406400263309478
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,16,2,1,128,1,float16,float16,0,0.010424000024795533
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,16,2,1,128,1,float16,fp8,0,0.010375999659299851
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,16,2,2,128,1,float16,float16,0,0.010417599976062775
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,16,2,2,128,1,float16,fp8,0,0.01067200005054474
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,16,2,2,128,1,fp8,fp8,0,0.0104032002389431
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,16,2,1,128,1,float16,float16,0,0.010398399829864503
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,16,2,1,128,1,float16,fp8,0,0.009459199756383896
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,16,2,1,128,1,fp8,fp8,0,0.009377600252628326
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,16,2,2,128,1,float16,float16,0,0.008750399947166443
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,16,2,2,128,1,float16,fp8,0,0.00865280032157898
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,16,2,2,128,1,fp8,fp8,0,0.010411199927330018
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,16,2,1,128,1,float16,float16,0,0.010406400263309478
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,16,2,1,128,1,float16,fp8,0,0.009399999678134919
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,16,2,1,128,1,fp8,fp8,0,0.00968480035662651
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,32,2,2,128,1,fp8,fp8,0,0.009489600360393525
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,32,2,1,128,1,fp8,fp8,0,0.00852160006761551
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16,2,2,128,1,float16,fp8,0,0.010150399804115296
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16,2,2,128,1,fp8,fp8,0,0.010028800368309021
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,32,2,1,128,1,float16,fp8,0,0.00841120034456253
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16,2,1,128,1,float16,float16,0,0.010462400317192078
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16,2,1,128,1,float16,fp8,0,0.008427199721336365
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,128,2,1,128,1,fp8,fp8,0,0.010491199791431427
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16,2,1,128,1,fp8,fp8,0,0.00912960022687912
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16,2,2,128,1,float16,float16,0,0.008478400111198426
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16,2,2,128,1,float16,fp8,0,0.008470399677753449
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16,2,2,128,1,fp8,fp8,0,0.008432000130414962
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16,2,1,128,1,float16,float16,0,0.008392000198364257
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,16,2,1,128,1,fp8,fp8,0,0.022785599529743194
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16,2,1,128,1,float16,fp8,0,0.008448000252246856
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16,2,1,128,1,fp8,fp8,0,0.009151999652385712
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16,2,2,128,1,float16,float16,0,0.008430399745702744
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16,2,2,128,1,float16,fp8,0,0.008486399799585343
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16,2,2,128,1,fp8,fp8,0,0.008500800281763077
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16,2,1,128,1,float16,fp8,0,0.00841279998421669
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16,2,1,128,1,fp8,fp8,0,0.008446399867534638
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16,2,1,128,1,float16,float16,0,0.00851840004324913
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,1,2,1,128,1,float16,float16,0,0.020710399746894835
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,1,2,1,128,1,float16,fp8,0,0.018943999707698823
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,1,2,2,128,1,float16,float16,0,0.014148800075054169
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,256,1,2,1,128,1,fp8,fp8,0,0.01866080015897751
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,1,2,2,128,1,fp8,fp8,0,0.012681600451469422
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,1,2,1,128,1,float16,float16,0,0.014009599387645722
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,1,2,1,128,1,float16,fp8,0,0.012919999659061432
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,1,2,1,128,1,fp8,fp8,0,0.013195200264453888
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1,2,2,128,1,float16,float16,0,0.010463999956846238
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1,2,2,128,1,float16,fp8,0,0.010467199981212616
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1,2,2,128,1,fp8,fp8,0,0.010446400195360184
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1,2,1,128,1,float16,fp8,0,0.010468800365924836
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1,2,1,128,1,fp8,fp8,0,0.010449600219726563
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1,2,2,128,1,float16,float16,0,0.010438399761915207
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1,2,2,128,1,float16,fp8,0,0.010470400005578995
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1,2,2,128,1,fp8,fp8,0,0.010388799756765366
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1,2,1,128,1,float16,float16,0,0.010463999956846238
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1,2,1,128,1,float16,fp8,0,0.010478399693965912
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1,2,1,128,1,fp8,fp8,0,0.010388799756765366
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1,2,2,128,1,float16,float16,0,0.010388799756765366
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1,2,2,128,1,float16,fp8,0,0.009097599983215332
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1,2,2,128,1,fp8,fp8,0,0.01048320010304451
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1,2,1,128,1,float16,float16,0,0.010369600355625152
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1,2,1,128,1,float16,fp8,0,0.008975999802350998
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1,2,1,128,1,fp8,fp8,0,0.009412799775600434
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1,2,2,128,1,float16,float16,0,0.008931200206279754
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1,2,2,128,1,float16,fp8,0,0.008542399853467941
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1,2,2,128,1,fp8,fp8,0,0.008452799916267396
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1,2,1,128,1,float16,float16,0,0.010284800082445145
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1,2,1,128,1,float16,fp8,0,0.008423999696969987
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1,2,1,128,1,fp8,fp8,0,0.010454399883747101
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1,2,2,128,1,float16,float16,0,0.010369600355625152
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1,2,2,128,1,float16,fp8,0,0.009390400350093841
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1,2,1,128,1,float16,float16,0,0.010041599720716476
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1,2,1,128,1,float16,fp8,0,0.008422400057315826
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1,2,1,128,1,fp8,fp8,0,0.008403199911117553
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1,2,2,128,1,float16,float16,0,0.008427199721336365
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16,2,2,128,1,float16,float16,0,0.009976000338792802
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1,2,2,128,1,fp8,fp8,0,0.008371199667453765
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1,2,1,128,1,float16,float16,0,0.008630400151014328
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,32,2,2,128,1,float16,fp8,0,0.008929599821567536
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1,2,1,128,1,float16,fp8,0,0.008459199965000153
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1,2,2,128,1,float16,float16,0,0.008491200208663941
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1,2,1,128,1,fp8,fp8,0,0.008780799806118011
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1,2,2,128,1,float16,fp8,0,0.008423999696969987
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1,2,2,128,1,fp8,fp8,0,0.009710399806499482
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1,2,1,128,1,float16,float16,0,0.00843520015478134
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1,2,1,128,1,float16,fp8,0,0.009393599629402161
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1,2,1,128,1,fp8,fp8,0,0.008374399691820144
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16384,1,1,128,1,float16,float16,0,0.33138399124145507
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,1,2,2,128,1,float16,fp8,0,0.01369439959526062
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16384,1,1,128,1,float16,fp8,0,0.36047360897064207
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16384,1,1,128,1,float16,float16,0,0.21324479579925537
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1,2,1,128,1,float16,float16,0,0.010558400303125381
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16384,1,1,128,1,fp8,fp8,0,0.3585504055023193
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16384,1,1,128,1,float16,fp8,0,0.22389600276947022
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16384,1,1,128,1,fp8,fp8,0,0.22429280281066893
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16384,1,1,128,1,float16,float16,0,0.20358719825744628
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,16,2,1,128,1,fp8,fp8,0,0.010395199805498124
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16384,1,1,128,1,float16,fp8,0,0.20499200820922853
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16384,1,1,128,1,fp8,fp8,0,0.2049247980117798
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,12288,1,1,128,1,float16,float16,0,0.21065759658813477
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,12288,1,1,128,1,float16,float16,0,0.1610416054725647
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,12288,1,1,128,1,float16,fp8,0,0.17040799856185912
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,12288,1,1,128,1,float16,float16,0,0.1544927954673767
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,12288,1,1,128,1,fp8,fp8,0,0.2360896110534668
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,12288,1,1,128,1,float16,fp8,0,0.15794240236282348
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,12288,1,1,128,1,fp8,fp8,0,0.15588959455490112
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,10240,1,1,128,1,float16,float16,0,0.1558768033981323
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,10240,1,1,128,1,float16,fp8,0,0.1781056046485901
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,10240,1,1,128,1,float16,float16,0,0.13465919494628906
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,10240,1,1,128,1,fp8,fp8,0,0.17885119915008546
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,10240,1,1,128,1,float16,fp8,0,0.14356000423431398
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,10240,1,1,128,1,fp8,fp8,0,0.14450559616088868
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,10240,1,1,128,1,float16,float16,0,0.1298624038696289
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,10240,1,1,128,1,float16,fp8,0,0.13332959413528442
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,10240,1,1,128,1,fp8,fp8,0,0.13199360370635987
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,8192,1,1,128,1,float16,float16,0,0.18909599781036376
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,8192,1,1,128,1,float16,float16,0,0.11808639764785767
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,8192,1,1,128,1,float16,fp8,0,0.2255728006362915
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,8192,1,1,128,1,fp8,fp8,0,0.22598559856414796
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,8192,1,1,128,1,float16,fp8,0,0.13629120588302612
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1,2,2,128,1,float16,fp8,0,0.008399999886751174
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,8192,1,1,128,1,float16,float16,0,0.1110416054725647
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,8192,1,1,128,1,float16,fp8,0,0.11761280298233032
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,8192,1,1,128,1,float16,float16,0,0.10699199438095093
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,8192,1,1,128,1,float16,fp8,0,0.10879039764404297
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,8192,1,1,128,1,fp8,fp8,0,0.10878880023956299
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,6144,1,1,128,1,float16,float16,0,0.12304480075836181
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,6144,1,1,128,1,float16,fp8,0,0.15364160537719726
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,6144,1,1,128,1,fp8,fp8,0,0.1536527991294861
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,6144,1,1,128,1,float16,float16,0,0.0902463972568512
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,6144,1,1,128,1,float16,fp8,0,0.10468800067901611
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,6144,1,1,128,1,fp8,fp8,0,0.10465279817581177
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,6144,1,1,128,1,float16,float16,0,0.0863919973373413
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,6144,1,1,128,1,float16,fp8,0,0.09037439823150635
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,6144,1,1,128,1,fp8,fp8,0,0.0903663992881775
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,6144,1,1,128,1,float16,float16,0,0.08229920268058777
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,6144,1,1,128,1,float16,fp8,0,0.08428159952163697
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,6144,1,1,128,1,fp8,fp8,0,0.08409759998321534
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,4096,1,1,128,1,float16,float16,0,0.11477279663085938
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,4096,1,1,128,1,float16,fp8,0,0.1566975951194763
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,4096,1,1,128,1,float16,float16,0,0.06962080001831054
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,4096,1,1,128,1,fp8,fp8,0,0.15604640245437623
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,4096,1,1,128,1,float16,fp8,0,0.09044479727745056
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,4096,1,1,128,1,fp8,fp8,0,0.09053919911384582
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,12288,1,1,128,1,fp8,fp8,0,0.17101600170135497
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,12288,1,1,128,1,float16,fp8,0,0.23681919574737548
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,4096,1,1,128,1,float16,float16,0,0.06375839710235595
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,4096,1,1,128,1,float16,fp8,0,0.07262719869613647
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,4096,1,1,128,1,float16,float16,0,0.059614402055740354
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,4096,1,1,128,1,float16,fp8,0,0.06365119814872741
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,4096,1,1,128,1,fp8,fp8,0,0.06373760104179382
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,4096,1,1,128,1,float16,float16,0,0.057923197746276855
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,4096,1,1,128,1,float16,fp8,0,0.05953119993209839
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,4096,1,1,128,1,fp8,fp8,0,0.05960639715194702
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1,2,2,128,1,fp8,fp8,0,0.009401600062847137
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,3072,1,1,128,1,float16,float16,0,0.07823839783668518
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,3072,1,1,128,1,float16,fp8,0,0.11091519594192505
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,3072,1,1,128,1,fp8,fp8,0,0.11079039573669433
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,3072,1,1,128,1,float16,float16,0,0.054206401109695435
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,3072,1,1,128,1,float16,fp8,0,0.07004799842834472
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,3072,1,1,128,1,float16,float16,0,0.04940159916877747
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,3072,1,1,128,1,fp8,fp8,0,0.07009279727935791
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,3072,1,1,128,1,float16,fp8,0,0.05755680203437805
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,3072,1,1,128,1,fp8,fp8,0,0.05753920078277588
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,3072,1,1,128,1,float16,fp8,0,0.049491199851036075
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,3072,1,1,128,1,fp8,fp8,0,0.05106880068778992
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,3072,1,1,128,1,float16,float16,0,0.04731360077857971
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,3072,1,1,128,1,float16,fp8,0,0.047336000204086306
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,3072,1,1,128,1,fp8,fp8,0,0.047337600588798524
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,8192,1,1,128,1,fp8,fp8,0,0.13572959899902343
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,2048,1,1,128,1,float16,fp8,0,0.1199839949607849
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,2048,1,1,128,1,float16,float16,0,0.045296001434326175
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,2048,1,1,128,1,float16,fp8,0,0.06784960031509399
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,2048,1,1,128,1,fp8,fp8,0,0.12118079662322997
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,2048,1,1,128,1,float16,float16,0,0.03913759887218475
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,2048,1,1,128,1,fp8,fp8,0,0.06778879761695862
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,2048,1,1,128,1,float16,float16,0,0.07591999769210815
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,2048,1,1,128,1,float16,fp8,0,0.04939680099487305
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,2048,1,1,128,1,fp8,fp8,0,0.04941760003566742
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,2048,1,1,128,1,float16,float16,0,0.037011200189590455
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,2048,1,1,128,1,float16,fp8,0,0.041171199083328246
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,2048,1,1,128,1,fp8,fp8,0,0.0411296010017395
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,2048,1,1,128,1,float16,float16,0,0.03499679863452911
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,2048,1,1,128,1,float16,fp8,0,0.03710080087184906
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,2048,1,1,128,1,float16,float16,0,0.035016000270843506
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,2048,1,1,128,1,float16,fp8,0,0.03500480055809021
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,2048,1,1,128,1,fp8,fp8,0,0.034995201230049136
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,2048,1,1,128,1,fp8,fp8,0,0.037115201354026794
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1536,1,1,128,1,float16,fp8,0,0.08830400109291077
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1536,1,1,128,1,fp8,fp8,0,0.08810399770736695
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1536,1,1,128,1,float16,float16,0,0.035071998834609985
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1536,1,1,128,1,float16,fp8,0,0.053495997190475465
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1536,1,1,128,1,float16,float16,0,0.031196799874305726
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1536,1,1,128,1,float16,fp8,0,0.039392000436782836
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1536,1,1,128,1,fp8,fp8,0,0.039215999841690066
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1536,1,1,128,1,float16,float16,0,0.029022398591041564
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1536,1,1,128,1,fp8,fp8,0,0.0534608006477356
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1536,1,1,128,1,float16,fp8,0,0.03301759958267212
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1536,1,1,128,1,float16,float16,0,0.028839999437332155
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1536,1,1,128,1,float16,fp8,0,0.029099199175834655
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1536,1,1,128,1,fp8,fp8,0,0.030876800417900085
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1536,1,1,128,1,float16,float16,0,0.028830400109291075
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1536,1,1,128,1,float16,fp8,0,0.028911998867988585
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1536,1,1,128,1,fp8,fp8,0,0.028828799724578857
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1024,1,1,128,1,float16,float16,0,0.055636799335479735
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1024,1,1,128,1,float16,fp8,0,0.10211999416351318
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1024,1,1,128,1,float16,float16,0,0.03275519907474518
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1024,1,1,128,1,fp8,fp8,0,0.1022495985031128
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1024,1,1,128,1,float16,float16,0,0.02685759961605072
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1024,1,1,128,1,float16,fp8,0,0.039073601365089417
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1024,1,1,128,1,fp8,fp8,0,0.03905439972877502
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1024,1,1,128,1,float16,float16,0,0.02468640059232712
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1024,1,1,128,1,float16,fp8,0,0.028951999545097352
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,4096,1,1,128,1,fp8,fp8,0,0.0720255970954895
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1024,1,1,128,1,fp8,fp8,0,0.028896000981330872
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1024,1,1,128,1,float16,float16,0,0.022681599855422972
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1024,1,1,128,1,float16,fp8,0,0.025128000974655153
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1024,1,1,128,1,fp8,fp8,0,0.024753600358963013
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1024,1,1,128,1,float16,float16,0,0.0226623997092247
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1024,1,1,128,1,float16,fp8,0,0.022699199616909027
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1024,1,1,128,1,fp8,fp8,0,0.022702400386333466
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1024,1,1,128,1,float16,float16,0,0.020953600108623505
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1024,1,1,128,1,float16,fp8,0,0.022606399655342103
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,3072,1,1,128,1,float16,float16,0,0.047337600588798524
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1024,1,1,128,1,fp8,fp8,0,0.022683200240135194
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,512,1,1,128,1,float16,float16,0,0.0458624005317688
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,8192,1,1,128,1,fp8,fp8,0,0.11690080165863037
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,512,1,1,128,1,float16,fp8,0,0.09375519752502441
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,512,1,1,128,1,fp8,fp8,0,0.09238560199737549
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,512,1,1,128,1,float16,float16,0,0.02683840095996857
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,512,1,1,128,1,float16,fp8,0,0.05134879946708679
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,512,1,1,128,1,fp8,fp8,0,0.050040000677108766
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,512,1,1,128,1,float16,float16,0,0.02072640061378479
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,512,1,1,128,1,float16,fp8,0,0.03290719985961914
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,512,1,1,128,1,fp8,fp8,0,0.032927998900413515
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,512,1,1,128,1,float16,float16,0,0.018644799292087556
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,512,1,1,128,1,fp8,fp8,0,0.024332800507545473
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,512,1,1,128,1,float16,float16,0,0.016604800522327424
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,512,1,1,128,1,float16,fp8,0,0.02476319968700409
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1536,1,1,128,1,float16,float16,0,0.05346400141716003
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,512,1,1,128,1,float16,fp8,0,0.018750399351119995
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,512,1,1,128,1,fp8,fp8,0,0.020579199492931365
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,512,1,1,128,1,float16,float16,0,0.016516800224781036
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,512,1,1,128,1,float16,fp8,0,0.01668799966573715
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,512,1,1,128,1,fp8,fp8,0,0.016628800332546233
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,512,1,1,128,1,float16,fp8,0,0.01653600037097931
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,512,1,1,128,1,fp8,fp8,0,0.016604800522327424
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,512,1,1,128,1,float16,float16,0,0.01643040031194687
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,512,1,1,128,1,float16,fp8,0,0.01656319946050644
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,512,1,1,128,1,fp8,fp8,0,0.015510399639606477
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,256,1,1,128,1,float16,float16,0,0.022745600342750548
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1536,1,1,128,1,fp8,fp8,0,0.03303999900817871
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,256,1,1,128,1,float16,fp8,0,0.047279998660087585
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,256,1,1,128,1,float16,float16,0,0.01865279972553253
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,256,1,1,128,1,fp8,fp8,0,0.04734239876270294
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,256,1,1,128,1,float16,float16,0,0.014800000190734863
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,256,1,1,128,1,fp8,fp8,0,0.028907200694084166
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,256,1,1,128,1,float16,fp8,0,0.020703999698162077
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,256,1,1,128,1,fp8,fp8,0,0.020761600136756896
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,256,1,1,128,1,float16,fp8,0,0.016601599752902985
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,256,1,1,128,1,fp8,fp8,0,0.016599999368190767
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,256,1,1,128,1,float16,float16,0,0.01263200044631958
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1024,1,1,128,1,float16,fp8,0,0.055593597888946536
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,256,1,1,128,1,float16,fp8,0,0.014590400457382201
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,256,1,1,128,1,fp8,fp8,0,0.01451680064201355
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,256,1,1,128,1,float16,float16,0,0.012470400333404541
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,256,1,1,128,1,float16,fp8,0,0.014582400023937226
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,256,1,1,128,1,fp8,fp8,0,0.014507199823856353
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,256,1,1,128,1,float16,float16,0,0.012545600533485413
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,256,1,1,128,1,fp8,fp8,0,0.012516799569129943
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,256,1,1,128,1,float16,float16,0,0.01252640038728714
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,256,1,1,128,1,float16,fp8,0,0.012723200023174286
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,256,1,1,128,1,float16,fp8,0,0.012559999525547028
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,256,1,1,128,1,fp8,fp8,0,0.012559999525547028
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,128,1,1,128,1,float16,float16,0,0.01613280028104782
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,128,1,1,128,1,float16,fp8,0,0.028887999057769776
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,128,1,1,128,1,float16,float16,0,0.012535999715328216
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,128,1,1,128,1,float16,fp8,0,0.018692800402641298
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,128,1,1,128,1,fp8,fp8,0,0.026790401339530943
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,128,1,1,128,1,fp8,fp8,0,0.018697600066661834
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,128,1,1,128,1,float16,float16,0,0.010467199981212616
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,128,1,1,128,1,float16,fp8,0,0.014614400267601014
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,128,1,1,128,1,fp8,fp8,0,0.014555199444293976
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,128,1,1,128,1,float16,float16,0,0.010460799932479859
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,128,1,1,128,1,float16,fp8,0,0.012572799623012543
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,128,1,1,128,1,fp8,fp8,0,0.012510399520397186
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,128,1,1,128,1,float16,float16,0,0.010520000010728836
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,128,1,1,128,1,float16,fp8,0,0.010505600273609162
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,128,1,1,128,1,fp8,fp8,0,0.010556799918413162
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,128,1,1,128,1,float16,float16,0,0.010438399761915207
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,128,1,1,128,1,float16,fp8,0,0.010422399640083313
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,128,1,1,128,1,fp8,fp8,0,0.010467199981212616
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,128,1,1,128,1,float16,float16,0,0.010475199669599533
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,128,1,1,128,1,float16,fp8,0,0.010407999902963639
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,128,1,1,128,1,fp8,fp8,0,0.010451199859380722
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,128,1,1,128,1,float16,float16,0,0.010467199981212616
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,128,1,1,128,1,float16,fp8,0,0.010419200360774993
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,128,1,1,128,1,fp8,fp8,0,0.01050880029797554
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,64,1,1,128,1,float16,fp8,0,0.018699200451374055
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,64,1,1,128,1,float16,float16,0,0.01250240057706833
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,64,1,1,128,1,fp8,fp8,0,0.018667200207710268
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,64,1,1,128,1,float16,float16,0,0.011956799775362015
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,64,1,1,128,1,float16,fp8,0,0.014545600116252898
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,64,1,1,128,1,fp8,fp8,0,0.014590400457382201
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,64,1,1,128,1,float16,float16,0,0.010395199805498124
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,64,1,1,128,1,float16,fp8,0,0.012486399710178375
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,64,1,1,128,1,fp8,fp8,0,0.012537600100040435
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,64,1,1,128,1,float16,float16,0,0.010417599976062775
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,64,1,1,128,1,float16,fp8,0,0.010447999835014344
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,64,1,1,128,1,float16,float16,0,0.010406400263309478
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,64,1,1,128,1,float16,fp8,0,0.010494399815797806
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,64,1,1,128,1,fp8,fp8,0,0.010364799946546554
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,64,1,1,128,1,float16,float16,0,0.0104592002928257
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,64,1,1,128,1,float16,fp8,0,0.010473600029945374
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,64,1,1,128,1,fp8,fp8,0,0.009460800141096116
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,64,1,1,128,1,float16,float16,0,0.010358399897813796
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,64,1,1,128,1,float16,fp8,0,0.008974400162696839
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,64,1,1,128,1,fp8,fp8,0,0.010419200360774993
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,512,1,1,128,1,float16,float16,0,0.016601599752902985
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,64,1,1,128,1,float16,float16,0,0.010398399829864503
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,64,1,1,128,1,float16,fp8,0,0.010463999956846238
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,64,1,1,128,1,fp8,fp8,0,0.010391999781131745
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,32,1,1,128,1,float16,float16,0,0.012481600046157837
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,256,1,1,128,1,float16,fp8,0,0.03046880066394806
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,32,1,1,128,1,fp8,fp8,0,0.014550399780273438
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,256,1,1,128,1,float16,float16,0,0.01454399973154068
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,32,1,1,128,1,float16,fp8,0,0.012542399764060973
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,32,1,1,128,1,fp8,fp8,0,0.012436799705028534
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,32,1,1,128,1,float16,fp8,0,0.010390400141477584
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,32,1,1,128,1,fp8,fp8,0,0.010503999888896942
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1024,1,1,128,1,fp8,fp8,0,0.05558879971504212
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,32,1,1,128,1,float16,float16,0,0.010382399708032609
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,32,1,1,128,1,float16,fp8,0,0.01040479987859726
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,32,1,1,128,1,fp8,fp8,0,0.010577599704265594
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,32,1,1,128,1,float16,fp8,0,0.010380800068378448
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,32,1,1,128,1,fp8,fp8,0,0.008904000371694564
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,32,1,1,128,1,float16,float16,0,0.009763199836015701
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,32,1,1,128,1,float16,fp8,0,0.00941760018467903
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,32,1,1,128,1,fp8,fp8,0,0.009391999989748
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,32,1,1,128,1,float16,float16,0,0.009017600119113922
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,32,1,1,128,1,float16,fp8,0,0.008961600065231324
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,32,1,1,128,1,fp8,fp8,0,0.008416000008583068
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,32,1,1,128,1,float16,float16,0,0.009353599697351455
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,32,1,1,128,1,float16,fp8,0,0.00921280011534691
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,32,1,1,128,1,fp8,fp8,0,0.009176000207662582
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,16,1,1,128,1,float16,float16,0,0.010768000036478043
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,16,1,1,128,1,float16,fp8,0,0.012451200187206269
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,16,1,1,128,1,fp8,fp8,0,0.012521600723266602
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,16,1,1,128,1,float16,float16,0,0.01037440001964569
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,16,1,1,128,1,float16,fp8,0,0.010505600273609162
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,16,1,1,128,1,fp8,fp8,0,0.010387200117111205
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,16,1,1,128,1,float16,fp8,0,0.0104032002389431
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,16,1,1,128,1,fp8,fp8,0,0.01035040020942688
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,16,1,1,128,1,float16,float16,0,0.010398399829864503
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,16,1,1,128,1,float16,fp8,0,0.010390400141477584
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,16,1,1,128,1,fp8,fp8,0,0.009350399672985076
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,16,1,1,128,1,float16,float16,0,0.009478399902582169
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,16,1,1,128,1,float16,fp8,0,0.00899839997291565
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,64,1,1,128,1,fp8,fp8,0,0.010494399815797806
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,16,1,1,128,1,fp8,fp8,0,0.008644799888134002
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16,1,1,128,1,float16,float16,0,0.008975999802350998
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16,1,1,128,1,float16,fp8,0,0.010382399708032609
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,16,1,1,128,1,fp8,fp8,0,0.008457600325345992
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16,1,1,128,1,float16,float16,0,0.010395199805498124
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16,1,1,128,1,fp8,fp8,0,0.010377600044012069
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,16,1,1,128,1,float16,fp8,0,0.00841120034456253
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16,1,1,128,1,float16,float16,0,0.010395199805498124
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16,1,1,128,1,float16,fp8,0,0.008726400136947633
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,1,1,1,128,1,float16,float16,0,0.01061279997229576
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,16,1,1,128,1,fp8,fp8,0,0.010124800354242324
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,1,1,1,128,1,float16,fp8,0,0.010496000200510025
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,1,1,1,128,1,fp8,fp8,0,0.01048159971833229
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1,1,1,128,1,float16,float16,0,0.010471999645233154
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1,1,1,128,1,float16,fp8,0,0.010366400331258773
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,1,1,1,128,1,fp8,fp8,0,0.01043360009789467
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1,1,1,128,1,float16,fp8,0,0.009662400186061858
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,64,32,1,1,128,1,float16,float16,0,0.010475199669599533
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1,1,1,128,1,fp8,fp8,0,0.008470399677753449
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1,1,1,128,1,float16,fp8,0,0.009827200323343277
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1,1,1,128,1,fp8,fp8,0,0.008420799672603608
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1,1,1,128,1,float16,float16,0,0.008388800173997879
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1,1,1,128,1,float16,fp8,0,0.009404800087213516
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,1,1,1,128,1,fp8,fp8,0,0.008427199721336365
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,8,32,1,1,128,1,float16,float16,0,0.010366400331258773
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1,1,1,128,1,float16,float16,0,0.00942080020904541
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1,1,1,128,1,float16,fp8,0,0.008406399935483932
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,4,1,1,1,128,1,fp8,fp8,0,0.008428800106048583
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1,1,1,128,1,float16,float16,0,0.00880960002541542
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1,1,1,128,1,float16,fp8,0,0.00841120034456253
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,2,1,1,1,128,1,fp8,fp8,0,0.008454400300979614
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1,1,1,128,1,float16,float16,0,0.00846719965338707
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1,1,1,128,1,float16,fp8,0,0.008369600027799606
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,1,1,1,1,128,1,fp8,fp8,0,0.008423999696969987
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,16,1,1,128,1,float16,float16,0,0.010384000092744827
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,128,32,1,1,128,1,float16,fp8,0,0.014511999487876893
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,1,1,1,128,1,float16,float16,0,0.010388799756765366
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,16,1,1,1,128,1,float16,float16,0,0.010412800312042236
SGLang,0.5.10,NVIDIA B200,context_attention,trtllm_mha,32,32,1,1,128,1,float16,float16,0,0.010446400195360184
