framework,version,device,op_name,kernel_source,batch_size,isl,num_heads,num_key_value_heads,head_dim,beam_width,attn_dtype,kv_cache_dtype,step,latency
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,16384,96,1,128,1,float16,fp8,0,33.90354919433594
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,16384,96,1,128,1,fp8,fp8,0,35.37421875
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,16384,96,2,128,1,float16,fp8,0,34.49319763183594
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,16384,96,2,128,1,fp8,fp8,0,36.28742980957031
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,16384,96,4,128,1,float16,fp8,0,35.54533386230469
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,16384,96,4,128,1,fp8,fp8,0,36.18575439453125
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,16384,96,1,128,1,float16,float16,0,54.485467529296876
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,16384,96,2,128,1,float16,float16,0,54.8771484375
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,16384,96,8,128,1,float16,fp8,0,34.136834716796876
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,16384,96,8,128,1,fp8,fp8,0,34.48880310058594
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16384,96,96,128,1,float16,fp8,0,17.169410705566406
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,16384,96,4,128,1,float16,float16,0,55.79148559570312
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16384,96,96,128,1,fp8,fp8,0,18.751322937011718
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16384,96,1,128,1,float16,fp8,0,17.18311462402344
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16384,96,1,128,1,float16,float16,0,27.744961547851563
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16384,96,1,128,1,fp8,fp8,0,17.42473907470703
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16384,96,2,128,1,float16,float16,0,27.818014526367186
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16384,96,2,128,1,float16,fp8,0,17.394366455078124
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16384,96,2,128,1,fp8,fp8,0,17.21171875
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,16384,96,8,128,1,float16,float16,0,58.94329833984375
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16384,96,4,128,1,float16,float16,0,27.94449462890625
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16384,96,4,128,1,float16,fp8,0,17.822544860839844
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16384,96,4,128,1,fp8,fp8,0,18.142698669433592
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16384,96,8,128,1,float16,fp8,0,17.881062316894532
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16384,96,8,128,1,fp8,fp8,0,18.55054168701172
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16384,96,8,128,1,float16,float16,0,28.09859619140625
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16384,96,96,128,1,float16,fp8,0,9.111307525634766
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16384,96,96,128,1,fp8,fp8,0,8.956208038330079
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16384,96,1,128,1,float16,fp8,0,8.415668487548828
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16384,96,1,128,1,float16,float16,0,13.817655944824219
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16384,96,1,128,1,fp8,fp8,0,8.830595397949219
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16384,96,2,128,1,float16,fp8,0,8.684417724609375
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16384,96,2,128,1,float16,float16,0,14.111434936523438
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16384,96,2,128,1,fp8,fp8,0,8.875619506835937
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16384,96,4,128,1,float16,float16,0,14.258393859863281
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16384,96,4,128,1,float16,fp8,0,8.678121948242188
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16384,96,4,128,1,fp8,fp8,0,8.800132751464844
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16384,96,96,128,1,float16,float16,0,13.874256896972657
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16384,96,8,128,1,float16,float16,0,14.71911163330078
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16384,96,8,128,1,float16,fp8,0,9.331439971923828
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16384,96,8,128,1,fp8,fp8,0,8.197846221923829
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16384,96,96,128,1,float16,float16,0,6.194190216064453
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16384,96,96,128,1,float16,float16,0,27.885195922851562
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16384,96,96,128,1,float16,fp8,0,4.538195037841797
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16384,96,96,128,1,fp8,fp8,0,4.661225509643555
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16384,96,1,128,1,float16,fp8,0,4.084161758422852
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16384,96,1,128,1,fp8,fp8,0,4.445979309082031
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16384,96,1,128,1,float16,float16,0,6.096038436889648
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16384,96,2,128,1,float16,fp8,0,4.100379180908203
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16384,96,2,128,1,fp8,fp8,0,4.486764907836914
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16384,96,2,128,1,float16,float16,0,7.104222106933594
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16384,96,4,128,1,float16,fp8,0,3.984571075439453
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16384,96,4,128,1,float16,float16,0,6.651953887939453
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16384,96,4,128,1,fp8,fp8,0,4.459161758422852
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16384,96,8,128,1,float16,fp8,0,4.0222431182861325
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16384,96,8,128,1,float16,float16,0,6.4360595703125
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16384,96,8,128,1,fp8,fp8,0,4.245867156982422
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,12288,96,1,128,1,float16,fp8,0,19.774473571777342
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,12288,96,1,128,1,fp8,fp8,0,19.715364074707033
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,12288,96,1,128,1,float16,float16,0,34.6267822265625
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,12288,96,2,128,1,float16,float16,0,32.671234130859375
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,12288,96,2,128,1,float16,fp8,0,19.730453491210938
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,12288,96,2,128,1,fp8,fp8,0,20.431686401367188
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,12288,96,4,128,1,float16,fp8,0,20.004075622558595
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,12288,96,4,128,1,fp8,fp8,0,19.775465393066405
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,12288,96,4,128,1,float16,float16,0,34.62346496582031
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,12288,96,8,128,1,float16,float16,0,33.261322021484375
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,12288,96,8,128,1,float16,fp8,0,20.602629089355467
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,12288,96,8,128,1,fp8,fp8,0,20.012771606445312
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,12288,96,96,128,1,float16,float16,0,17.788481140136717
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,12288,96,96,128,1,fp8,fp8,0,10.577887725830077
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,12288,96,96,128,1,float16,fp8,0,10.569905853271484
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,12288,96,1,128,1,fp8,fp8,0,9.916185760498047
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,12288,96,1,128,1,float16,float16,0,16.61743927001953
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,12288,96,1,128,1,float16,fp8,0,10.519802856445313
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,12288,96,2,128,1,float16,float16,0,16.89250793457031
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,12288,96,2,128,1,float16,fp8,0,10.117132568359375
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,12288,96,2,128,1,fp8,fp8,0,9.678667449951172
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,12288,96,4,128,1,float16,fp8,0,9.831976318359375
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,12288,96,4,128,1,float16,float16,0,17.559834289550782
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,12288,96,4,128,1,fp8,fp8,0,10.137471771240234
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,12288,96,8,128,1,float16,float16,0,15.60704345703125
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,12288,96,8,128,1,float16,fp8,0,9.948876953125
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,12288,96,96,128,1,float16,float16,0,8.76273422241211
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,12288,96,8,128,1,fp8,fp8,0,10.66942901611328
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,12288,96,96,128,1,float16,fp8,0,4.977332687377929
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,12288,96,96,128,1,fp8,fp8,0,5.2687023162841795
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,12288,96,1,128,1,float16,fp8,0,4.963908767700195
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,12288,96,1,128,1,float16,float16,0,8.225596618652343
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,12288,96,1,128,1,fp8,fp8,0,5.049081420898437
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,12288,96,2,128,1,float16,fp8,0,4.977487945556641
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,12288,96,2,128,1,float16,float16,0,8.457281494140625
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,12288,96,2,128,1,fp8,fp8,0,4.874579238891601
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,12288,96,4,128,1,float16,fp8,0,5.103535842895508
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,12288,96,4,128,1,float16,float16,0,6.869716644287109
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,12288,96,4,128,1,fp8,fp8,0,4.862511825561524
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,12288,96,8,128,1,float16,float16,0,7.586886596679688
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,12288,96,96,128,1,float16,float16,0,3.823348617553711
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,12288,96,8,128,1,float16,fp8,0,5.442060852050782
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,12288,96,8,128,1,fp8,fp8,0,4.71649284362793
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,12288,96,96,128,1,float16,fp8,0,2.7599695205688475
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,12288,96,96,128,1,fp8,fp8,0,2.7129520416259765
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,12288,96,1,128,1,float16,float16,0,4.036492919921875
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,12288,96,1,128,1,float16,fp8,0,2.3231807708740235
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,12288,96,1,128,1,fp8,fp8,0,2.4074384689331056
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,12288,96,2,128,1,float16,fp8,0,2.3648815155029297
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,12288,96,2,128,1,fp8,fp8,0,2.360747146606445
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,12288,96,2,128,1,float16,float16,0,4.336809539794922
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,12288,96,4,128,1,float16,float16,0,2.6681440353393553
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,12288,96,4,128,1,float16,fp8,0,2.3467008590698244
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,12288,96,4,128,1,fp8,fp8,0,2.360918426513672
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,12288,96,8,128,1,float16,float16,0,4.303047943115234
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,12288,96,8,128,1,fp8,fp8,0,2.3787904739379884
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,12288,96,8,128,1,float16,fp8,0,2.497585678100586
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,10240,96,1,128,1,fp8,fp8,0,13.917198181152344
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,10240,96,1,128,1,float16,fp8,0,14.736625671386719
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,10240,96,1,128,1,float16,float16,0,23.589657592773438
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,10240,96,2,128,1,float16,float16,0,21.751837158203124
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,10240,96,2,128,1,float16,fp8,0,14.556727600097656
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,10240,96,2,128,1,fp8,fp8,0,15.18396453857422
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,10240,96,4,128,1,float16,fp8,0,14.003726196289062
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,10240,96,4,128,1,fp8,fp8,0,14.263919067382812
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,10240,96,4,128,1,float16,float16,0,23.198707580566406
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,10240,96,8,128,1,float16,fp8,0,14.101638793945312
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,10240,96,8,128,1,float16,float16,0,24.988661193847655
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,10240,96,96,128,1,float16,float16,0,12.52948455810547
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,10240,96,8,128,1,fp8,fp8,0,14.473518371582031
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,10240,96,96,128,1,float16,fp8,0,7.570473480224609
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,10240,96,96,128,1,fp8,fp8,0,7.466331481933594
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,10240,96,1,128,1,float16,fp8,0,7.027823638916016
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,10240,96,1,128,1,fp8,fp8,0,6.866854095458985
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,10240,96,1,128,1,float16,float16,0,11.463846588134766
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,10240,96,2,128,1,float16,fp8,0,7.19163818359375
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,10240,96,2,128,1,fp8,fp8,0,6.892327880859375
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,10240,96,2,128,1,float16,float16,0,12.212026977539063
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,10240,96,4,128,1,float16,float16,0,11.703749084472657
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,10240,96,4,128,1,float16,fp8,0,7.040531158447266
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,10240,96,4,128,1,fp8,fp8,0,7.005608367919922
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,10240,96,96,128,1,float16,float16,0,5.767022323608399
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,10240,96,8,128,1,fp8,fp8,0,7.166340637207031
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,10240,96,8,128,1,float16,fp8,0,6.947879791259766
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,10240,96,8,128,1,float16,float16,0,12.383092498779297
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,10240,96,96,128,1,float16,fp8,0,3.6984081268310547
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,10240,96,96,128,1,fp8,fp8,0,3.702587127685547
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,10240,96,1,128,1,float16,fp8,0,3.485251235961914
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,10240,96,1,128,1,float16,float16,0,5.4566703796386715
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,10240,96,1,128,1,fp8,fp8,0,3.3185806274414062
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,10240,96,2,128,1,float16,float16,0,5.1620033264160154
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,10240,96,2,128,1,float16,fp8,0,3.5533199310302734
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,10240,96,2,128,1,fp8,fp8,0,3.64117431640625
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,10240,96,4,128,1,float16,float16,0,5.448807907104492
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,10240,96,4,128,1,fp8,fp8,0,3.402347183227539
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,10240,96,4,128,1,float16,fp8,0,3.6068561553955076
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,10240,96,8,128,1,float16,float16,0,5.567987060546875
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,10240,96,96,128,1,float16,float16,0,2.423716735839844
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,10240,96,8,128,1,float16,fp8,0,3.642243194580078
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,10240,96,8,128,1,fp8,fp8,0,3.5412254333496094
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,10240,96,96,128,1,float16,fp8,0,2.116326332092285
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,10240,96,96,128,1,fp8,fp8,0,2.208745574951172
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,10240,96,1,128,1,float16,fp8,0,1.8662368774414062
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,10240,96,1,128,1,float16,float16,0,1.9215871810913085
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,10240,96,1,128,1,fp8,fp8,0,1.71124324798584
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,10240,96,2,128,1,float16,fp8,0,1.734294319152832
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,10240,96,2,128,1,float16,float16,0,2.39770565032959
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,10240,96,2,128,1,fp8,fp8,0,2.0040447235107424
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,10240,96,4,128,1,float16,float16,0,2.4517423629760744
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,10240,96,4,128,1,float16,fp8,0,1.7636991500854493
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,10240,96,8,128,1,float16,float16,0,2.1111440658569336
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,10240,96,4,128,1,fp8,fp8,0,2.349857521057129
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,10240,96,8,128,1,float16,fp8,0,2.0049951553344725
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,10240,96,8,128,1,fp8,fp8,0,1.8584320068359375
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,8192,96,1,128,1,float16,fp8,0,18.136244201660155
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,8192,96,1,128,1,fp8,fp8,0,18.697918701171876
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,8192,96,1,128,1,float16,float16,0,29.66676025390625
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,8192,96,2,128,1,float16,float16,0,32.01631469726563
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,8192,96,2,128,1,float16,fp8,0,19.10924377441406
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,8192,96,2,128,1,fp8,fp8,0,19.138352966308595
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,8192,96,4,128,1,float16,fp8,0,19.756979370117186
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,8192,96,4,128,1,fp8,fp8,0,18.76048126220703
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,8192,96,4,128,1,float16,float16,0,29.933920288085936
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,8192,96,8,128,1,float16,float16,0,30.79229736328125
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,8192,96,8,128,1,float16,fp8,0,20.611892700195312
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,8192,96,8,128,1,fp8,fp8,0,18.872099304199217
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,8192,96,96,128,1,float16,float16,0,16.18860778808594
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,8192,96,96,128,1,float16,fp8,0,9.906041717529297
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,8192,96,96,128,1,fp8,fp8,0,10.695209503173828
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,8192,96,1,128,1,float16,fp8,0,8.8915283203125
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,8192,96,1,128,1,fp8,fp8,0,9.397115325927734
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,8192,96,1,128,1,float16,float16,0,15.348612976074218
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,8192,96,2,128,1,float16,fp8,0,8.942031860351562
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,8192,96,2,128,1,fp8,fp8,0,9.523175811767578
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,8192,96,2,128,1,float16,float16,0,16.452169799804686
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,8192,96,4,128,1,float16,fp8,0,9.25364990234375
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,8192,96,4,128,1,float16,float16,0,15.013656616210938
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,8192,96,4,128,1,fp8,fp8,0,9.521033477783202
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,8192,96,8,128,1,float16,fp8,0,9.329176330566407
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,8192,96,8,128,1,fp8,fp8,0,9.18868179321289
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,8192,96,8,128,1,float16,float16,0,15.800515747070312
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,8192,96,96,128,1,float16,float16,0,8.364249420166015
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,8192,96,96,128,1,float16,fp8,0,4.8445487976074215
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,8192,96,96,128,1,fp8,fp8,0,4.821675109863281
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,8192,96,1,128,1,float16,fp8,0,4.48778076171875
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,8192,96,1,128,1,float16,float16,0,7.6985313415527346
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,8192,96,1,128,1,fp8,fp8,0,4.532094573974609
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,8192,96,2,128,1,float16,float16,0,7.543170928955078
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,8192,96,2,128,1,float16,fp8,0,4.774647903442383
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,8192,96,2,128,1,fp8,fp8,0,4.251369476318359
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,8192,96,4,128,1,float16,float16,0,7.078105926513672
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,8192,96,4,128,1,fp8,fp8,0,4.678091049194336
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,8192,96,4,128,1,float16,fp8,0,4.62596321105957
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,8192,96,8,128,1,float16,float16,0,6.810234832763672
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,8192,96,8,128,1,float16,fp8,0,4.693356704711914
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,8192,96,96,128,1,float16,float16,0,3.473244857788086
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,8192,96,96,128,1,float16,fp8,0,2.4339727401733398
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,8192,96,96,128,1,fp8,fp8,0,2.4165584564208986
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,8192,96,8,128,1,fp8,fp8,0,4.980209732055664
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,8192,96,1,128,1,float16,fp8,0,2.1545055389404295
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,8192,96,1,128,1,float16,float16,0,3.4366878509521483
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,8192,96,1,128,1,fp8,fp8,0,2.2228815078735353
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,8192,96,2,128,1,float16,float16,0,3.239643096923828
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,8192,96,2,128,1,float16,fp8,0,2.176993560791016
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,8192,96,2,128,1,fp8,fp8,0,2.525284767150879
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,8192,96,4,128,1,float16,float16,0,3.127123260498047
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,8192,96,4,128,1,float16,fp8,0,2.4270320892333985
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,8192,96,4,128,1,fp8,fp8,0,2.167087936401367
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,8192,96,96,128,1,float16,float16,0,1.3433712005615235
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,8192,96,8,128,1,float16,float16,0,3.2206256866455076
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,8192,96,8,128,1,float16,fp8,0,2.492763137817383
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,8192,96,8,128,1,fp8,fp8,0,2.1832847595214844
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,8192,96,96,128,1,float16,fp8,0,1.2827391624450684
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,8192,96,1,128,1,float16,float16,0,1.3143952369689942
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,8192,96,1,128,1,float16,fp8,0,1.2919119834899901
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,8192,96,96,128,1,fp8,fp8,0,1.556715202331543
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,8192,96,1,128,1,fp8,fp8,0,1.2322416305541992
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,8192,96,2,128,1,fp8,fp8,0,1.129099178314209
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,8192,96,2,128,1,float16,fp8,0,1.14518404006958
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,8192,96,2,128,1,float16,float16,0,1.623591995239258
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,8192,96,4,128,1,float16,float16,0,1.2566047668457032
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,8192,96,4,128,1,float16,fp8,0,1.1225215911865234
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,8192,96,4,128,1,fp8,fp8,0,1.2195247650146483
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,8192,96,8,128,1,float16,fp8,0,1.315839958190918
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,8192,96,8,128,1,float16,float16,0,1.7522607803344727
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,8192,96,8,128,1,fp8,fp8,0,1.1282879829406738
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,6144,96,1,128,1,float16,fp8,0,10.352772521972657
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,6144,96,1,128,1,fp8,fp8,0,10.764631652832032
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,6144,96,2,128,1,float16,float16,0,17.693728637695312
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,6144,96,1,128,1,float16,float16,0,18.813829040527345
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,6144,96,2,128,1,float16,fp8,0,10.649364471435547
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,6144,96,2,128,1,fp8,fp8,0,10.64110107421875
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,6144,96,4,128,1,float16,fp8,0,11.093220520019532
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,6144,96,4,128,1,fp8,fp8,0,10.792361450195312
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,6144,96,4,128,1,float16,float16,0,17.639860534667967
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,6144,96,8,128,1,float16,float16,0,17.8886474609375
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,6144,96,8,128,1,float16,fp8,0,11.519739532470703
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,6144,96,8,128,1,fp8,fp8,0,10.779357147216796
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,6144,96,96,128,1,float16,float16,0,9.618179321289062
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,6144,96,96,128,1,float16,fp8,0,5.715619277954102
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,6144,96,96,128,1,fp8,fp8,0,6.170102310180664
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,6144,96,1,128,1,float16,fp8,0,5.236572647094727
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,6144,96,1,128,1,float16,float16,0,8.546340942382812
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,6144,96,1,128,1,fp8,fp8,0,5.232102584838867
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,6144,96,2,128,1,float16,fp8,0,5.30194091796875
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,6144,96,2,128,1,float16,float16,0,9.387582397460937
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,6144,96,2,128,1,fp8,fp8,0,5.2124481201171875
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,6144,96,4,128,1,float16,float16,0,8.766092681884766
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,6144,96,4,128,1,float16,fp8,0,5.342561721801758
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,6144,96,4,128,1,fp8,fp8,0,5.507385635375977
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,6144,96,8,128,1,float16,float16,0,8.331819152832031
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,6144,96,8,128,1,float16,fp8,0,5.390164947509765
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,6144,96,8,128,1,fp8,fp8,0,5.362897491455078
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,6144,96,96,128,1,float16,float16,0,4.478265762329102
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,6144,96,96,128,1,float16,fp8,0,2.787575912475586
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,6144,96,96,128,1,fp8,fp8,0,2.7994863510131838
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,6144,96,1,128,1,float16,fp8,0,3.007142448425293
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,6144,96,1,128,1,float16,float16,0,4.124887847900391
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,6144,96,1,128,1,fp8,fp8,0,2.60849609375
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,6144,96,2,128,1,float16,float16,0,3.3238704681396483
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,6144,96,2,128,1,float16,fp8,0,2.601348876953125
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,6144,96,2,128,1,fp8,fp8,0,2.7155487060546877
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,6144,96,4,128,1,float16,float16,0,3.0746912002563476
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,6144,96,4,128,1,float16,fp8,0,2.877811241149902
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,6144,96,4,128,1,fp8,fp8,0,2.7282943725585938
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,6144,96,8,128,1,float16,fp8,0,3.1450016021728517
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,6144,96,8,128,1,fp8,fp8,0,2.532841682434082
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,6144,96,8,128,1,float16,float16,0,4.168260955810547
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,6144,96,96,128,1,float16,float16,0,1.8910015106201172
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,6144,96,96,128,1,float16,fp8,0,1.4896559715270996
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,6144,96,96,128,1,fp8,fp8,0,1.4509504318237305
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,6144,96,1,128,1,float16,float16,0,1.9761743545532227
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,6144,96,1,128,1,float16,fp8,0,1.8728256225585938
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,6144,96,1,128,1,fp8,fp8,0,1.2894288063049317
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,6144,96,2,128,1,float16,float16,0,1.4225263595581055
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,6144,96,2,128,1,fp8,fp8,0,1.4308112144470215
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,6144,96,2,128,1,float16,fp8,0,1.719808006286621
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,6144,96,4,128,1,float16,float16,0,1.4754032135009765
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,6144,96,4,128,1,float16,fp8,0,1.5291664123535156
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,6144,96,4,128,1,fp8,fp8,0,1.2757776260375977
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,6144,96,8,128,1,float16,float16,0,1.6830799102783203
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,6144,96,96,128,1,float16,float16,0,0.8555952072143554
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,6144,96,8,128,1,float16,fp8,0,1.535102367401123
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,6144,96,8,128,1,fp8,fp8,0,1.3583968162536622
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,6144,96,96,128,1,float16,fp8,0,1.0198975563049317
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,6144,96,1,128,1,float16,float16,0,0.7604144096374512
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,6144,96,96,128,1,fp8,fp8,0,0.8374143600463867
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,6144,96,1,128,1,float16,fp8,0,0.7838175773620606
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,6144,96,1,128,1,fp8,fp8,0,0.7121359825134277
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,6144,96,2,128,1,float16,float16,0,0.8640447616577148
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,6144,96,2,128,1,float16,fp8,0,0.680295991897583
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,6144,96,2,128,1,fp8,fp8,0,0.7829775810241699
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,6144,96,4,128,1,float16,float16,0,1.046568012237549
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,6144,96,4,128,1,float16,fp8,0,0.680291223526001
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,6144,96,4,128,1,fp8,fp8,0,0.6735695838928223
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,6144,96,8,128,1,float16,fp8,0,0.6788432121276855
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,6144,96,8,128,1,float16,float16,0,0.9917759895324707
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,6144,96,8,128,1,fp8,fp8,0,0.6738800048828125
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,4096,96,1,128,1,fp8,fp8,0,9.676585388183593
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,4096,96,1,128,1,float16,fp8,0,10.39933090209961
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,4096,96,1,128,1,float16,float16,0,16.166963195800783
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,4096,96,2,128,1,float16,float16,0,15.1499755859375
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,4096,96,2,128,1,float16,fp8,0,9.840795135498047
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,4096,96,2,128,1,fp8,fp8,0,10.644249725341798
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,4096,96,4,128,1,float16,fp8,0,10.006639862060547
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,4096,96,4,128,1,fp8,fp8,0,10.4808349609375
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,4096,96,4,128,1,float16,float16,0,16.77904510498047
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,4096,96,8,128,1,float16,fp8,0,10.093534088134765
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,4096,96,8,128,1,float16,float16,0,18.087538146972655
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,4096,96,8,128,1,fp8,fp8,0,10.311212921142578
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,4096,96,96,128,1,float16,float16,0,9.207080078125
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,4096,96,96,128,1,float16,fp8,0,5.660327911376953
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,4096,96,96,128,1,fp8,fp8,0,5.9273937225341795
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,4096,96,1,128,1,float16,fp8,0,4.844814300537109
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,4096,96,1,128,1,fp8,fp8,0,4.84390869140625
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,4096,96,1,128,1,float16,float16,0,8.08511962890625
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,4096,96,2,128,1,float16,fp8,0,4.968580627441407
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,4096,96,2,128,1,float16,float16,0,8.073441314697266
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,4096,96,2,128,1,fp8,fp8,0,4.889416122436524
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,4096,96,4,128,1,float16,float16,0,8.041574096679687
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,4096,96,4,128,1,float16,fp8,0,5.081036758422852
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,4096,96,4,128,1,fp8,fp8,0,5.2771953582763675
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,4096,96,8,128,1,float16,float16,0,7.929118347167969
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,4096,96,8,128,1,fp8,fp8,0,5.015676879882813
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,4096,96,8,128,1,float16,fp8,0,5.093966293334961
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,4096,96,96,128,1,float16,float16,0,4.133947372436523
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,4096,96,96,128,1,float16,fp8,0,2.900716781616211
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,4096,96,96,128,1,fp8,fp8,0,2.901193618774414
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,4096,96,1,128,1,float16,float16,0,3.619619369506836
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,4096,96,1,128,1,float16,fp8,0,3.03514404296875
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,4096,96,1,128,1,fp8,fp8,0,2.4006320953369142
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,4096,96,2,128,1,float16,fp8,0,2.491089630126953
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,4096,96,2,128,1,float16,float16,0,3.683707046508789
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,4096,96,2,128,1,fp8,fp8,0,2.4377456665039063
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,4096,96,4,128,1,float16,float16,0,3.5049358367919923
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,4096,96,4,128,1,float16,fp8,0,2.4531248092651365
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,4096,96,4,128,1,fp8,fp8,0,2.556879997253418
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,4096,96,8,128,1,float16,float16,0,3.6727680206298827
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,4096,96,8,128,1,float16,fp8,0,2.474015998840332
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,4096,96,96,128,1,float16,float16,0,1.727849578857422
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,4096,96,8,128,1,fp8,fp8,0,2.404248046875
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,4096,96,96,128,1,fp8,fp8,0,1.41625919342041
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,4096,96,96,128,1,float16,fp8,0,1.8652351379394532
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,4096,96,1,128,1,float16,fp8,0,1.2430416107177735
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,4096,96,1,128,1,float16,float16,0,1.8893264770507812
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,4096,96,1,128,1,fp8,fp8,0,1.2303104400634766
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,4096,96,2,128,1,float16,float16,0,1.3503168106079102
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,4096,96,2,128,1,float16,fp8,0,1.2251359939575195
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,4096,96,4,128,1,float16,float16,0,1.324732780456543
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,4096,96,2,128,1,fp8,fp8,0,1.6394752502441405
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,4096,96,4,128,1,float16,fp8,0,1.2375663757324218
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,4096,96,4,128,1,fp8,fp8,0,1.2304880142211914
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,4096,96,8,128,1,float16,fp8,0,1.2372079849243165
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,4096,96,96,128,1,float16,float16,0,0.8149151802062988
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,4096,96,8,128,1,fp8,fp8,0,1.229532814025879
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,4096,96,8,128,1,float16,float16,0,1.6653312683105468
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,4096,96,96,128,1,float16,fp8,0,0.7633071899414062
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,4096,96,96,128,1,fp8,fp8,0,0.7307759761810303
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,4096,96,1,128,1,float16,float16,0,0.7059167861938477
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,4096,96,1,128,1,float16,fp8,0,0.8079024314880371
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,4096,96,1,128,1,fp8,fp8,0,0.6389567852020264
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,4096,96,2,128,1,float16,float16,0,0.701582384109497
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,4096,96,2,128,1,float16,fp8,0,0.6691232204437256
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,4096,96,2,128,1,fp8,fp8,0,0.7305520057678223
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,4096,96,4,128,1,float16,float16,0,0.7077104091644287
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,4096,96,4,128,1,float16,fp8,0,0.6362895965576172
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,4096,96,4,128,1,fp8,fp8,0,0.6446896076202393
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,4096,96,8,128,1,float16,float16,0,0.6975359916687012
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,4096,96,96,128,1,float16,float16,0,0.4339712142944336
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,4096,96,8,128,1,fp8,fp8,0,0.6354944229125976
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,4096,96,8,128,1,float16,fp8,0,0.7795775890350342
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,4096,96,96,128,1,float16,fp8,0,0.48855037689208985
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,4096,96,96,128,1,fp8,fp8,0,0.3953632116317749
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,4096,96,1,128,1,float16,float16,0,0.3800640106201172
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,4096,96,1,128,1,float16,fp8,0,0.376473593711853
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,4096,96,1,128,1,fp8,fp8,0,0.40149922370910646
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,4096,96,2,128,1,float16,float16,0,0.3842096090316772
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,4096,96,2,128,1,float16,fp8,0,0.3443583965301514
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,4096,96,2,128,1,fp8,fp8,0,0.34375040531158446
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,4096,96,4,128,1,float16,float16,0,0.375817608833313
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,4096,96,4,128,1,float16,fp8,0,0.36677279472351076
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,4096,96,4,128,1,fp8,fp8,0,0.34385440349578855
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,4096,96,8,128,1,float16,fp8,0,0.34007840156555175
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,4096,96,8,128,1,float16,float16,0,0.3839967966079712
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,4096,96,8,128,1,fp8,fp8,0,0.3526144027709961
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,3072,96,1,128,1,float16,fp8,0,5.823923110961914
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,3072,96,1,128,1,fp8,fp8,0,6.022288131713867
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,3072,96,1,128,1,float16,float16,0,8.604262542724609
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,3072,96,2,128,1,float16,fp8,0,5.872094345092774
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,3072,96,2,128,1,fp8,fp8,0,6.286471939086914
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,3072,96,2,128,1,float16,float16,0,8.146812438964844
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,3072,96,4,128,1,float16,float16,0,9.585406494140624
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,3072,96,4,128,1,float16,fp8,0,5.945225524902344
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,3072,96,4,128,1,fp8,fp8,0,6.274041748046875
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,3072,96,8,128,1,float16,float16,0,8.633030700683594
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,3072,96,8,128,1,float16,fp8,0,6.21090087890625
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,3072,96,8,128,1,fp8,fp8,0,6.169047927856445
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,3072,96,96,128,1,float16,float16,0,5.7466896057128904
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,3072,96,96,128,1,float16,fp8,0,3.48455696105957
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,3072,96,96,128,1,fp8,fp8,0,3.4901264190673826
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,3072,96,1,128,1,float16,float16,0,4.189852905273438
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,3072,96,1,128,1,float16,fp8,0,2.928772735595703
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,3072,96,1,128,1,fp8,fp8,0,2.9113855361938477
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,3072,96,2,128,1,float16,float16,0,3.9539169311523437
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,3072,96,2,128,1,fp8,fp8,0,2.959550476074219
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,3072,96,2,128,1,float16,fp8,0,2.9733104705810547
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,3072,96,4,128,1,float16,float16,0,4.259215927124023
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,3072,96,4,128,1,fp8,fp8,0,2.9586463928222657
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,3072,96,4,128,1,float16,fp8,0,2.9410175323486327
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,3072,96,8,128,1,float16,float16,0,4.668246459960938
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,3072,96,8,128,1,float16,fp8,0,3.1774335861206056
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,3072,96,96,128,1,float16,float16,0,1.95882568359375
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,3072,96,8,128,1,fp8,fp8,0,3.2020111083984375
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,3072,96,96,128,1,float16,fp8,0,1.836836814880371
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,3072,96,96,128,1,fp8,fp8,0,1.7912303924560546
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,3072,96,1,128,1,float16,float16,0,2.319932746887207
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,3072,96,1,128,1,float16,fp8,0,1.6304912567138672
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,3072,96,1,128,1,fp8,fp8,0,1.4747743606567383
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,3072,96,2,128,1,float16,float16,0,1.5862159729003906
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,3072,96,2,128,1,float16,fp8,0,1.6513999938964843
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,3072,96,2,128,1,fp8,fp8,0,1.4574336051940917
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,3072,96,4,128,1,float16,float16,0,1.6123455047607422
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,3072,96,4,128,1,float16,fp8,0,1.769500732421875
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,3072,96,4,128,1,fp8,fp8,0,1.4687824249267578
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,3072,96,8,128,1,float16,float16,0,1.5883888244628905
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,3072,96,8,128,1,float16,fp8,0,1.859823989868164
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,3072,96,96,128,1,float16,float16,0,0.9972751617431641
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,3072,96,8,128,1,fp8,fp8,0,1.4906463623046875
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,3072,96,96,128,1,float16,fp8,0,1.3700832366943358
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,3072,96,1,128,1,float16,float16,0,0.8168399810791016
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,3072,96,96,128,1,fp8,fp8,0,0.9416303634643555
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,3072,96,1,128,1,float16,fp8,0,0.7547359943389893
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,3072,96,1,128,1,fp8,fp8,0,0.8028623580932617
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,3072,96,2,128,1,float16,fp8,0,0.7542863845825195
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,3072,96,2,128,1,fp8,fp8,0,0.7503439903259277
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,3072,96,2,128,1,float16,float16,0,1.0963791847229003
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,3072,96,4,128,1,float16,float16,0,0.8336848258972168
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,3072,96,4,128,1,float16,fp8,0,0.75763840675354
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,3072,96,4,128,1,fp8,fp8,0,0.7502128124237061
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,3072,96,8,128,1,float16,float16,0,1.0451279640197755
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,3072,96,96,128,1,float16,float16,0,0.5130208015441895
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,3072,96,8,128,1,float16,fp8,0,0.8265647888183594
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,3072,96,8,128,1,fp8,fp8,0,0.7549615859985351
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,3072,96,96,128,1,fp8,fp8,0,0.4698351860046387
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,3072,96,1,128,1,float16,float16,0,0.43836002349853515
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,3072,96,96,128,1,float16,fp8,0,0.6800975799560547
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,3072,96,1,128,1,float16,fp8,0,0.3987375974655151
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,3072,96,1,128,1,fp8,fp8,0,0.39834721088409425
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,3072,96,2,128,1,float16,fp8,0,0.4327983856201172
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,3072,96,2,128,1,float16,float16,0,0.43263678550720214
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,3072,96,2,128,1,fp8,fp8,0,0.3988384008407593
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,3072,96,4,128,1,float16,float16,0,0.4296224117279053
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,3072,96,4,128,1,fp8,fp8,0,0.3978640079498291
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,3072,96,4,128,1,float16,fp8,0,0.4328464031219482
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,3072,96,8,128,1,float16,float16,0,0.44158082008361815
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,3072,96,8,128,1,float16,fp8,0,0.3951296091079712
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,3072,96,96,128,1,float16,float16,0,0.2861696004867554
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,3072,96,96,128,1,float16,fp8,0,0.25771679878234866
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,3072,96,8,128,1,fp8,fp8,0,0.3954960107803345
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,3072,96,96,128,1,fp8,fp8,0,0.25496640205383303
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,3072,96,1,128,1,float16,fp8,0,0.2201904058456421
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,3072,96,1,128,1,float16,float16,0,0.2397696018218994
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,3072,96,1,128,1,fp8,fp8,0,0.2160799980163574
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,3072,96,2,128,1,float16,float16,0,0.2348031997680664
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,3072,96,2,128,1,float16,fp8,0,0.21998560428619385
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,3072,96,2,128,1,fp8,fp8,0,0.21830880641937256
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,3072,96,4,128,1,float16,float16,0,0.23910560607910156
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,3072,96,4,128,1,float16,fp8,0,0.21681439876556396
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,3072,96,4,128,1,fp8,fp8,0,0.21821119785308837
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,3072,96,8,128,1,float16,float16,0,0.24169280529022216
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,3072,96,8,128,1,float16,fp8,0,0.21668798923492433
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,3072,96,8,128,1,fp8,fp8,0,0.2156224012374878
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,2048,96,1,128,1,float16,fp8,0,5.799887847900391
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,2048,96,1,128,1,fp8,fp8,0,5.824703979492187
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,2048,96,2,128,1,float16,float16,0,8.302642822265625
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,2048,96,1,128,1,float16,float16,0,8.705738830566407
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,2048,96,2,128,1,float16,fp8,0,5.826224136352539
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,2048,96,2,128,1,fp8,fp8,0,5.888187026977539
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,2048,96,4,128,1,float16,fp8,0,5.84717903137207
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,2048,96,4,128,1,float16,float16,0,8.800182342529297
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,2048,96,4,128,1,fp8,fp8,0,5.791321563720703
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,2048,96,8,128,1,float16,fp8,0,5.899639892578125
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,2048,96,8,128,1,float16,float16,0,9.590415954589844
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,2048,96,96,128,1,float16,float16,0,5.008615875244141
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,2048,96,8,128,1,fp8,fp8,0,5.926375961303711
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,2048,96,96,128,1,float16,fp8,0,3.706870269775391
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,2048,96,1,128,1,float16,float16,0,3.7993999481201173
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,2048,96,96,128,1,fp8,fp8,0,3.7042465209960938
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,2048,96,1,128,1,float16,fp8,0,2.9266336441040037
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,2048,96,1,128,1,fp8,fp8,0,3.1359296798706056
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,2048,96,2,128,1,float16,fp8,0,2.938203239440918
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,2048,96,2,128,1,float16,float16,0,3.9533615112304688
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,2048,96,2,128,1,fp8,fp8,0,2.9004064559936524
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,2048,96,4,128,1,float16,fp8,0,2.9258256912231446
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,2048,96,4,128,1,float16,float16,0,3.527755355834961
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,2048,96,4,128,1,fp8,fp8,0,2.915727996826172
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,2048,96,8,128,1,float16,float16,0,3.788227081298828
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,2048,96,8,128,1,float16,fp8,0,2.9234384536743163
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,2048,96,96,128,1,float16,float16,0,2.3371631622314455
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,2048,96,8,128,1,fp8,fp8,0,2.968124771118164
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,2048,96,96,128,1,float16,fp8,0,2.1167184829711916
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,2048,96,1,128,1,float16,fp8,0,1.4876015663146973
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,2048,96,96,128,1,fp8,fp8,0,1.8701087951660156
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,2048,96,1,128,1,float16,float16,0,2.1466192245483398
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,2048,96,1,128,1,fp8,fp8,0,1.4712703704833985
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,2048,96,2,128,1,float16,float16,0,1.528715229034424
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,2048,96,2,128,1,float16,fp8,0,1.4779439926147462
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,2048,96,2,128,1,fp8,fp8,0,1.6866064071655273
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,2048,96,4,128,1,float16,float16,0,1.5471088409423828
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,2048,96,4,128,1,fp8,fp8,0,1.4858223915100097
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,2048,96,4,128,1,float16,fp8,0,1.6302207946777343
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,2048,96,8,128,1,float16,float16,0,1.6610368728637694
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,2048,96,8,128,1,float16,fp8,0,1.9279903411865233
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,2048,96,96,128,1,float16,float16,0,1.079843235015869
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,2048,96,8,128,1,fp8,fp8,0,1.482862377166748
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,2048,96,96,128,1,float16,fp8,0,1.0528032302856445
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,2048,96,1,128,1,float16,float16,0,0.895297622680664
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,2048,96,96,128,1,fp8,fp8,0,0.9846943855285645
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,2048,96,1,128,1,float16,fp8,0,0.7603472232818603
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,2048,96,1,128,1,fp8,fp8,0,0.7559919834136963
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,2048,96,2,128,1,float16,fp8,0,0.7513296127319335
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,2048,96,2,128,1,float16,float16,0,0.9151583671569824
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,2048,96,2,128,1,fp8,fp8,0,0.7549551963806153
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,2048,96,4,128,1,float16,float16,0,0.8016799926757813
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,2048,96,4,128,1,float16,fp8,0,0.7510208129882813
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,2048,96,8,128,1,float16,float16,0,0.8145520210266113
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,2048,96,4,128,1,fp8,fp8,0,1.0189040184020997
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,2048,96,8,128,1,float16,fp8,0,0.7705264091491699
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,2048,96,8,128,1,fp8,fp8,0,0.7509183883666992
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,2048,96,96,128,1,float16,float16,0,0.5405519962310791
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,2048,96,96,128,1,float16,fp8,0,0.4929840087890625
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,2048,96,96,128,1,fp8,fp8,0,0.5014575958251953
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,2048,96,1,128,1,float16,fp8,0,0.40781278610229493
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,2048,96,1,128,1,float16,float16,0,0.4204432010650635
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,2048,96,1,128,1,fp8,fp8,0,0.3919471979141235
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,2048,96,2,128,1,float16,float16,0,0.42201762199401854
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,2048,96,2,128,1,float16,fp8,0,0.4078335762023926
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,2048,96,2,128,1,fp8,fp8,0,0.39152159690856936
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,2048,96,4,128,1,float16,float16,0,0.41722559928894043
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,2048,96,4,128,1,float16,fp8,0,0.40816478729248046
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,2048,96,4,128,1,fp8,fp8,0,0.3933680057525635
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,2048,96,8,128,1,float16,fp8,0,0.3913568019866943
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,2048,96,8,128,1,float16,float16,0,0.42557439804077146
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,2048,96,96,128,1,float16,float16,0,0.28788321018218993
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,2048,96,8,128,1,fp8,fp8,0,0.4106192111968994
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,2048,96,96,128,1,float16,fp8,0,0.26105918884277346
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,2048,96,96,128,1,fp8,fp8,0,0.2629744052886963
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,2048,96,1,128,1,float16,float16,0,0.23132638931274413
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,2048,96,1,128,1,float16,fp8,0,0.2127232074737549
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,2048,96,1,128,1,fp8,fp8,0,0.21159040927886963
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,2048,96,2,128,1,float16,float16,0,0.2267103910446167
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,2048,96,2,128,1,float16,fp8,0,0.21224799156188964
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,2048,96,2,128,1,fp8,fp8,0,0.2130431890487671
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,2048,96,4,128,1,float16,float16,0,0.22496480941772462
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,2048,96,4,128,1,float16,fp8,0,0.2114176034927368
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,2048,96,4,128,1,fp8,fp8,0,0.21217920780181884
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,2048,96,8,128,1,float16,fp8,0,0.2112816095352173
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,2048,96,8,128,1,fp8,fp8,0,0.21131999492645265
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,2048,96,8,128,1,float16,float16,0,0.23157598972320556
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,2048,96,96,128,1,float16,float16,0,0.1607151985168457
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,2048,96,1,128,1,float16,float16,0,0.12700159549713136
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,2048,96,96,128,1,float16,fp8,0,0.14496639966964722
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,2048,96,96,128,1,fp8,fp8,0,0.14594240188598634
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,2048,96,1,128,1,fp8,fp8,0,0.11810239553451538
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,2048,96,2,128,1,float16,float16,0,0.12418080568313598
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,2048,96,2,128,1,float16,fp8,0,0.11838400363922119
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,2048,96,1,128,1,float16,fp8,0,0.11740000247955322
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,2048,96,2,128,1,fp8,fp8,0,0.11842399835586548
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,2048,96,4,128,1,float16,float16,0,0.1255247950553894
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,2048,96,4,128,1,float16,fp8,0,0.11726560592651367
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,2048,96,4,128,1,fp8,fp8,0,0.11844799518585206
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,2048,96,8,128,1,float16,float16,0,0.12884479761123657
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,2048,96,8,128,1,float16,fp8,0,0.11778240203857422
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,2048,96,8,128,1,fp8,fp8,0,0.1164736032485962
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1536,96,1,128,1,float16,fp8,0,3.677675247192383
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1536,96,1,128,1,fp8,fp8,0,3.6756366729736327
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1536,96,1,128,1,float16,float16,0,4.758761596679688
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1536,96,2,128,1,float16,float16,0,4.615723037719727
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1536,96,2,128,1,float16,fp8,0,3.6736209869384764
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1536,96,2,128,1,fp8,fp8,0,3.672577667236328
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1536,96,4,128,1,float16,fp8,0,3.7123233795166017
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1536,96,4,128,1,float16,float16,0,5.0852783203125
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1536,96,4,128,1,fp8,fp8,0,3.670998382568359
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1536,96,8,128,1,float16,fp8,0,3.6650718688964843
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1536,96,8,128,1,float16,float16,0,5.122558212280273
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1536,96,8,128,1,fp8,fp8,0,3.6781982421875
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1536,96,96,128,1,float16,fp8,0,2.4513696670532226
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1536,96,96,128,1,float16,float16,0,2.9433984756469727
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1536,96,96,128,1,fp8,fp8,0,2.444822311401367
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1536,96,1,128,1,float16,float16,0,1.9225936889648438
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1536,96,1,128,1,float16,fp8,0,2.1941152572631837
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1536,96,1,128,1,fp8,fp8,0,1.8563087463378907
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1536,96,2,128,1,float16,float16,0,2.3790895462036135
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1536,96,2,128,1,float16,fp8,0,1.8496063232421875
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1536,96,2,128,1,fp8,fp8,0,1.86669921875
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1536,96,4,128,1,float16,float16,0,1.9035680770874024
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1536,96,4,128,1,fp8,fp8,0,1.847368049621582
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1536,96,4,128,1,float16,fp8,0,2.162531280517578
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1536,96,8,128,1,float16,float16,0,2.2854240417480467
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1536,96,8,128,1,float16,fp8,0,2.2973648071289063
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1536,96,96,128,1,float16,float16,0,1.3600959777832031
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1536,96,8,128,1,fp8,fp8,0,1.8472288131713868
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1536,96,96,128,1,float16,fp8,0,1.3372768402099608
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1536,96,96,128,1,fp8,fp8,0,1.2345135688781739
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1536,96,1,128,1,float16,fp8,0,0.9407135963439941
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1536,96,1,128,1,float16,float16,0,1.1751487731933594
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1536,96,1,128,1,fp8,fp8,0,0.9433216094970703
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1536,96,2,128,1,float16,float16,0,0.9705023765563965
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1536,96,2,128,1,float16,fp8,0,0.9378576278686523
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1536,96,2,128,1,fp8,fp8,0,0.9865296363830567
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1536,96,4,128,1,float16,float16,0,1.007748794555664
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1536,96,4,128,1,float16,fp8,0,1.0207568168640138
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1536,96,4,128,1,fp8,fp8,0,0.9354592323303222
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1536,96,8,128,1,float16,float16,0,0.9870688438415527
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1536,96,8,128,1,float16,fp8,0,1.03504638671875
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1536,96,8,128,1,fp8,fp8,0,0.9385343551635742
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1536,96,96,128,1,float16,float16,0,0.6896527767181396
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1536,96,96,128,1,float16,fp8,0,0.6313295841217041
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1536,96,96,128,1,fp8,fp8,0,0.6335760116577148
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1536,96,1,128,1,float16,float16,0,0.5096528053283691
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1536,96,1,128,1,float16,fp8,0,0.4829904079437256
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1536,96,1,128,1,fp8,fp8,0,0.6181583881378174
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1536,96,2,128,1,float16,float16,0,0.5091551780700684
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1536,96,2,128,1,float16,fp8,0,0.48467202186584474
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1536,96,2,128,1,fp8,fp8,0,0.48130078315734864
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1536,96,4,128,1,float16,fp8,0,0.48748159408569336
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1536,96,4,128,1,float16,float16,0,0.5031360149383545
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1536,96,4,128,1,fp8,fp8,0,0.484772777557373
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1536,96,8,128,1,float16,float16,0,0.5154863834381104
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1536,96,8,128,1,fp8,fp8,0,0.48278079032897947
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1536,96,96,128,1,float16,float16,0,0.3601936101913452
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1536,96,8,128,1,float16,fp8,0,0.4867231845855713
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1536,96,96,128,1,float16,fp8,0,0.33016319274902345
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1536,96,96,128,1,fp8,fp8,0,0.32974240779876707
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1536,96,1,128,1,float16,fp8,0,0.25672640800476076
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1536,96,1,128,1,float16,float16,0,0.2714816093444824
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1536,96,1,128,1,fp8,fp8,0,0.2536191940307617
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1536,96,2,128,1,float16,float16,0,0.26630239486694335
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1536,96,2,128,1,float16,fp8,0,0.25570080280303953
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1536,96,2,128,1,fp8,fp8,0,0.25597119331359863
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1536,96,4,128,1,float16,float16,0,0.2709072113037109
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1536,96,4,128,1,float16,fp8,0,0.2543951988220215
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1536,96,4,128,1,fp8,fp8,0,0.2562367916107178
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1536,96,8,128,1,float16,float16,0,0.2756448030471802
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1536,96,8,128,1,float16,fp8,0,0.25464160442352296
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1536,96,96,128,1,float16,float16,0,0.19262720346450807
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1536,96,8,128,1,fp8,fp8,0,0.25328478813171384
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1536,96,96,128,1,float16,fp8,0,0.17794719934463502
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1536,96,96,128,1,fp8,fp8,0,0.1785215973854065
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1536,96,1,128,1,float16,float16,0,0.14958560466766357
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1536,96,1,128,1,float16,fp8,0,0.13760319948196412
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1536,96,1,128,1,fp8,fp8,0,0.1398159980773926
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1536,96,2,128,1,float16,float16,0,0.14720480442047118
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1536,96,2,128,1,float16,fp8,0,0.14052480459213257
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1536,96,2,128,1,fp8,fp8,0,0.138155198097229
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1536,96,4,128,1,float16,fp8,0,0.13907359838485717
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1536,96,4,128,1,float16,float16,0,0.1498576045036316
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1536,96,4,128,1,fp8,fp8,0,0.14007519483566283
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1536,96,8,128,1,float16,float16,0,0.14868799448013306
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1536,96,8,128,1,float16,fp8,0,0.1399775981903076
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1536,96,8,128,1,fp8,fp8,0,0.14008159637451173
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1536,96,96,128,1,float16,float16,0,0.10649919509887695
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1536,96,96,128,1,float16,fp8,0,0.09989280104637147
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1536,96,1,128,1,float16,float16,0,0.08336480259895325
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1536,96,96,128,1,fp8,fp8,0,0.10070240497589111
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1536,96,1,128,1,float16,fp8,0,0.07988799810409546
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1536,96,1,128,1,fp8,fp8,0,0.08035039901733398
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1536,96,2,128,1,float16,float16,0,0.08524320125579835
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1536,96,2,128,1,float16,fp8,0,0.08055359721183777
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1536,96,2,128,1,fp8,fp8,0,0.07976959943771363
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1536,96,4,128,1,float16,float16,0,0.08303359746932984
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1536,96,4,128,1,float16,fp8,0,0.08050879836082458
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1536,96,4,128,1,fp8,fp8,0,0.08009920120239258
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1536,96,8,128,1,float16,fp8,0,0.07909600138664245
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1536,96,8,128,1,float16,float16,0,0.08523839712142944
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1536,96,8,128,1,fp8,fp8,0,0.079721599817276
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,1024,96,1,128,1,float16,fp8,0,3.9803230285644533
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,1024,96,1,128,1,fp8,fp8,0,3.982796859741211
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,1024,96,1,128,1,float16,float16,0,4.626971054077148
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,1024,96,2,128,1,float16,float16,0,4.917547225952148
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,1024,96,2,128,1,float16,fp8,0,3.9762241363525392
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,1024,96,2,128,1,fp8,fp8,0,3.977849578857422
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,1024,96,4,128,1,float16,fp8,0,3.973209762573242
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,1024,96,4,128,1,float16,float16,0,5.326403045654297
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,1024,96,4,128,1,fp8,fp8,0,3.9732334136962892
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,1024,96,8,128,1,float16,float16,0,5.238294219970703
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,1024,96,8,128,1,float16,fp8,0,3.950791931152344
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,1024,96,8,128,1,fp8,fp8,0,3.969889450073242
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1024,96,96,128,1,float16,float16,0,3.223432159423828
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1024,96,96,128,1,float16,fp8,0,2.790355110168457
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1024,96,96,128,1,fp8,fp8,0,2.7835344314575194
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1024,96,1,128,1,float16,float16,0,2.0190671920776366
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1024,96,1,128,1,float16,fp8,0,2.066324806213379
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1024,96,1,128,1,fp8,fp8,0,1.9964319229125977
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1024,96,2,128,1,float16,float16,0,2.16964168548584
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1024,96,2,128,1,float16,fp8,0,2.0977535247802734
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1024,96,2,128,1,fp8,fp8,0,2.004083251953125
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1024,96,4,128,1,float16,float16,0,2.070439910888672
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1024,96,4,128,1,float16,fp8,0,2.000966453552246
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1024,96,4,128,1,fp8,fp8,0,1.999060821533203
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1024,96,8,128,1,float16,float16,0,2.2640512466430662
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1024,96,8,128,1,float16,fp8,0,2.2074480056762695
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1024,96,96,128,1,float16,float16,0,1.5017840385437011
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1024,96,8,128,1,fp8,fp8,0,1.9972784042358398
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1024,96,96,128,1,fp8,fp8,0,1.4169872283935547
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1024,96,96,128,1,float16,fp8,0,1.6889328002929687
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1024,96,1,128,1,float16,float16,0,1.0994815826416016
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1024,96,1,128,1,float16,fp8,0,1.0160112380981445
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1024,96,2,128,1,float16,float16,0,1.023265552520752
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1024,96,1,128,1,fp8,fp8,0,1.0110848426818848
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1024,96,2,128,1,float16,fp8,0,1.040880012512207
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1024,96,2,128,1,fp8,fp8,0,1.0136960029602051
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1024,96,4,128,1,float16,fp8,0,1.0081263542175294
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1024,96,4,128,1,fp8,fp8,0,1.0116016387939453
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1024,96,4,128,1,float16,float16,0,1.2481087684631347
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1024,96,8,128,1,float16,float16,0,1.0482463836669922
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1024,96,96,128,1,float16,fp8,0,0.7144735813140869
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1024,96,8,128,1,fp8,fp8,0,1.0097439765930176
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1024,96,8,128,1,float16,fp8,0,1.00523042678833
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1024,96,96,128,1,float16,float16,0,0.9517168045043946
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1024,96,1,128,1,float16,float16,0,0.5217040061950684
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1024,96,1,128,1,float16,fp8,0,0.5156816005706787
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1024,96,96,128,1,fp8,fp8,0,0.7155055999755859
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1024,96,1,128,1,fp8,fp8,0,0.6849664211273193
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1024,96,2,128,1,float16,float16,0,0.5231008052825927
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1024,96,2,128,1,float16,fp8,0,0.516428804397583
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1024,96,2,128,1,fp8,fp8,0,0.5190159797668457
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1024,96,4,128,1,float16,float16,0,0.5350656032562255
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1024,96,4,128,1,float16,fp8,0,0.5335775852203369
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1024,96,4,128,1,fp8,fp8,0,0.514792013168335
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1024,96,8,128,1,float16,float16,0,0.5418799877166748
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1024,96,8,128,1,float16,fp8,0,0.5170639991760254
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1024,96,96,128,1,float16,float16,0,0.39875359535217286
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1024,96,8,128,1,fp8,fp8,0,0.5327167987823487
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1024,96,96,128,1,float16,fp8,0,0.3687999963760376
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1024,96,96,128,1,fp8,fp8,0,0.3700239896774292
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1024,96,1,128,1,float16,float16,0,0.278873610496521
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1024,96,1,128,1,fp8,fp8,0,0.26987519264221194
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1024,96,1,128,1,float16,fp8,0,0.2708479881286621
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1024,96,2,128,1,float16,float16,0,0.27994399070739745
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1024,96,2,128,1,float16,fp8,0,0.2689728021621704
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1024,96,2,128,1,fp8,fp8,0,0.27674560546875
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1024,96,4,128,1,float16,float16,0,0.28148798942565917
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1024,96,4,128,1,float16,fp8,0,0.270580792427063
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1024,96,4,128,1,fp8,fp8,0,0.2685168027877808
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1024,96,8,128,1,float16,float16,0,0.2857568025588989
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1024,96,8,128,1,fp8,fp8,0,0.27026240825653075
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1024,96,8,128,1,float16,fp8,0,0.2684288024902344
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1024,96,96,128,1,float16,float16,0,0.21187679767608641
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1024,96,96,128,1,float16,fp8,0,0.19617439508438111
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1024,96,96,128,1,fp8,fp8,0,0.1969920039176941
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1024,96,1,128,1,float16,float16,0,0.1492751955986023
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1024,96,1,128,1,float16,fp8,0,0.1451359987258911
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1024,96,1,128,1,fp8,fp8,0,0.1454416036605835
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1024,96,2,128,1,float16,float16,0,0.15144480466842652
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1024,96,2,128,1,float16,fp8,0,0.1442415952682495
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1024,96,2,128,1,fp8,fp8,0,0.14451999664306642
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1024,96,4,128,1,float16,float16,0,0.15275360345840455
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1024,96,4,128,1,fp8,fp8,0,0.14430559873580934
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1024,96,8,128,1,float16,float16,0,0.15369119644165039
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1024,96,4,128,1,float16,fp8,0,0.14639999866485595
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1024,96,8,128,1,float16,fp8,0,0.14529759883880616
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1024,96,8,128,1,fp8,fp8,0,0.14402719736099243
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1024,96,96,128,1,float16,float16,0,0.11733440160751343
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1024,96,96,128,1,float16,fp8,0,0.1095039963722229
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1024,96,96,128,1,fp8,fp8,0,0.10772000551223755
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1024,96,1,128,1,float16,float16,0,0.08287360072135926
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1024,96,1,128,1,float16,fp8,0,0.08093760013580323
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1024,96,1,128,1,fp8,fp8,0,0.08055040240287781
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1024,96,2,128,1,float16,float16,0,0.08479200005531311
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1024,96,2,128,1,float16,fp8,0,0.08000159859657288
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1024,96,2,128,1,fp8,fp8,0,0.08110880255699157
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1024,96,4,128,1,float16,float16,0,0.08442400097846985
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1024,96,4,128,1,float16,fp8,0,0.08037919998168945
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1024,96,4,128,1,fp8,fp8,0,0.07972319722175598
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1024,96,8,128,1,float16,float16,0,0.08606560230255127
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1024,96,8,128,1,float16,fp8,0,0.08103520274162293
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1024,96,8,128,1,fp8,fp8,0,0.08097440004348755
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1024,96,96,128,1,float16,float16,0,0.06771199703216553
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1024,96,96,128,1,float16,fp8,0,0.06188960075378418
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1024,96,96,128,1,fp8,fp8,0,0.06201279759407043
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1024,96,1,128,1,float16,float16,0,0.0514735996723175
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1024,96,1,128,1,float16,fp8,0,0.04935519993305206
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1024,96,1,128,1,fp8,fp8,0,0.04906719923019409
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1024,96,2,128,1,float16,float16,0,0.05106719732284546
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1024,96,2,128,1,float16,fp8,0,0.04967840015888214
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1024,96,4,128,1,float16,float16,0,0.05165759921073913
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1024,96,2,128,1,fp8,fp8,0,0.049833598732948306
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1024,96,4,128,1,float16,fp8,0,0.049435201287269595
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1024,96,4,128,1,fp8,fp8,0,0.04990400075912475
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1024,96,8,128,1,float16,float16,0,0.05143359899520874
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1024,96,8,128,1,float16,fp8,0,0.04961279928684235
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1024,96,8,128,1,fp8,fp8,0,0.0497871994972229
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,512,96,1,128,1,float16,float16,0,3.1223440170288086
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,512,96,1,128,1,float16,fp8,0,3.0638896942138674
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,512,96,1,128,1,fp8,fp8,0,3.0660688400268556
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,512,96,2,128,1,float16,fp8,0,3.0581232070922852
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,512,96,2,128,1,fp8,fp8,0,3.058734321594238
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,512,96,2,128,1,float16,float16,0,3.3752464294433593
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,512,96,4,128,1,float16,float16,0,3.024555206298828
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,512,96,4,128,1,float16,fp8,0,3.0555919647216796
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,512,96,4,128,1,fp8,fp8,0,3.0562816619873048
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,512,96,8,128,1,float16,float16,0,3.674176025390625
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,512,96,8,128,1,float16,fp8,0,3.0508256912231446
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,512,96,96,128,1,float16,float16,0,2.5518495559692385
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,512,96,8,128,1,fp8,fp8,0,3.0495887756347657
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,512,96,96,128,1,float16,fp8,0,2.3146448135375977
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,512,96,1,128,1,float16,float16,0,1.5100879669189453
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,512,96,1,128,1,float16,fp8,0,1.543286418914795
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,512,96,1,128,1,fp8,fp8,0,1.537343978881836
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,512,96,96,128,1,fp8,fp8,0,2.308435249328613
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,512,96,2,128,1,float16,float16,0,1.6833456039428711
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,512,96,2,128,1,float16,fp8,0,1.5396559715270997
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,512,96,2,128,1,fp8,fp8,0,1.5325200080871582
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,512,96,4,128,1,float16,float16,0,1.5185935974121094
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,512,96,4,128,1,float16,fp8,0,1.6211952209472655
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,512,96,4,128,1,fp8,fp8,0,1.5373855590820313
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,512,96,8,128,1,float16,float16,0,1.5812911987304688
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,512,96,8,128,1,float16,fp8,0,1.6425504684448242
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,512,96,96,128,1,float16,float16,0,1.2450240135192872
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,512,96,96,128,1,float16,fp8,0,1.1912495613098144
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,512,96,8,128,1,fp8,fp8,0,1.534062385559082
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,512,96,96,128,1,fp8,fp8,0,1.1640640258789063
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,512,96,1,128,1,float16,float16,0,0.7665631771087646
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,512,96,1,128,1,float16,fp8,0,0.7775424003601075
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,512,96,1,128,1,fp8,fp8,0,0.7817168235778809
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,512,96,2,128,1,float16,float16,0,0.7619391918182373
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,512,96,2,128,1,float16,fp8,0,0.7794847965240479
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,512,96,2,128,1,fp8,fp8,0,0.7764143943786621
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,512,96,4,128,1,float16,float16,0,0.7732719898223877
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,512,96,4,128,1,float16,fp8,0,0.7781487941741944
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,512,96,4,128,1,fp8,fp8,0,0.7784495830535889
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,512,96,8,128,1,float16,float16,0,0.7889056205749512
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,512,96,8,128,1,float16,fp8,0,0.7805263996124268
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,512,96,8,128,1,fp8,fp8,0,0.7764768123626709
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,512,96,96,128,1,float16,float16,0,0.6366303920745849
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,512,96,96,128,1,float16,fp8,0,0.5929359912872314
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,512,96,1,128,1,float16,float16,0,0.3921295881271362
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,512,96,96,128,1,fp8,fp8,0,0.5932576179504394
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,512,96,1,128,1,float16,fp8,0,0.3999936103820801
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,512,96,1,128,1,fp8,fp8,0,0.39775519371032714
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,512,96,2,128,1,float16,float16,0,0.39172160625457764
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,512,96,2,128,1,float16,fp8,0,0.39881119728088377
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,512,96,2,128,1,fp8,fp8,0,0.39829120635986326
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,512,96,4,128,1,float16,float16,0,0.398142409324646
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,512,96,4,128,1,fp8,fp8,0,0.3984800100326538
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,512,96,4,128,1,float16,fp8,0,0.39796159267425535
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,512,96,8,128,1,float16,float16,0,0.4083392143249512
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,512,96,8,128,1,float16,fp8,0,0.3955951929092407
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,512,96,96,128,1,float16,float16,0,0.32763359546661375
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,512,96,8,128,1,fp8,fp8,0,0.3971951961517334
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,512,96,96,128,1,float16,fp8,0,0.30549919605255127
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,512,96,96,128,1,fp8,fp8,0,0.3061023950576782
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,512,96,1,128,1,float16,float16,0,0.20727200508117677
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,512,96,1,128,1,float16,fp8,0,0.20897600650787354
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,512,96,2,128,1,float16,float16,0,0.20747039318084717
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,512,96,1,128,1,fp8,fp8,0,0.2082832098007202
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,512,96,2,128,1,float16,fp8,0,0.20714240074157714
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,512,96,2,128,1,fp8,fp8,0,0.20800321102142333
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,512,96,4,128,1,float16,float16,0,0.20901761054992676
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,512,96,4,128,1,float16,fp8,0,0.2085632085800171
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,512,96,4,128,1,fp8,fp8,0,0.20701439380645753
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,512,96,8,128,1,float16,float16,0,0.2149120092391968
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,512,96,96,128,1,float16,float16,0,0.17343519926071166
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,512,96,8,128,1,fp8,fp8,0,0.20785119533538818
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,512,96,96,128,1,float16,fp8,0,0.16211680173873902
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,512,96,8,128,1,float16,fp8,0,0.2064079999923706
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,512,96,1,128,1,float16,float16,0,0.11397119760513305
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,512,96,1,128,1,float16,fp8,0,0.11240960359573364
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,512,96,96,128,1,fp8,fp8,0,0.16097919940948485
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,512,96,1,128,1,fp8,fp8,0,0.11121120452880859
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,512,96,2,128,1,float16,float16,0,0.11325759887695312
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,512,96,2,128,1,float16,fp8,0,0.11232160329818726
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,512,96,2,128,1,fp8,fp8,0,0.11142239570617676
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,512,96,4,128,1,float16,float16,0,0.11397759914398194
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,512,96,4,128,1,float16,fp8,0,0.11353440284729004
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,512,96,4,128,1,fp8,fp8,0,0.11157280206680298
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,512,96,8,128,1,float16,float16,0,0.115283203125
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,512,96,8,128,1,float16,fp8,0,0.11149280071258545
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,512,96,8,128,1,fp8,fp8,0,0.11271840333938599
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,512,96,96,128,1,float16,float16,0,0.09567520022392273
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,512,96,96,128,1,float16,fp8,0,0.0881439983844757
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,512,96,96,128,1,fp8,fp8,0,0.088782399892807
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,512,96,1,128,1,float16,float16,0,0.062052798271179196
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,512,96,1,128,1,float16,fp8,0,0.06172320246696472
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,512,96,1,128,1,fp8,fp8,0,0.061612802743911746
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,512,96,2,128,1,float16,float16,0,0.06276959776878357
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,512,96,2,128,1,float16,fp8,0,0.061635202169418334
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,512,96,2,128,1,fp8,fp8,0,0.06170240044593811
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,512,96,4,128,1,float16,float16,0,0.06343200206756591
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,512,96,4,128,1,float16,fp8,0,0.06169760227203369
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,512,96,4,128,1,fp8,fp8,0,0.06164960265159607
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,512,96,8,128,1,float16,float16,0,0.06636639833450317
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,512,96,8,128,1,float16,fp8,0,0.06170719861984253
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,512,96,8,128,1,fp8,fp8,0,0.061592000722885135
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,512,96,96,128,1,float16,float16,0,0.055105602741241454
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,512,96,96,128,1,float16,fp8,0,0.05103359818458557
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,512,96,96,128,1,fp8,fp8,0,0.04959999918937683
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,512,96,1,128,1,float16,float16,0,0.03890720009803772
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,512,96,1,128,1,float16,fp8,0,0.03747040033340454
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,512,96,1,128,1,fp8,fp8,0,0.0371535986661911
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,512,96,2,128,1,float16,float16,0,0.03905439972877502
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,512,96,2,128,1,float16,fp8,0,0.03831999897956848
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,512,96,2,128,1,fp8,fp8,0,0.03707360029220581
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,512,96,4,128,1,float16,float16,0,0.03909119963645935
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,512,96,4,128,1,float16,fp8,0,0.0378607988357544
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,512,96,4,128,1,fp8,fp8,0,0.0375247985124588
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,512,96,8,128,1,float16,float16,0,0.039166399836540224
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,512,96,8,128,1,float16,fp8,0,0.03787199854850769
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,512,96,8,128,1,fp8,fp8,0,0.03708640038967133
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,512,96,96,128,1,float16,float16,0,0.03104960024356842
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,512,96,96,128,1,float16,fp8,0,0.03296000063419342
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,512,96,96,128,1,fp8,fp8,0,0.032974401116371156
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,512,96,1,128,1,float16,float16,0,0.026841598749160766
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,512,96,1,128,1,float16,fp8,0,0.026811200380325317
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,512,96,1,128,1,fp8,fp8,0,0.02683520019054413
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,512,96,2,128,1,float16,float16,0,0.026924800872802735
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,512,96,2,128,1,float16,fp8,0,0.026796799898147584
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,512,96,2,128,1,fp8,fp8,0,0.026820799708366393
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,512,96,4,128,1,float16,float16,0,0.026840001344680786
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,512,96,4,128,1,float16,fp8,0,0.026700800657272337
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,512,96,4,128,1,fp8,fp8,0,0.02677919864654541
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,512,96,8,128,1,float16,float16,0,0.026795199513435362
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,512,96,8,128,1,float16,fp8,0,0.026739200949668883
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,512,96,8,128,1,fp8,fp8,0,0.02677919864654541
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,256,96,1,128,1,float16,float16,0,1.2323568344116211
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,256,96,1,128,1,float16,fp8,0,1.2962191581726075
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,256,96,1,128,1,fp8,fp8,0,1.2945823669433594
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,256,96,2,128,1,float16,float16,0,1.235916805267334
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,256,96,2,128,1,float16,fp8,0,1.2922656059265136
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,256,96,2,128,1,fp8,fp8,0,1.2919520378112792
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,256,96,4,128,1,float16,float16,0,1.2682831764221192
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,256,96,4,128,1,float16,fp8,0,1.290614414215088
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,256,96,4,128,1,fp8,fp8,0,1.2902815818786622
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,256,96,8,128,1,float16,float16,0,1.3072815895080567
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,256,96,96,128,1,float16,float16,0,1.093496036529541
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,256,96,8,128,1,float16,fp8,0,1.2898799896240234
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,256,96,8,128,1,fp8,fp8,0,1.2891568183898925
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,256,96,1,128,1,float16,float16,0,0.6254144191741944
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,256,96,1,128,1,float16,fp8,0,0.6530447959899902
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,256,96,96,128,1,float16,fp8,0,1.0987199783325194
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,256,96,96,128,1,fp8,fp8,0,1.0364912033081055
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,256,96,1,128,1,fp8,fp8,0,0.654088020324707
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,256,96,2,128,1,float16,float16,0,0.623908805847168
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,256,96,2,128,1,float16,fp8,0,0.653656005859375
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,256,96,2,128,1,fp8,fp8,0,0.6498047828674316
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,256,96,4,128,1,float16,float16,0,0.6337520122528076
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,256,96,4,128,1,float16,fp8,0,0.6521872043609619
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,256,96,4,128,1,fp8,fp8,0,0.6529263973236084
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,256,96,8,128,1,float16,float16,0,0.648526382446289
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,256,96,8,128,1,float16,fp8,0,0.6518064022064209
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,256,96,8,128,1,fp8,fp8,0,0.6520512104034424
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,256,96,96,128,1,float16,float16,0,0.5595839977264404
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,256,96,96,128,1,float16,fp8,0,0.5270351886749267
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,256,96,1,128,1,float16,float16,0,0.3202863931655884
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,256,96,96,128,1,fp8,fp8,0,0.5270944118499756
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,256,96,1,128,1,float16,fp8,0,0.33491199016571044
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,256,96,1,128,1,fp8,fp8,0,0.3333359956741333
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,256,96,2,128,1,float16,float16,0,0.31999199390411376
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,256,96,2,128,1,float16,fp8,0,0.3341423988342285
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,256,96,2,128,1,fp8,fp8,0,0.3342511892318726
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,256,96,4,128,1,float16,fp8,0,0.33303840160369874
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,256,96,4,128,1,float16,float16,0,0.32567360401153567
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,256,96,4,128,1,fp8,fp8,0,0.33372159004211427
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,256,96,8,128,1,float16,float16,0,0.33482239246368406
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,256,96,8,128,1,float16,fp8,0,0.33289599418640137
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,256,96,8,128,1,fp8,fp8,0,0.3330591917037964
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,256,96,96,128,1,float16,float16,0,0.29009599685668946
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,256,96,96,128,1,float16,fp8,0,0.2721551895141602
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,256,96,1,128,1,float16,float16,0,0.17187199592590333
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,256,96,1,128,1,float16,fp8,0,0.17667839527130128
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,256,96,96,128,1,fp8,fp8,0,0.27267839908599856
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,256,96,1,128,1,fp8,fp8,0,0.17535359859466554
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,256,96,2,128,1,float16,float16,0,0.17115360498428345
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,256,96,2,128,1,float16,fp8,0,0.17643680572509765
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,256,96,2,128,1,fp8,fp8,0,0.1753983974456787
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,256,96,4,128,1,float16,float16,0,0.17355040311813355
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,256,96,4,128,1,float16,fp8,0,0.17517759799957275
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,256,96,4,128,1,fp8,fp8,0,0.1758512020111084
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,256,96,8,128,1,float16,float16,0,0.17679200172424317
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,256,96,8,128,1,float16,fp8,0,0.17501599788665773
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,256,96,96,128,1,float16,float16,0,0.15575679540634155
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,256,96,96,128,1,float16,fp8,0,0.1445919990539551
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,256,96,96,128,1,fp8,fp8,0,0.14540480375289916
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,256,96,8,128,1,fp8,fp8,0,0.17559200525283813
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,256,96,1,128,1,float16,float16,0,0.0957759976387024
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,256,96,1,128,1,float16,fp8,0,0.09648479819297791
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,256,96,1,128,1,fp8,fp8,0,0.09618560075759888
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,256,96,2,128,1,float16,float16,0,0.09543359875679017
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,256,96,2,128,1,float16,fp8,0,0.0963424026966095
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,256,96,2,128,1,fp8,fp8,0,0.09629279971122742
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,256,96,4,128,1,float16,float16,0,0.09682559967041016
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,256,96,4,128,1,float16,fp8,0,0.09619680047035217
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,256,96,4,128,1,fp8,fp8,0,0.09676640033721924
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,256,96,8,128,1,float16,float16,0,0.0991487979888916
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,256,96,8,128,1,float16,fp8,0,0.09623039960861206
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,256,96,8,128,1,fp8,fp8,0,0.09728959798812867
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,256,96,96,128,1,float16,float16,0,0.08712319731712341
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,256,96,96,128,1,float16,fp8,0,0.08081920146942138
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,256,96,1,128,1,float16,float16,0,0.05390080213546753
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,256,96,1,128,1,float16,fp8,0,0.05512639880180359
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,256,96,96,128,1,fp8,fp8,0,0.08162559866905213
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,256,96,1,128,1,fp8,fp8,0,0.05480639934539795
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,256,96,2,128,1,float16,float16,0,0.05516160130500793
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,256,96,2,128,1,float16,fp8,0,0.054420799016952515
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,256,96,2,128,1,fp8,fp8,0,0.054953598976135255
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,256,96,4,128,1,float16,float16,0,0.055580800771713255
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,256,96,4,128,1,float16,fp8,0,0.05434719920158386
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,256,96,4,128,1,fp8,fp8,0,0.054846400022506715
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,256,96,8,128,1,float16,float16,0,0.05745279788970947
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,256,96,8,128,1,float16,fp8,0,0.05418400168418884
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,256,96,8,128,1,fp8,fp8,0,0.055315202474594115
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,256,96,96,128,1,float16,float16,0,0.0474016010761261
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,256,96,96,128,1,float16,fp8,0,0.0440416008234024
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,256,96,96,128,1,fp8,fp8,0,0.04333600103855133
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,256,96,1,128,1,float16,float16,0,0.03097119927406311
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,256,96,1,128,1,float16,fp8,0,0.031115201115608216
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,256,96,1,128,1,fp8,fp8,0,0.030932798981666565
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,256,96,2,128,1,float16,float16,0,0.03091999888420105
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,256,96,4,128,1,float16,float16,0,0.031006398797035217
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,256,96,2,128,1,fp8,fp8,0,0.0324288010597229
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,256,96,2,128,1,float16,fp8,0,0.0313264012336731
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,256,96,8,128,1,float16,float16,0,0.03280639946460724
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,256,96,4,128,1,float16,fp8,0,0.031124800443649292
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,256,96,4,128,1,fp8,fp8,0,0.03261440098285675
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,256,96,8,128,1,float16,fp8,0,0.03104960024356842
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,256,96,96,128,1,float16,float16,0,0.028971201181411742
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,256,96,8,128,1,fp8,fp8,0,0.030943998694419862
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,256,96,96,128,1,float16,fp8,0,0.028987199068069458
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,256,96,1,128,1,float16,float16,0,0.022731199860572815
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,256,96,96,128,1,fp8,fp8,0,0.029504001140594482
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,256,96,1,128,1,float16,fp8,0,0.022843199968338012
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,256,96,2,128,1,float16,float16,0,0.022776000201702118
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,256,96,1,128,1,fp8,fp8,0,0.02272319942712784
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,256,96,2,128,1,float16,fp8,0,0.02279199957847595
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,256,96,2,128,1,fp8,fp8,0,0.022779199481010436
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,256,96,4,128,1,float16,float16,0,0.02290080040693283
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,256,96,4,128,1,float16,fp8,0,0.02289759963750839
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,256,96,4,128,1,fp8,fp8,0,0.022811199724674224
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,256,96,8,128,1,float16,float16,0,0.02340639978647232
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,256,96,8,128,1,float16,fp8,0,0.023068800568580627
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,256,96,96,128,1,float16,float16,0,0.01865759938955307
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,256,96,8,128,1,fp8,fp8,0,0.022780799865722658
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,256,96,96,128,1,float16,fp8,0,0.01931840032339096
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,256,96,96,128,1,fp8,fp8,0,0.018596799671649934
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,256,96,1,128,1,float16,float16,0,0.01656000018119812
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,256,96,1,128,1,float16,fp8,0,0.01658719927072525
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,256,96,1,128,1,fp8,fp8,0,0.01650400012731552
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,256,96,2,128,1,float16,float16,0,0.016598400473594666
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,256,96,2,128,1,float16,fp8,0,0.016676799952983858
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,256,96,2,128,1,fp8,fp8,0,0.016550399363040924
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,256,96,4,128,1,float16,float16,0,0.01661760061979294
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,256,96,4,128,1,float16,fp8,0,0.0165120005607605
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,256,96,4,128,1,fp8,fp8,0,0.016568000614643096
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,256,96,8,128,1,float16,float16,0,0.016711999475955964
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,256,96,8,128,1,float16,fp8,0,0.016432000696659087
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,256,96,8,128,1,fp8,fp8,0,0.016564799845218657
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,128,96,1,128,1,float16,float16,0,0.7530176162719726
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,128,96,1,128,1,float16,fp8,0,0.7788159847259521
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,128,96,1,128,1,fp8,fp8,0,0.7828591823577881
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,128,96,2,128,1,float16,float16,0,0.752726411819458
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,128,96,2,128,1,float16,fp8,0,0.7785344123840332
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,128,96,2,128,1,fp8,fp8,0,0.7811952114105225
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,128,96,4,128,1,float16,float16,0,0.7607967853546143
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,128,96,4,128,1,float16,fp8,0,0.7763760089874268
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,128,96,4,128,1,fp8,fp8,0,0.7803584098815918
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,128,96,8,128,1,float16,float16,0,0.7778336048126221
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,128,96,8,128,1,float16,fp8,0,0.7795567989349366
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,128,96,96,128,1,float16,float16,0,0.6136911869049072
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,128,96,8,128,1,fp8,fp8,0,0.7756783962249756
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,128,96,1,128,1,float16,float16,0,0.3832223892211914
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,128,96,1,128,1,float16,fp8,0,0.3940160036087036
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,128,96,96,128,1,float16,fp8,0,0.5872975826263428
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,128,96,96,128,1,fp8,fp8,0,0.5893184185028076
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,128,96,1,128,1,fp8,fp8,0,0.39765760898590086
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,128,96,2,128,1,float16,float16,0,0.38155040740966795
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,128,96,2,128,1,fp8,fp8,0,0.3972640037536621
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,128,96,2,128,1,float16,fp8,0,0.39614880084991455
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,128,96,4,128,1,float16,float16,0,0.38782880306243894
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,128,96,4,128,1,float16,fp8,0,0.39375360012054444
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,128,96,4,128,1,fp8,fp8,0,0.39643039703369143
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,128,96,8,128,1,float16,float16,0,0.3960063934326172
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,128,96,8,128,1,float16,fp8,0,0.39629440307617186
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,128,96,8,128,1,fp8,fp8,0,0.3942015886306763
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,128,96,96,128,1,float16,float16,0,0.3156336069107056
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,128,96,96,128,1,float16,fp8,0,0.29954559803009034
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,128,96,1,128,1,float16,float16,0,0.19891040325164794
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,128,96,96,128,1,fp8,fp8,0,0.3012336015701294
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,128,96,1,128,1,float16,fp8,0,0.20365440845489502
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,128,96,1,128,1,fp8,fp8,0,0.2050463914871216
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,128,96,2,128,1,float16,float16,0,0.19821759462356567
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,128,96,2,128,1,float16,fp8,0,0.20331521034240724
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,128,96,2,128,1,fp8,fp8,0,0.205019211769104
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,128,96,4,128,1,float16,float16,0,0.19998879432678224
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,128,96,4,128,1,float16,fp8,0,0.2033344030380249
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,128,96,4,128,1,fp8,fp8,0,0.20485761165618896
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,128,96,8,128,1,float16,float16,0,0.20439519882202148
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,128,96,8,128,1,float16,fp8,0,0.20463840961456298
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,128,96,8,128,1,fp8,fp8,0,0.20528318881988525
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,128,96,96,128,1,float16,float16,0,0.16468960046768188
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,128,96,96,128,1,float16,fp8,0,0.15700960159301758
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,128,96,1,128,1,float16,float16,0,0.10591200590133668
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,128,96,96,128,1,fp8,fp8,0,0.15813920497894288
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,128,96,1,128,1,float16,fp8,0,0.10891679525375367
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,128,96,1,128,1,fp8,fp8,0,0.10868480205535888
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,128,96,2,128,1,float16,float16,0,0.10723520517349243
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,128,96,2,128,1,float16,fp8,0,0.10885920524597167
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,128,96,2,128,1,fp8,fp8,0,0.10888639688491822
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,128,96,4,128,1,float16,float16,0,0.10886559486389161
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,128,96,4,128,1,fp8,fp8,0,0.10890719890594483
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,128,96,4,128,1,float16,fp8,0,0.10890560150146485
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,128,96,8,128,1,float16,float16,0,0.11122239828109741
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,128,96,8,128,1,float16,fp8,0,0.10924160480499268
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,128,96,96,128,1,float16,float16,0,0.09010879993438721
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,128,96,8,128,1,fp8,fp8,0,0.1088752031326294
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,128,96,96,128,1,float16,fp8,0,0.08624160289764404
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,128,96,1,128,1,float16,float16,0,0.05967680215835571
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,128,96,96,128,1,fp8,fp8,0,0.08637440204620361
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,128,96,1,128,1,float16,fp8,0,0.05985599756240845
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,128,96,2,128,1,float16,float16,0,0.05981919765472412
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,128,96,1,128,1,fp8,fp8,0,0.05971199870109558
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,128,96,2,128,1,float16,fp8,0,0.05964959859848022
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,128,96,2,128,1,fp8,fp8,0,0.05963199734687805
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,128,96,4,128,1,float16,float16,0,0.05963360071182251
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,128,96,4,128,1,float16,fp8,0,0.0596671998500824
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,128,96,4,128,1,fp8,fp8,0,0.05963199734687805
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,128,96,8,128,1,float16,float16,0,0.06022560000419617
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,128,96,8,128,1,float16,fp8,0,0.05965759754180908
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,128,96,8,128,1,fp8,fp8,0,0.05973280072212219
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,128,96,96,128,1,float16,float16,0,0.05048480033874512
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,128,96,96,128,1,fp8,fp8,0,0.04747360050678253
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,128,96,1,128,1,float16,float16,0,0.03508319854736328
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,128,96,96,128,1,float16,fp8,0,0.04762240052223206
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,128,96,1,128,1,float16,fp8,0,0.0356799989938736
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,128,96,2,128,1,float16,float16,0,0.035076799988746646
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,128,96,1,128,1,fp8,fp8,0,0.035124799609184264
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,128,96,2,128,1,float16,fp8,0,0.036006399989128114
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,128,96,2,128,1,fp8,fp8,0,0.035102400183677676
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,128,96,4,128,1,float16,float16,0,0.03549120128154755
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,128,96,4,128,1,float16,fp8,0,0.03573119938373566
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,128,96,4,128,1,fp8,fp8,0,0.03513120114803314
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,128,96,8,128,1,float16,float16,0,0.035128000378608706
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,128,96,8,128,1,float16,fp8,0,0.0359824001789093
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,128,96,8,128,1,fp8,fp8,0,0.03506079912185669
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,128,96,96,128,1,float16,float16,0,0.02686559855937958
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,128,96,96,128,1,float16,fp8,0,0.028862398862838746
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,128,96,96,128,1,fp8,fp8,0,0.027316799759864806
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,128,96,1,128,1,float16,float16,0,0.02102400064468384
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,128,96,1,128,1,float16,fp8,0,0.022519999742507936
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,128,96,1,128,1,fp8,fp8,0,0.02168319970369339
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,128,96,2,128,1,float16,float16,0,0.021164800226688384
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,128,96,2,128,1,float16,fp8,0,0.022575999796390533
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,128,96,2,128,1,fp8,fp8,0,0.022060799598693847
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,128,96,4,128,1,float16,float16,0,0.022103999555110932
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,128,96,4,128,1,float16,fp8,0,0.022776000201702118
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,128,96,4,128,1,fp8,fp8,0,0.02268799990415573
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,128,96,8,128,1,float16,float16,0,0.02263679951429367
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,128,96,8,128,1,float16,fp8,0,0.022494399547576906
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,128,96,8,128,1,fp8,fp8,0,0.022668799757957457
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,128,96,96,128,1,float16,fp8,0,0.020623999834060668
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,128,96,96,128,1,float16,float16,0,0.01868959963321686
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,128,96,96,128,1,fp8,fp8,0,0.02066880017518997
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,128,96,1,128,1,float16,float16,0,0.016683200001716615
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,128,96,1,128,1,float16,fp8,0,0.016553600132465363
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,128,96,1,128,1,fp8,fp8,0,0.016672000288963318
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,128,96,2,128,1,float16,float16,0,0.01674399971961975
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,128,96,2,128,1,float16,fp8,0,0.016568000614643096
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,128,96,2,128,1,fp8,fp8,0,0.016756799817085267
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,128,96,4,128,1,float16,float16,0,0.016756799817085267
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,128,96,4,128,1,float16,fp8,0,0.01658879965543747
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,128,96,4,128,1,fp8,fp8,0,0.01671999990940094
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,128,96,8,128,1,float16,float16,0,0.01736319959163666
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,128,96,8,128,1,float16,fp8,0,0.016612799465656282
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,128,96,8,128,1,fp8,fp8,0,0.016748799383640288
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,128,96,96,128,1,float16,float16,0,0.014723199605941772
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,128,96,96,128,1,float16,fp8,0,0.014529600739479065
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,128,96,96,128,1,fp8,fp8,0,0.014481599628925323
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,128,96,1,128,1,float16,float16,0,0.012644800543785095
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,128,96,1,128,1,float16,fp8,0,0.012464000284671784
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,128,96,1,128,1,fp8,fp8,0,0.01263200044631958
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,128,96,2,128,1,float16,float16,0,0.012590399384498597
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,128,96,2,128,1,float16,fp8,0,0.012436799705028534
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,128,96,2,128,1,fp8,fp8,0,0.012559999525547028
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,128,96,4,128,1,float16,float16,0,0.012606400251388549
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,128,96,4,128,1,float16,fp8,0,0.012432000041007996
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,128,96,4,128,1,fp8,fp8,0,0.012561599910259246
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,128,96,8,128,1,float16,float16,0,0.012479999661445617
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,128,96,8,128,1,float16,fp8,0,0.012521600723266602
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,128,96,8,128,1,fp8,fp8,0,0.012468799948692322
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,64,96,1,128,1,float16,float16,0,0.5796688079833985
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,64,96,1,128,1,float16,fp8,0,0.593555212020874
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,64,96,1,128,1,fp8,fp8,0,0.5947216033935547
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,64,96,2,128,1,float16,float16,0,0.5785823822021484
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,64,96,2,128,1,float16,fp8,0,0.59269118309021
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,64,96,2,128,1,fp8,fp8,0,0.5939743995666504
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,64,96,4,128,1,float16,float16,0,0.5834688186645508
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,64,96,4,128,1,float16,fp8,0,0.5921999931335449
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,64,96,8,128,1,float16,float16,0,0.591977596282959
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,64,96,4,128,1,fp8,fp8,0,0.5935152053833008
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,64,96,8,128,1,float16,fp8,0,0.5919392108917236
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,64,96,96,128,1,float16,fp8,0,0.3968224048614502
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,64,96,96,128,1,float16,float16,0,0.412332820892334
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,64,96,8,128,1,fp8,fp8,0,0.5920928001403809
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,64,96,1,128,1,float16,float16,0,0.2971663951873779
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,64,96,1,128,1,float16,fp8,0,0.3028752088546753
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,64,96,96,128,1,fp8,fp8,0,0.39659039974212645
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,64,96,1,128,1,fp8,fp8,0,0.3017263889312744
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,64,96,2,128,1,float16,float16,0,0.2959088087081909
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,64,96,2,128,1,float16,fp8,0,0.30143520832061765
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,64,96,2,128,1,fp8,fp8,0,0.3019007921218872
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,64,96,4,128,1,float16,float16,0,0.29932799339294436
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,64,96,4,128,1,float16,fp8,0,0.30147199630737304
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,64,96,4,128,1,fp8,fp8,0,0.3019248008728027
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,64,96,8,128,1,float16,float16,0,0.30347518920898436
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,64,96,8,128,1,float16,fp8,0,0.30147199630737304
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,64,96,96,128,1,float16,float16,0,0.21340160369873046
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,64,96,8,128,1,fp8,fp8,0,0.3015968084335327
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,64,96,96,128,1,float16,fp8,0,0.2044912099838257
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,64,96,96,128,1,fp8,fp8,0,0.20511200428009033
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,64,96,1,128,1,float16,float16,0,0.15601279735565185
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,64,96,1,128,1,float16,fp8,0,0.15591520071029663
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,64,96,1,128,1,fp8,fp8,0,0.1560528039932251
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,64,96,2,128,1,float16,float16,0,0.1550271987915039
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,64,96,2,128,1,float16,fp8,0,0.15589760541915892
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,64,96,2,128,1,fp8,fp8,0,0.1559391975402832
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,64,96,4,128,1,float16,float16,0,0.15636639595031737
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,64,96,4,128,1,float16,fp8,0,0.1561519980430603
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,64,96,4,128,1,fp8,fp8,0,0.15605599880218507
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,64,96,8,128,1,float16,float16,0,0.15862720012664794
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,64,96,8,128,1,float16,fp8,0,0.15586880445480347
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,64,96,8,128,1,fp8,fp8,0,0.15593440532684327
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,64,96,96,128,1,float16,float16,0,0.11289279460906983
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,64,96,96,128,1,float16,fp8,0,0.10885920524597167
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,64,96,1,128,1,float16,float16,0,0.08309440016746521
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,64,96,96,128,1,fp8,fp8,0,0.10876799821853637
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,64,96,1,128,1,float16,fp8,0,0.08225119709968567
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,64,96,1,128,1,fp8,fp8,0,0.08219199776649475
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,64,96,2,128,1,float16,float16,0,0.08317599892616272
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,64,96,2,128,1,float16,fp8,0,0.0822704017162323
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,64,96,2,128,1,fp8,fp8,0,0.08244959712028503
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,64,96,4,128,1,float16,float16,0,0.08432000279426574
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,64,96,4,128,1,float16,fp8,0,0.08218719959259033
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,64,96,4,128,1,fp8,fp8,0,0.08232640027999878
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,64,96,8,128,1,float16,float16,0,0.08568159937858581
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,64,96,8,128,1,float16,fp8,0,0.0839631974697113
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,64,96,96,128,1,float16,float16,0,0.0616752028465271
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,64,96,8,128,1,fp8,fp8,0,0.08309599757194519
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,64,96,96,128,1,float16,fp8,0,0.05966399908065796
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,64,96,96,128,1,fp8,fp8,0,0.05965920090675354
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,64,96,1,128,1,float16,float16,0,0.045630401372909545
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,64,96,1,128,1,float16,fp8,0,0.046492800116539
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,64,96,1,128,1,fp8,fp8,0,0.046988800168037415
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,64,96,2,128,1,float16,float16,0,0.04719679951667786
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,64,96,2,128,1,float16,fp8,0,0.046696001291275026
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,64,96,2,128,1,fp8,fp8,0,0.04627839922904968
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,64,96,4,128,1,float16,float16,0,0.04606559872627258
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,64,96,4,128,1,float16,fp8,0,0.04729920029640198
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,64,96,4,128,1,fp8,fp8,0,0.04723199903964996
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,64,96,8,128,1,float16,float16,0,0.04712800085544586
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,64,96,8,128,1,float16,fp8,0,0.046961599588394166
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,64,96,8,128,1,fp8,fp8,0,0.047249600291252136
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,64,96,96,128,1,float16,float16,0,0.03399359881877899
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,64,96,96,128,1,float16,fp8,0,0.03504480123519897
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,64,96,1,128,1,float16,float16,0,0.028972798585891725
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,64,96,96,128,1,fp8,fp8,0,0.03491680026054382
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,64,96,1,128,1,float16,fp8,0,0.02884959876537323
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,64,96,1,128,1,fp8,fp8,0,0.028835201263427736
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,64,96,2,128,1,float16,float16,0,0.028857600688934327
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,64,96,2,128,1,float16,fp8,0,0.028841599822044373
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,64,96,4,128,1,float16,float16,0,0.028892800211906433
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,64,96,2,128,1,fp8,fp8,0,0.02885119915008545
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,64,96,4,128,1,float16,fp8,0,0.02887519896030426
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,64,96,8,128,1,float16,float16,0,0.02893120050430298
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,64,96,8,128,1,float16,fp8,0,0.02886880040168762
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,64,96,4,128,1,fp8,fp8,0,0.028838399052619933
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,64,96,8,128,1,fp8,fp8,0,0.02890239953994751
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,64,96,96,128,1,float16,float16,0,0.020691199600696562
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,64,96,96,128,1,float16,fp8,0,0.02067520022392273
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,64,96,96,128,1,fp8,fp8,0,0.020838400721549986
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,64,96,1,128,1,float16,float16,0,0.018563200533390046
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,64,96,1,128,1,float16,fp8,0,0.018555200099945067
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,64,96,2,128,1,float16,float16,0,0.018532800674438476
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,64,96,1,128,1,fp8,fp8,0,0.018571199476718904
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,64,96,2,128,1,float16,fp8,0,0.018695999681949616
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,64,96,2,128,1,fp8,fp8,0,0.018568000197410582
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,64,96,4,128,1,float16,float16,0,0.018811200559139252
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,64,96,4,128,1,float16,fp8,0,0.018564799427986146
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,64,96,4,128,1,fp8,fp8,0,0.018580800294876097
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,64,96,8,128,1,float16,float16,0,0.018648000061511995
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,64,96,8,128,1,float16,fp8,0,0.01868640035390854
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,64,96,8,128,1,fp8,fp8,0,0.018571199476718904
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,64,96,96,128,1,float16,float16,0,0.01666560024023056
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,64,96,96,128,1,float16,fp8,0,0.016510400176048278
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,64,96,96,128,1,fp8,fp8,0,0.016492800414562227
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,64,96,1,128,1,float16,float16,0,0.014638400077819825
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,64,96,1,128,1,float16,fp8,0,0.014595200121402741
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,64,96,1,128,1,fp8,fp8,0,0.014508800208568573
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,64,96,2,128,1,float16,float16,0,0.014655999839305878
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,64,96,2,128,1,float16,fp8,0,0.014683200418949128
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,64,96,2,128,1,fp8,fp8,0,0.014534400403499603
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,64,96,4,128,1,float16,float16,0,0.014628799259662628
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,64,96,4,128,1,float16,fp8,0,0.0147024005651474
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,64,96,4,128,1,fp8,fp8,0,0.014526399970054626
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,64,96,8,128,1,float16,fp8,0,0.014640000462532044
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,64,96,8,128,1,float16,float16,0,0.014627200365066529
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,64,96,8,128,1,fp8,fp8,0,0.01449120044708252
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,64,96,96,128,1,float16,float16,0,0.014452800154685974
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,64,96,96,128,1,float16,fp8,0,0.012583999335765839
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,64,96,96,128,1,fp8,fp8,0,0.012483199685811996
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,64,96,1,128,1,float16,float16,0,0.01239359974861145
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,64,96,1,128,1,float16,fp8,0,0.012404800206422806
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,64,96,1,128,1,fp8,fp8,0,0.011048000305891037
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,64,96,2,128,1,float16,fp8,0,0.012455999851226807
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,64,96,2,128,1,float16,float16,0,0.012465599924325943
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,64,96,2,128,1,fp8,fp8,0,0.011363200098276138
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,64,96,4,128,1,float16,float16,0,0.011033599823713302
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,64,96,4,128,1,float16,fp8,0,0.012411200255155564
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,64,96,4,128,1,fp8,fp8,0,0.011771199852228164
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,64,96,8,128,1,float16,float16,0,0.011646399646997452
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,64,96,8,128,1,float16,fp8,0,0.012462399899959564
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,64,96,8,128,1,fp8,fp8,0,0.012188799679279327
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,32,96,1,128,1,fp8,fp8,0,0.5071407794952393
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,32,96,1,128,1,float16,fp8,0,0.507812786102295
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,32,96,1,128,1,float16,float16,0,0.49361438751220704
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,32,96,2,128,1,float16,float16,0,0.49436159133911134
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,32,96,2,128,1,float16,fp8,0,0.5077631950378418
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,32,96,2,128,1,fp8,fp8,0,0.5043536186218261
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,32,96,4,128,1,float16,float16,0,0.4955728054046631
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,32,96,4,128,1,float16,fp8,0,0.5073472023010254
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,32,96,4,128,1,fp8,fp8,0,0.5042575836181641
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,32,96,8,128,1,float16,float16,0,0.49939842224121095
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,32,96,8,128,1,float16,fp8,0,0.5074272155761719
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,32,96,8,128,1,fp8,fp8,0,0.5041071891784668
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,32,96,96,128,1,float16,fp8,0,0.30761120319366453
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,32,96,96,128,1,float16,float16,0,0.3102463960647583
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,32,96,96,128,1,fp8,fp8,0,0.30667040348052976
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,32,96,1,128,1,float16,float16,0,0.2568624019622803
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,32,96,1,128,1,float16,fp8,0,0.258622407913208
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,32,96,1,128,1,fp8,fp8,0,0.25838398933410645
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,32,96,2,128,1,float16,float16,0,0.25597438812255857
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,32,96,2,128,1,float16,fp8,0,0.25859360694885253
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,32,96,2,128,1,fp8,fp8,0,0.25839040279388426
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,32,96,4,128,1,float16,float16,0,0.2576080083847046
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,32,96,4,128,1,float16,fp8,0,0.2582751989364624
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,32,96,4,128,1,fp8,fp8,0,0.2584415912628174
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,32,96,8,128,1,float16,float16,0,0.2591264009475708
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,32,96,8,128,1,float16,fp8,0,0.2587824106216431
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,32,96,8,128,1,fp8,fp8,0,0.25760478973388673
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,32,96,96,128,1,float16,float16,0,0.16436320543289185
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,32,96,96,128,1,float16,fp8,0,0.158788800239563
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,32,96,96,128,1,fp8,fp8,0,0.1583888053894043
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,32,96,1,128,1,float16,float16,0,0.13382719755172728
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,32,96,1,128,1,float16,fp8,0,0.13349599838256837
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,32,96,1,128,1,fp8,fp8,0,0.13343039751052857
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,32,96,2,128,1,float16,fp8,0,0.1336943984031677
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,32,96,2,128,1,float16,float16,0,0.13447519540786743
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,32,96,2,128,1,fp8,fp8,0,0.13337119817733764
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,32,96,4,128,1,float16,float16,0,0.13416160345077516
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,32,96,4,128,1,float16,fp8,0,0.1336527943611145
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,32,96,4,128,1,fp8,fp8,0,0.13334399461746216
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,32,96,8,128,1,float16,float16,0,0.1349984049797058
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,32,96,8,128,1,float16,fp8,0,0.13334720134735106
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,32,96,8,128,1,fp8,fp8,0,0.13366559743881226
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,32,96,96,128,1,float16,float16,0,0.08635839819908142
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,32,96,96,128,1,float16,fp8,0,0.08419679999351501
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,32,96,1,128,1,float16,float16,0,0.07060160040855408
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,32,96,1,128,1,float16,fp8,0,0.07186239957809448
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,32,96,1,128,1,fp8,fp8,0,0.07191680073738098
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,32,96,96,128,1,fp8,fp8,0,0.08433759808540345
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,32,96,2,128,1,float16,float16,0,0.07100800275802613
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,32,96,2,128,1,fp8,fp8,0,0.07183200120925903
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,32,96,2,128,1,float16,fp8,0,0.07197759747505188
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,32,96,4,128,1,float16,float16,0,0.07068960070610046
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,32,96,4,128,1,float16,fp8,0,0.07200800180435181
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,32,96,4,128,1,fp8,fp8,0,0.07193599939346314
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,32,96,8,128,1,float16,float16,0,0.07196959853172302
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,32,96,8,128,1,float16,fp8,0,0.07193120121955872
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,32,96,8,128,1,fp8,fp8,0,0.07191839814186096
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,32,96,96,128,1,float16,float16,0,0.04727199971675873
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,32,96,96,128,1,float16,fp8,0,0.04726560115814209
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,32,96,1,128,1,float16,float16,0,0.04129599928855896
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,32,96,96,128,1,fp8,fp8,0,0.04737600088119507
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,32,96,1,128,1,float16,fp8,0,0.04117920100688934
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,32,96,1,128,1,fp8,fp8,0,0.04115839898586273
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,32,96,2,128,1,float16,float16,0,0.04126079976558685
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,32,96,2,128,1,float16,fp8,0,0.041198399662971494
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,32,96,2,128,1,fp8,fp8,0,0.04116480052471161
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,32,96,4,128,1,float16,float16,0,0.04118559956550598
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,32,96,4,128,1,float16,fp8,0,0.04108160138130188
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,32,96,4,128,1,fp8,fp8,0,0.04116320013999939
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,32,96,8,128,1,float16,float16,0,0.04127840101718903
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,32,96,8,128,1,float16,fp8,0,0.04126879870891571
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,32,96,96,128,1,float16,float16,0,0.028785601258277893
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,32,96,8,128,1,fp8,fp8,0,0.04110080003738403
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,32,96,96,128,1,float16,fp8,0,0.02884800136089325
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,32,96,96,128,1,fp8,fp8,0,0.02879360020160675
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,32,96,1,128,1,float16,float16,0,0.02475679963827133
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,32,96,1,128,1,float16,fp8,0,0.024846400320529937
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,32,96,1,128,1,fp8,fp8,0,0.026440000534057616
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,32,96,2,128,1,float16,float16,0,0.0247871994972229
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,32,96,2,128,1,fp8,fp8,0,0.02671839892864227
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,32,96,2,128,1,float16,fp8,0,0.0247871994972229
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,32,96,4,128,1,float16,float16,0,0.024825599789619446
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,32,96,4,128,1,float16,fp8,0,0.026734399795532226
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,32,96,4,128,1,fp8,fp8,0,0.024940800666809083
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,32,96,8,128,1,float16,float16,0,0.02478879988193512
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,32,96,8,128,1,float16,fp8,0,0.02675839960575104
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,32,96,8,128,1,fp8,fp8,0,0.024903999269008638
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,32,96,96,128,1,float16,float16,0,0.018636800348758698
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,32,96,96,128,1,float16,fp8,0,0.018559999763965607
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,32,96,96,128,1,fp8,fp8,0,0.018688000738620758
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,32,96,1,128,1,float16,float16,0,0.016574400663375854
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,32,96,1,128,1,fp8,fp8,0,0.016689600050449373
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,32,96,2,128,1,float16,float16,0,0.016603200137615202
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,32,96,1,128,1,float16,fp8,0,0.016686399281024934
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,32,96,2,128,1,float16,fp8,0,0.016777600347995757
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,32,96,2,128,1,fp8,fp8,0,0.01658719927072525
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,32,96,4,128,1,float16,float16,0,0.016832000017166136
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,32,96,4,128,1,float16,fp8,0,0.016683200001716615
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,32,96,4,128,1,fp8,fp8,0,0.01656319946050644
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,32,96,8,128,1,float16,float16,0,0.016790400445461272
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,32,96,8,128,1,float16,fp8,0,0.01679680049419403
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,32,96,96,128,1,float16,float16,0,0.016499200463294984
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,32,96,8,128,1,fp8,fp8,0,0.016659200191497803
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,32,96,96,128,1,fp8,fp8,0,0.014689600467681885
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,32,96,1,128,1,float16,float16,0,0.013860799372196198
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,32,96,96,128,1,float16,fp8,0,0.014670400321483612
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,32,96,1,128,1,float16,fp8,0,0.014500799775123595
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,32,96,2,128,1,float16,float16,0,0.01443839967250824
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,32,96,1,128,1,fp8,fp8,0,0.01454080045223236
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,32,96,2,128,1,float16,fp8,0,0.014417600631713868
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,32,96,4,128,1,float16,float16,0,0.013991999626159667
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,32,96,2,128,1,fp8,fp8,0,0.014451199769973755
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,32,96,4,128,1,float16,fp8,0,0.014496000111103058
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,32,96,4,128,1,fp8,fp8,0,0.014524799585342408
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,32,96,8,128,1,float16,fp8,0,0.014452800154685974
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,32,96,8,128,1,float16,float16,0,0.01449279934167862
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,32,96,8,128,1,fp8,fp8,0,0.013321599364280701
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,32,96,96,128,1,float16,float16,0,0.01327199935913086
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,32,96,96,128,1,float16,fp8,0,0.012467200309038163
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,32,96,96,128,1,fp8,fp8,0,0.010441599786281586
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,32,96,1,128,1,float16,float16,0,0.010524799674749374
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,32,96,1,128,1,float16,fp8,0,0.010548800230026245
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,32,96,2,128,1,float16,float16,0,0.010552000254392624
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,32,96,2,128,1,float16,fp8,0,0.010595200210809707
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,32,96,1,128,1,fp8,fp8,0,0.010452800244092942
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,32,96,2,128,1,fp8,fp8,0,0.010496000200510025
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,32,96,4,128,1,float16,float16,0,0.010492800176143647
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,32,96,4,128,1,float16,fp8,0,0.010438399761915207
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,32,96,4,128,1,fp8,fp8,0,0.010523200035095215
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,32,96,8,128,1,float16,float16,0,0.010505600273609162
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,32,96,8,128,1,float16,fp8,0,0.010428799688816071
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,32,96,8,128,1,fp8,fp8,0,0.010489600151777268
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,16,96,1,128,1,float16,float16,0,0.47426400184631345
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,16,96,1,128,1,float16,fp8,0,0.45860800743103025
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,16,96,1,128,1,fp8,fp8,0,0.4589407920837402
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,16,96,2,128,1,float16,fp8,0,0.45774240493774415
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,16,96,2,128,1,fp8,fp8,0,0.459116792678833
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,16,96,2,128,1,float16,float16,0,0.4747200012207031
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,16,96,4,128,1,float16,fp8,0,0.4585775852203369
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,16,96,4,128,1,fp8,fp8,0,0.45605921745300293
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,16,96,4,128,1,float16,float16,0,0.4770768165588379
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,16,96,8,128,1,float16,float16,0,0.48049440383911135
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,16,96,8,128,1,float16,fp8,0,0.455734395980835
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,16,96,8,128,1,fp8,fp8,0,0.45734238624572754
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,16,96,96,128,1,float16,float16,0,0.2766736030578613
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,16,96,96,128,1,fp8,fp8,0,0.2583440065383911
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,16,96,96,128,1,float16,fp8,0,0.25766398906707766
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,16,96,1,128,1,float16,float16,0,0.2476047992706299
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,16,96,1,128,1,float16,fp8,0,0.23368160724639891
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,16,96,1,128,1,fp8,fp8,0,0.23176639080047606
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,16,96,2,128,1,float16,float16,0,0.24732160568237305
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,16,96,2,128,1,float16,fp8,0,0.2337023973464966
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,16,96,2,128,1,fp8,fp8,0,0.2319103956222534
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,16,96,4,128,1,float16,float16,0,0.24791998863220216
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,16,96,4,128,1,fp8,fp8,0,0.23350880146026612
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,16,96,8,128,1,float16,float16,0,0.24653759002685546
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,16,96,4,128,1,float16,fp8,0,0.23378078937530516
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,16,96,8,128,1,float16,fp8,0,0.23387839794158935
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,16,96,96,128,1,float16,float16,0,0.14160000085830687
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,16,96,96,128,1,float16,fp8,0,0.13351520299911498
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,16,96,8,128,1,fp8,fp8,0,0.23377599716186523
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,16,96,96,128,1,fp8,fp8,0,0.13356640338897705
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,16,96,1,128,1,float16,fp8,0,0.12109440565109253
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,16,96,1,128,1,fp8,fp8,0,0.12099039554595947
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,16,96,1,128,1,float16,float16,0,0.12681599855422973
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,16,96,2,128,1,float16,float16,0,0.12763359546661376
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,16,96,2,128,1,float16,fp8,0,0.1211087942123413
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,16,96,2,128,1,fp8,fp8,0,0.1192736029624939
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,16,96,4,128,1,float16,float16,0,0.12731200456619263
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,16,96,4,128,1,float16,fp8,0,0.1209872007369995
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,16,96,4,128,1,fp8,fp8,0,0.11942720413208008
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,16,96,8,128,1,float16,float16,0,0.12833600044250487
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,16,96,8,128,1,float16,fp8,0,0.12105599641799927
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,16,96,8,128,1,fp8,fp8,0,0.11925599575042725
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,16,96,96,128,1,float16,float16,0,0.07471519708633423
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,16,96,96,128,1,float16,fp8,0,0.07024800181388854
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,16,96,96,128,1,fp8,fp8,0,0.07066879868507385
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,16,96,1,128,1,float16,float16,0,0.06805279850959778
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,16,96,1,128,1,float16,fp8,0,0.06382399797439575
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,16,96,1,128,1,fp8,fp8,0,0.06442400217056274
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,16,96,2,128,1,float16,float16,0,0.0685696005821228
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,16,96,2,128,1,float16,fp8,0,0.06563839912414551
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,16,96,2,128,1,fp8,fp8,0,0.06492480039596557
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,16,96,4,128,1,float16,float16,0,0.06819040179252625
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,16,96,4,128,1,float16,fp8,0,0.06569920182228088
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,16,96,4,128,1,fp8,fp8,0,0.0653007984161377
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,16,96,8,128,1,float16,float16,0,0.069760000705719
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,16,96,8,128,1,float16,fp8,0,0.06571360230445862
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,16,96,8,128,1,fp8,fp8,0,0.06377279758453369
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,16,96,96,128,1,float16,float16,0,0.041168001294136045
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,16,96,96,128,1,float16,fp8,0,0.04115679860115051
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,16,96,96,128,1,fp8,fp8,0,0.041115200519561766
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,16,96,1,128,1,float16,float16,0,0.039150398969650266
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,16,96,1,128,1,float16,fp8,0,0.03705439865589142
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,16,96,1,128,1,fp8,fp8,0,0.037134400010108946
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,16,96,2,128,1,float16,float16,0,0.039175999164581296
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,16,96,2,128,1,float16,fp8,0,0.03705280125141144
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,16,96,2,128,1,fp8,fp8,0,0.03721120059490204
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,16,96,4,128,1,float16,float16,0,0.03912799954414368
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,16,96,4,128,1,float16,fp8,0,0.03708640038967133
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,16,96,4,128,1,fp8,fp8,0,0.037231999635696414
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,16,96,8,128,1,float16,float16,0,0.03931359946727753
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,16,96,8,128,1,float16,fp8,0,0.03703359961509704
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,16,96,8,128,1,fp8,fp8,0,0.037191998958587644
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,16,96,96,128,1,float16,float16,0,0.026859200000762938
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,16,96,96,128,1,float16,fp8,0,0.02479040026664734
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,16,96,1,128,1,float16,float16,0,0.024771200120449068
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,16,96,96,128,1,fp8,fp8,0,0.02493920028209686
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,16,96,1,128,1,float16,fp8,0,0.022812800109386445
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,16,96,1,128,1,fp8,fp8,0,0.024743999540805816
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,16,96,2,128,1,float16,float16,0,0.024889600276947022
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,16,96,2,128,1,float16,fp8,0,0.02282879948616028
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,16,96,2,128,1,fp8,fp8,0,0.02465119957923889
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,16,96,4,128,1,float16,float16,0,0.024822400510311128
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,16,96,4,128,1,float16,fp8,0,0.022833600640296936
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,16,96,4,128,1,fp8,fp8,0,0.024743999540805816
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,16,96,8,128,1,float16,fp8,0,0.024711999297142028
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,16,96,8,128,1,float16,float16,0,0.024766400456428528
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,16,96,8,128,1,fp8,fp8,0,0.024694399535655977
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16,96,96,128,1,float16,float16,0,0.018606400489807128
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16,96,96,128,1,float16,fp8,0,0.016697600483894348
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16,96,1,128,1,float16,float16,0,0.016564799845218657
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16,96,96,128,1,fp8,fp8,0,0.01653600037097931
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16,96,1,128,1,float16,fp8,0,0.016539199650287627
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16,96,2,128,1,float16,float16,0,0.01655679941177368
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16,96,1,128,1,fp8,fp8,0,0.016579200327396394
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16,96,2,128,1,float16,fp8,0,0.016519999504089354
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16,96,2,128,1,fp8,fp8,0,0.016487999260425566
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16,96,4,128,1,float16,float16,0,0.016601599752902985
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16,96,4,128,1,float16,fp8,0,0.0165583997964859
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16,96,4,128,1,fp8,fp8,0,0.016598400473594666
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16,96,8,128,1,float16,float16,0,0.01650879979133606
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16,96,8,128,1,float16,fp8,0,0.016524800658226015
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16,96,8,128,1,fp8,fp8,0,0.01659200042486191
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16,96,96,128,1,float16,float16,0,0.014745600521564484
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16,96,96,128,1,float16,fp8,0,0.013424000144004822
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16,96,1,128,1,float16,float16,0,0.012539200484752655
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16,96,96,128,1,fp8,fp8,0,0.014239999651908874
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16,96,1,128,1,float16,fp8,0,0.013617600500583648
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16,96,1,128,1,fp8,fp8,0,0.01252799928188324
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16,96,2,128,1,float16,float16,0,0.012470400333404541
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16,96,2,128,1,float16,fp8,0,0.013027200102806091
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16,96,2,128,1,fp8,fp8,0,0.01247360035777092
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16,96,4,128,1,float16,float16,0,0.012579199671745301
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16,96,4,128,1,float16,fp8,0,0.013097600638866424
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16,96,8,128,1,float16,float16,0,0.012479999661445617
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16,96,4,128,1,fp8,fp8,0,0.012569600343704223
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16,96,8,128,1,float16,fp8,0,0.013396799564361572
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16,96,8,128,1,fp8,fp8,0,0.012451200187206269
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16,96,96,128,1,float16,float16,0,0.01252640038728714
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16,96,96,128,1,float16,fp8,0,0.010582400113344192
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16,96,96,128,1,fp8,fp8,0,0.01045600026845932
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16,96,1,128,1,float16,float16,0,0.010422399640083313
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16,96,1,128,1,float16,fp8,0,0.010422399640083313
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16,96,2,128,1,float16,float16,0,0.010476800054311753
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16,96,1,128,1,fp8,fp8,0,0.010401599854230881
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16,96,2,128,1,float16,fp8,0,0.01053439974784851
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16,96,4,128,1,float16,float16,0,0.010407999902963639
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16,96,2,128,1,fp8,fp8,0,0.010409600287675857
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16,96,4,128,1,float16,fp8,0,0.01048479974269867
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16,96,4,128,1,fp8,fp8,0,0.01034879982471466
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16,96,8,128,1,float16,float16,0,0.010417599976062775
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16,96,8,128,1,float16,fp8,0,0.01051200032234192
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16,96,8,128,1,fp8,fp8,0,0.010406400263309478
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,1,96,1,128,1,float16,fp8,0,0.44064960479736326
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,1,96,1,128,1,float16,float16,0,0.47023677825927734
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,1,96,1,128,1,fp8,fp8,0,0.4408847808837891
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,1,96,2,128,1,float16,float16,0,0.47080159187316895
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,1,96,2,128,1,fp8,fp8,0,0.4384416103363037
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,1,96,2,128,1,float16,fp8,0,0.4408415794372559
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,1,96,4,128,1,float16,fp8,0,0.4409503936767578
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,1,96,4,128,1,float16,float16,0,0.470798397064209
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,1,96,4,128,1,fp8,fp8,0,0.44162559509277344
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,1,96,8,128,1,float16,fp8,0,0.43888001441955565
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,1,96,8,128,1,float16,float16,0,0.4736480236053467
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,1,96,96,128,1,float16,float16,0,0.24318881034851075
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,1,96,96,128,1,float16,fp8,0,0.2273439884185791
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,1,96,8,128,1,fp8,fp8,0,0.4420127868652344
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,1,96,96,128,1,fp8,fp8,0,0.22587358951568604
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,1,96,1,128,1,float16,float16,0,0.24208319187164307
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,1,96,1,128,1,float16,fp8,0,0.22594881057739258
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,1,96,1,128,1,fp8,fp8,0,0.2243119955062866
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,1,96,2,128,1,float16,float16,0,0.24200000762939453
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,1,96,2,128,1,float16,fp8,0,0.2259040117263794
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,1,96,2,128,1,fp8,fp8,0,0.22471039295196532
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,1,96,4,128,1,float16,float16,0,0.24178879261016845
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,1,96,4,128,1,float16,fp8,0,0.2260159969329834
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,1,96,4,128,1,fp8,fp8,0,0.22376320362091065
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,1,96,8,128,1,float16,float16,0,0.2419680118560791
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,1,96,8,128,1,float16,fp8,0,0.22587039470672607
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1,96,96,128,1,float16,float16,0,0.12923040390014648
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,1,96,8,128,1,fp8,fp8,0,0.2241312026977539
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1,96,96,128,1,float16,fp8,0,0.1190559983253479
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1,96,96,128,1,fp8,fp8,0,0.11907360553741456
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1,96,1,128,1,float16,float16,0,0.1254464030265808
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1,96,1,128,1,float16,fp8,0,0.11713600158691406
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1,96,1,128,1,fp8,fp8,0,0.11696480512619019
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1,96,2,128,1,float16,float16,0,0.12547999620437622
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1,96,2,128,1,fp8,fp8,0,0.11695200204849243
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1,96,2,128,1,float16,fp8,0,0.11735359430313111
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1,96,4,128,1,float16,float16,0,0.12540639638900758
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1,96,4,128,1,float16,fp8,0,0.11693919897079467
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1,96,4,128,1,fp8,fp8,0,0.11740959882736206
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1,96,8,128,1,float16,float16,0,0.12531839609146117
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1,96,8,128,1,float16,fp8,0,0.11698399782180786
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1,96,96,128,1,float16,float16,0,0.06999199986457824
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1,96,8,128,1,fp8,fp8,0,0.11827199459075928
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1,96,96,128,1,float16,fp8,0,0.06374559998512268
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1,96,96,128,1,fp8,fp8,0,0.06367679834365844
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1,96,1,128,1,float16,fp8,0,0.06368319988250733
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1,96,1,128,1,float16,float16,0,0.06836479902267456
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1,96,2,128,1,float16,float16,0,0.06785439848899841
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1,96,2,128,1,float16,fp8,0,0.06379680037498474
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1,96,1,128,1,fp8,fp8,0,0.06366400122642517
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1,96,4,128,1,float16,float16,0,0.06854079961776734
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1,96,4,128,1,float16,fp8,0,0.06369600296020508
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1,96,4,128,1,fp8,fp8,0,0.0636672019958496
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1,96,2,128,1,fp8,fp8,0,0.06366080045700073
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1,96,8,128,1,float16,float16,0,0.06790080070495605
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1,96,8,128,1,float16,fp8,0,0.06366879940032959
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1,96,96,128,1,float16,float16,0,0.04118559956550598
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1,96,8,128,1,fp8,fp8,0,0.06370880007743836
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1,96,96,128,1,float16,fp8,0,0.03713279962539673
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1,96,96,128,1,fp8,fp8,0,0.03713119924068451
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1,96,1,128,1,float16,float16,0,0.039211198687553406
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1,96,1,128,1,float16,fp8,0,0.03696959912776947
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1,96,1,128,1,fp8,fp8,0,0.03710399866104126
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1,96,2,128,1,float16,float16,0,0.039083200693130496
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1,96,2,128,1,float16,fp8,0,0.03707360029220581
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1,96,2,128,1,fp8,fp8,0,0.03703359961509704
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1,96,4,128,1,float16,float16,0,0.039078399538993835
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1,96,4,128,1,float16,fp8,0,0.03704800009727478
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1,96,4,128,1,fp8,fp8,0,0.0370608001947403
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1,96,8,128,1,float16,float16,0,0.0391072005033493
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1,96,8,128,1,float16,fp8,0,0.03719359934329987
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1,96,8,128,1,fp8,fp8,0,0.03704639971256256
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1,96,96,128,1,float16,float16,0,0.026796799898147584
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1,96,96,128,1,float16,fp8,0,0.022785599529743194
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1,96,96,128,1,fp8,fp8,0,0.024723200500011443
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1,96,1,128,1,float16,float16,0,0.02475520074367523
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1,96,1,128,1,float16,fp8,0,0.02274879962205887
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1,96,1,128,1,fp8,fp8,0,0.022864000499248506
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1,96,2,128,1,float16,float16,0,0.024750399589538574
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1,96,2,128,1,float16,fp8,0,0.022734400629997254
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1,96,2,128,1,fp8,fp8,0,0.022788800299167633
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1,96,4,128,1,float16,float16,0,0.024796800315380098
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1,96,4,128,1,float16,fp8,0,0.024217599630355836
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1,96,4,128,1,fp8,fp8,0,0.02271360009908676
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1,96,8,128,1,float16,float16,0,0.024743999540805816
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1,96,8,128,1,float16,fp8,0,0.022761599719524385
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1,96,8,128,1,fp8,fp8,0,0.023817600309848787
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1,96,96,128,1,float16,float16,0,0.018643200397491455
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1,96,96,128,1,float16,fp8,0,0.01655520051717758
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1,96,96,128,1,fp8,fp8,0,0.016574400663375854
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1,96,1,128,1,float16,float16,0,0.016646400094032288
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1,96,1,128,1,float16,fp8,0,0.014752000570297241
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1,96,1,128,1,fp8,fp8,0,0.014724799990653991
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1,96,2,128,1,float16,float16,0,0.01666080057621002
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1,96,2,128,1,float16,fp8,0,0.016438399255275727
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1,96,4,128,1,float16,float16,0,0.016582399606704712
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1,96,4,128,1,float16,fp8,0,0.016547200083732606
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1,96,2,128,1,fp8,fp8,0,0.014732800424098969
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1,96,4,128,1,fp8,fp8,0,0.01648160070180893
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1,96,8,128,1,float16,float16,0,0.0165583997964859
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1,96,8,128,1,float16,fp8,0,0.01644960045814514
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1,96,96,128,1,float16,float16,0,0.014548799395561219
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1,96,8,128,1,fp8,fp8,0,0.016502399742603303
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1,96,96,128,1,float16,fp8,0,0.01260959953069687
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1,96,96,128,1,fp8,fp8,0,0.012515200674533844
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1,96,1,128,1,float16,float16,0,0.012726399302482604
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1,96,1,128,1,float16,fp8,0,0.012608000636100769
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1,96,1,128,1,fp8,fp8,0,0.01255040019750595
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1,96,2,128,1,float16,float16,0,0.012612800300121307
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1,96,2,128,1,fp8,fp8,0,0.012561599910259246
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1,96,2,128,1,float16,fp8,0,0.012547199428081513
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1,96,4,128,1,float16,float16,0,0.012596799433231354
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1,96,4,128,1,fp8,fp8,0,0.0126351997256279
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1,96,4,128,1,float16,fp8,0,0.012492799758911132
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1,96,8,128,1,float16,float16,0,0.012812800705432892
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1,96,8,128,1,float16,fp8,0,0.01247519999742508
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1,96,8,128,1,fp8,fp8,0,0.012511999905109405
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1,96,96,128,1,float16,float16,0,0.012636800110340119
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1,96,96,128,1,float16,fp8,0,0.010596799850463866
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1,96,96,128,1,fp8,fp8,0,0.010467199981212616
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1,96,1,128,1,float16,float16,0,0.010494399815797806
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1,96,1,128,1,float16,fp8,0,0.010529600083827972
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1,96,1,128,1,fp8,fp8,0,0.010440000146627427
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1,96,2,128,1,float16,float16,0,0.010595200210809707
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1,96,2,128,1,float16,fp8,0,0.010476800054311753
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1,96,2,128,1,fp8,fp8,0,0.01040000021457672
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1,96,4,128,1,float16,float16,0,0.010608000308275222
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1,96,4,128,1,float16,fp8,0,0.010486400127410889
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1,96,4,128,1,fp8,fp8,0,0.010411199927330018
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1,96,8,128,1,float16,float16,0,0.01067039966583252
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1,96,8,128,1,float16,fp8,0,0.010505600273609162
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1,96,8,128,1,fp8,fp8,0,0.010366400331258773
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,16384,64,1,128,1,fp8,fp8,0,21.80906524658203
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,16384,64,1,128,1,float16,fp8,0,23.51988830566406
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,16384,64,1,128,1,float16,float16,0,37.691497802734375
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,16384,64,2,128,1,float16,float16,0,37.105752563476564
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,16384,64,2,128,1,float16,fp8,0,23.621383666992188
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,16384,64,2,128,1,fp8,fp8,0,25.273744201660158
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,16384,64,4,128,1,float16,fp8,0,23.352210998535156
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,16384,64,4,128,1,fp8,fp8,0,23.418293762207032
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,16384,64,4,128,1,float16,float16,0,38.765576171875
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,16384,64,8,128,1,float16,fp8,0,23.806460571289062
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,16384,64,8,128,1,float16,float16,0,40.34239807128906
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,16384,64,8,128,1,fp8,fp8,0,23.363011169433594
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16384,64,64,128,1,float16,fp8,0,11.609140777587891
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16384,64,64,128,1,fp8,fp8,0,12.031881713867188
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16384,64,1,128,1,float16,float16,0,19.18805847167969
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16384,64,1,128,1,float16,fp8,0,10.96451187133789
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16384,64,1,128,1,fp8,fp8,0,11.728765106201172
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16384,64,2,128,1,float16,fp8,0,12.098558044433593
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16384,64,2,128,1,fp8,fp8,0,12.561398315429688
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16384,64,2,128,1,float16,float16,0,18.431593322753905
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16384,64,4,128,1,float16,fp8,0,12.111399841308593
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16384,64,4,128,1,fp8,fp8,0,11.623355102539062
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16384,64,4,128,1,float16,float16,0,19.2462890625
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16384,64,8,128,1,float16,fp8,0,11.58914566040039
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16384,64,8,128,1,fp8,fp8,0,11.504411315917968
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16384,64,8,128,1,float16,float16,0,20.386982727050782
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16384,64,64,128,1,float16,fp8,0,5.868175888061524
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16384,64,64,128,1,fp8,fp8,0,6.062491226196289
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16384,64,1,128,1,float16,float16,0,9.82919692993164
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16384,64,1,128,1,float16,fp8,0,5.650467300415039
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16384,64,1,128,1,fp8,fp8,0,5.750742340087891
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16384,64,2,128,1,float16,float16,0,9.576185607910157
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16384,64,2,128,1,float16,fp8,0,5.7971759796142575
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16384,64,2,128,1,fp8,fp8,0,6.060228729248047
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16384,64,4,128,1,float16,float16,0,9.444595336914062
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16384,64,4,128,1,float16,fp8,0,6.056273651123047
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16384,64,4,128,1,fp8,fp8,0,5.934862518310547
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16384,64,8,128,1,float16,float16,0,9.867359924316407
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16384,64,8,128,1,float16,fp8,0,5.984455871582031
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16384,64,8,128,1,fp8,fp8,0,5.922235107421875
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16384,64,64,128,1,float16,fp8,0,2.911396789550781
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16384,64,64,128,1,fp8,fp8,0,3.1157360076904297
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16384,64,1,128,1,float16,float16,0,4.611633682250977
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16384,64,1,128,1,float16,fp8,0,2.9733232498168944
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16384,64,1,128,1,fp8,fp8,0,2.731270408630371
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16384,64,2,128,1,float16,float16,0,4.414020919799805
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16384,64,2,128,1,float16,fp8,0,3.094063949584961
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16384,64,2,128,1,fp8,fp8,0,2.871219253540039
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16384,64,4,128,1,float16,float16,0,4.834921646118164
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16384,64,4,128,1,float16,fp8,0,3.1407840728759764
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16384,64,4,128,1,fp8,fp8,0,2.951411247253418
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16384,64,8,128,1,float16,float16,0,4.604281616210938
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16384,64,8,128,1,float16,fp8,0,2.8558927536010743
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16384,64,8,128,1,fp8,fp8,0,2.9789600372314453
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16384,64,64,128,1,float16,float16,0,3.5503406524658203
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16384,64,64,128,1,float16,float16,0,8.792095947265626
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,12288,64,1,128,1,float16,float16,0,23.964324951171875
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16384,64,64,128,1,float16,float16,0,18.788482666015625
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,12288,64,1,128,1,float16,fp8,0,12.382768249511718
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,12288,64,1,128,1,fp8,fp8,0,12.221640014648438
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,12288,64,2,128,1,float16,fp8,0,13.823829650878906
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,12288,64,2,128,1,fp8,fp8,0,13.581907653808594
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,12288,64,2,128,1,float16,float16,0,23.192214965820312
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,12288,64,4,128,1,float16,fp8,0,13.24329071044922
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,12288,64,4,128,1,float16,float16,0,21.75133056640625
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,12288,64,4,128,1,fp8,fp8,0,13.470625305175782
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,12288,64,8,128,1,float16,fp8,0,13.844436645507812
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,12288,64,8,128,1,fp8,fp8,0,12.887860107421876
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,12288,64,8,128,1,float16,float16,0,23.63250274658203
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,12288,64,64,128,1,float16,float16,0,10.432701110839844
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,12288,64,64,128,1,fp8,fp8,0,6.832126617431641
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,12288,64,64,128,1,float16,fp8,0,6.988771057128906
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,12288,64,1,128,1,float16,fp8,0,6.10003662109375
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,12288,64,1,128,1,fp8,fp8,0,6.560633850097656
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,12288,64,1,128,1,float16,float16,0,11.313623809814453
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,12288,64,2,128,1,float16,float16,0,11.234528350830079
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,12288,64,2,128,1,float16,fp8,0,6.352131271362305
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,12288,64,2,128,1,fp8,fp8,0,6.527291107177734
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,12288,64,4,128,1,float16,fp8,0,6.732266998291015
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,12288,64,4,128,1,fp8,fp8,0,6.90771713256836
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,12288,64,4,128,1,float16,float16,0,11.663744354248047
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,12288,64,8,128,1,float16,float16,0,11.069324493408203
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,12288,64,8,128,1,float16,fp8,0,6.755287933349609
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,12288,64,8,128,1,fp8,fp8,0,6.8180908203125
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,12288,64,64,128,1,float16,float16,0,4.85987663269043
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,12288,64,64,128,1,float16,fp8,0,3.5033294677734377
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,12288,64,64,128,1,fp8,fp8,0,3.4016159057617186
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,12288,64,1,128,1,float16,float16,0,4.508555221557617
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,12288,64,1,128,1,float16,fp8,0,3.4100318908691407
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,12288,64,1,128,1,fp8,fp8,0,3.2342689514160154
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,12288,64,2,128,1,float16,float16,0,5.247444915771484
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,12288,64,2,128,1,float16,fp8,0,3.177195167541504
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,12288,64,2,128,1,fp8,fp8,0,3.324350357055664
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,12288,64,4,128,1,float16,float16,0,4.644623947143555
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,12288,64,4,128,1,float16,fp8,0,3.4177295684814455
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,12288,64,4,128,1,fp8,fp8,0,3.2599262237548827
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,12288,64,8,128,1,float16,float16,0,4.502758407592774
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,12288,64,8,128,1,float16,fp8,0,3.4856849670410157
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,12288,64,64,128,1,float16,float16,0,2.3909183502197267
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,12288,64,8,128,1,fp8,fp8,0,3.4557376861572267
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,12288,64,64,128,1,fp8,fp8,0,1.6553920745849608
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,12288,64,64,128,1,float16,fp8,0,2.152667236328125
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,12288,64,1,128,1,float16,fp8,0,1.602859115600586
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,12288,64,1,128,1,float16,float16,0,2.5909711837768556
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,12288,64,1,128,1,fp8,fp8,0,1.587782382965088
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,12288,64,2,128,1,float16,float16,0,2.1601295471191406
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,12288,64,2,128,1,float16,fp8,0,1.6992847442626953
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,12288,64,4,128,1,float16,float16,0,1.7394752502441406
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,12288,64,2,128,1,fp8,fp8,0,1.7919679641723634
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,12288,64,4,128,1,float16,fp8,0,2.4080480575561523
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,12288,64,4,128,1,fp8,fp8,0,1.7001615524291993
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,12288,64,8,128,1,float16,fp8,0,1.592129611968994
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,12288,64,8,128,1,float16,float16,0,2.429327964782715
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,12288,64,8,128,1,fp8,fp8,0,1.6727823257446288
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,10240,64,1,128,1,float16,fp8,0,9.3170654296875
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,10240,64,1,128,1,fp8,fp8,0,9.174974060058593
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,10240,64,1,128,1,float16,float16,0,15.514794921875
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,10240,64,2,128,1,float16,float16,0,16.595620727539064
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,10240,64,2,128,1,float16,fp8,0,9.451367950439453
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,10240,64,2,128,1,fp8,fp8,0,9.284255981445312
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,10240,64,4,128,1,float16,fp8,0,9.769992065429687
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,10240,64,4,128,1,fp8,fp8,0,9.652706909179688
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,10240,64,4,128,1,float16,float16,0,15.628219604492188
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,10240,64,8,128,1,float16,float16,0,15.835298156738281
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,10240,64,8,128,1,float16,fp8,0,10.036019134521485
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,10240,64,8,128,1,fp8,fp8,0,9.566862487792969
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,10240,64,64,128,1,float16,float16,0,8.239600372314452
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,10240,64,64,128,1,float16,fp8,0,4.9731487274169925
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,10240,64,64,128,1,fp8,fp8,0,5.3622688293457035
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,10240,64,1,128,1,float16,fp8,0,4.530712127685547
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,10240,64,1,128,1,float16,float16,0,7.442691040039063
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,10240,64,1,128,1,fp8,fp8,0,4.577729415893555
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,10240,64,2,128,1,float16,fp8,0,4.538238525390625
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,10240,64,2,128,1,fp8,fp8,0,4.726785659790039
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,10240,64,2,128,1,float16,float16,0,8.518339538574219
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,10240,64,4,128,1,float16,float16,0,7.299892425537109
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,10240,64,4,128,1,float16,fp8,0,4.6927745819091795
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,10240,64,4,128,1,fp8,fp8,0,4.655049514770508
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,10240,64,8,128,1,float16,fp8,0,4.543239974975586
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,10240,64,8,128,1,fp8,fp8,0,4.741648101806641
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,10240,64,64,128,1,float16,float16,0,3.813460922241211
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,10240,64,8,128,1,float16,float16,0,8.405068969726562
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,10240,64,64,128,1,float16,fp8,0,2.571971130371094
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,10240,64,64,128,1,fp8,fp8,0,2.3871679306030273
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,10240,64,1,128,1,fp8,fp8,0,2.2335472106933594
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,10240,64,1,128,1,float16,float16,0,3.7472606658935548
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,10240,64,1,128,1,float16,fp8,0,2.8581216812133787
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,10240,64,2,128,1,float16,float16,0,3.429364776611328
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,10240,64,2,128,1,float16,fp8,0,2.17248477935791
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,10240,64,2,128,1,fp8,fp8,0,2.447819137573242
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,10240,64,4,128,1,float16,float16,0,3.3058719635009766
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,10240,64,4,128,1,float16,fp8,0,2.6690383911132813
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,10240,64,4,128,1,fp8,fp8,0,2.3003471374511717
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,10240,64,64,128,1,float16,float16,0,1.512771224975586
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,10240,64,8,128,1,float16,float16,0,3.347727966308594
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,10240,64,8,128,1,fp8,fp8,0,2.2711456298828123
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,10240,64,8,128,1,float16,fp8,0,2.6408735275268556
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,10240,64,64,128,1,float16,fp8,0,1.3911824226379395
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,10240,64,64,128,1,fp8,fp8,0,1.490340805053711
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,10240,64,1,128,1,float16,float16,0,1.4176912307739258
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,10240,64,1,128,1,float16,fp8,0,1.2712688446044922
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,10240,64,1,128,1,fp8,fp8,0,1.1202176094055176
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,10240,64,2,128,1,float16,float16,0,1.2833392143249511
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,10240,64,2,128,1,fp8,fp8,0,1.1308287620544433
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,10240,64,2,128,1,float16,fp8,0,1.3996288299560546
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,10240,64,4,128,1,float16,float16,0,1.258579158782959
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,10240,64,4,128,1,float16,fp8,0,1.2868464469909668
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,10240,64,4,128,1,fp8,fp8,0,1.1296591758728027
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,10240,64,8,128,1,float16,float16,0,1.3827679634094239
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,10240,64,8,128,1,fp8,fp8,0,1.1367631912231446
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,10240,64,8,128,1,float16,fp8,0,1.1983872413635255
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,8192,64,1,128,1,fp8,fp8,0,12.15963363647461
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,8192,64,1,128,1,float16,fp8,0,13.130755615234374
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,8192,64,1,128,1,float16,float16,0,20.256423950195312
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,8192,64,2,128,1,float16,float16,0,18.713246154785157
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,8192,64,2,128,1,float16,fp8,0,12.647586822509766
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,8192,64,2,128,1,fp8,fp8,0,13.626687622070312
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,8192,64,4,128,1,float16,fp8,0,12.515219116210938
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,8192,64,4,128,1,fp8,fp8,0,12.635189056396484
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,8192,64,4,128,1,float16,float16,0,20.38158721923828
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,8192,64,8,128,1,float16,float16,0,22.112815856933594
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,8192,64,8,128,1,float16,fp8,0,12.168946838378906
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,8192,64,8,128,1,fp8,fp8,0,12.74163818359375
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,8192,64,64,128,1,float16,float16,0,10.991750335693359
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,8192,64,64,128,1,fp8,fp8,0,6.6007743835449215
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,8192,64,64,128,1,float16,fp8,0,6.890969848632812
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,8192,64,1,128,1,float16,fp8,0,6.165862274169922
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,8192,64,1,128,1,float16,float16,0,9.948722839355469
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,8192,64,1,128,1,fp8,fp8,0,5.889300918579101
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,8192,64,2,128,1,float16,fp8,0,6.057166290283203
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,8192,64,2,128,1,float16,float16,0,10.664649963378906
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,8192,64,2,128,1,fp8,fp8,0,6.064980697631836
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,8192,64,4,128,1,float16,fp8,0,6.255897521972656
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,8192,64,4,128,1,float16,float16,0,9.705881500244141
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,8192,64,4,128,1,fp8,fp8,0,6.532039642333984
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,8192,64,8,128,1,float16,float16,0,9.813788604736327
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,8192,64,8,128,1,float16,fp8,0,6.394179153442383
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,8192,64,8,128,1,fp8,fp8,0,6.174679946899414
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,8192,64,64,128,1,float16,float16,0,5.650270462036133
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,8192,64,64,128,1,float16,fp8,0,3.2397937774658203
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,8192,64,64,128,1,fp8,fp8,0,3.172096061706543
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,8192,64,1,128,1,float16,float16,0,4.34587516784668
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,8192,64,1,128,1,fp8,fp8,0,2.9605520248413084
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,8192,64,1,128,1,float16,fp8,0,3.095351982116699
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,8192,64,2,128,1,float16,float16,0,4.7418975830078125
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,8192,64,2,128,1,float16,fp8,0,3.1449392318725584
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,8192,64,2,128,1,fp8,fp8,0,2.963390350341797
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,8192,64,4,128,1,float16,float16,0,4.772763061523437
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,8192,64,4,128,1,fp8,fp8,0,2.8264511108398436
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,8192,64,4,128,1,float16,fp8,0,3.1375295639038088
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,8192,64,8,128,1,float16,float16,0,4.602759933471679
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,8192,64,64,128,1,float16,float16,0,2.1494287490844726
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,8192,64,8,128,1,fp8,fp8,0,2.801198387145996
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,8192,64,8,128,1,float16,fp8,0,3.5234001159667967
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,8192,64,64,128,1,float16,fp8,0,1.970604705810547
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,8192,64,1,128,1,float16,float16,0,1.670243263244629
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,8192,64,1,128,1,float16,fp8,0,1.4990976333618165
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,8192,64,64,128,1,fp8,fp8,0,2.203500747680664
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,8192,64,1,128,1,fp8,fp8,0,1.428718376159668
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,8192,64,2,128,1,float16,fp8,0,1.4356783866882323
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,8192,64,2,128,1,fp8,fp8,0,1.4704928398132324
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,8192,64,2,128,1,float16,float16,0,2.335468864440918
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,8192,64,4,128,1,float16,float16,0,1.6235359191894532
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,8192,64,4,128,1,fp8,fp8,0,1.459444808959961
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,8192,64,4,128,1,float16,fp8,0,1.548083209991455
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,8192,64,64,128,1,float16,float16,0,0.9243040084838867
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,8192,64,8,128,1,float16,fp8,0,1.6211328506469727
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,8192,64,8,128,1,fp8,fp8,0,1.4385647773742676
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,8192,64,8,128,1,float16,float16,0,2.465403175354004
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,8192,64,64,128,1,float16,fp8,0,1.23886079788208
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,8192,64,64,128,1,fp8,fp8,0,0.8527487754821778
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,8192,64,1,128,1,float16,float16,0,0.8665904045104981
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,8192,64,1,128,1,float16,fp8,0,1.1304335594177246
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,8192,64,2,128,1,float16,float16,0,0.8585663795471191
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,8192,64,1,128,1,fp8,fp8,0,0.9018207550048828
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,8192,64,2,128,1,float16,fp8,0,0.8602864265441894
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,8192,64,2,128,1,fp8,fp8,0,0.8441264152526855
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,8192,64,4,128,1,float16,float16,0,0.8640751838684082
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,8192,64,4,128,1,float16,fp8,0,0.8223471641540527
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,8192,64,4,128,1,fp8,fp8,0,0.7595424175262451
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,8192,64,8,128,1,float16,float16,0,0.8592399597167969
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,8192,64,8,128,1,fp8,fp8,0,0.8205488204956055
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,8192,64,8,128,1,float16,fp8,0,0.963542366027832
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,6144,64,1,128,1,float16,fp8,0,6.843089294433594
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,6144,64,1,128,1,fp8,fp8,0,7.647564697265625
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,6144,64,1,128,1,float16,float16,0,11.325750732421875
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,6144,64,2,128,1,float16,float16,0,11.531401824951171
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,6144,64,2,128,1,float16,fp8,0,7.1361244201660154
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,6144,64,2,128,1,fp8,fp8,0,7.465478515625
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,6144,64,4,128,1,float16,fp8,0,7.138246154785156
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,6144,64,4,128,1,fp8,fp8,0,6.915046691894531
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,6144,64,4,128,1,float16,float16,0,12.033870697021484
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,6144,64,8,128,1,float16,fp8,0,7.298579406738281
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,6144,64,8,128,1,float16,float16,0,12.9196533203125
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,6144,64,64,128,1,float16,float16,0,6.309267044067383
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,6144,64,8,128,1,fp8,fp8,0,7.221462249755859
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,6144,64,64,128,1,float16,fp8,0,3.7415935516357424
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,6144,64,1,128,1,float16,fp8,0,3.4158512115478517
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,6144,64,64,128,1,fp8,fp8,0,4.020721435546875
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,6144,64,1,128,1,fp8,fp8,0,3.3875614166259767
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,6144,64,1,128,1,float16,float16,0,5.586028671264648
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,6144,64,2,128,1,float16,fp8,0,3.5117088317871095
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,6144,64,2,128,1,fp8,fp8,0,3.517695999145508
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,6144,64,2,128,1,float16,float16,0,5.4626609802246096
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,6144,64,4,128,1,float16,float16,0,5.739259338378906
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,6144,64,4,128,1,float16,fp8,0,3.5910400390625
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,6144,64,4,128,1,fp8,fp8,0,3.490816116333008
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,6144,64,8,128,1,float16,float16,0,5.808443069458008
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,6144,64,8,128,1,float16,fp8,0,3.556217575073242
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,6144,64,8,128,1,fp8,fp8,0,3.4192543029785156
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,6144,64,64,128,1,float16,fp8,0,1.8425712585449219
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,6144,64,64,128,1,float16,float16,0,2.9327743530273436
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,6144,64,64,128,1,fp8,fp8,0,1.9683584213256835
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,6144,64,1,128,1,float16,float16,0,2.1856815338134767
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,6144,64,1,128,1,fp8,fp8,0,1.6822271347045898
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,6144,64,2,128,1,float16,float16,0,1.822439956665039
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,6144,64,1,128,1,float16,fp8,0,2.0221584320068358
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,6144,64,2,128,1,fp8,fp8,0,1.652390480041504
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,6144,64,4,128,1,float16,fp8,0,1.6722751617431642
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,6144,64,2,128,1,float16,fp8,0,1.991801643371582
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,6144,64,4,128,1,float16,float16,0,2.5203088760375976
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,6144,64,4,128,1,fp8,fp8,0,1.660038375854492
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,6144,64,8,128,1,float16,fp8,0,1.673321533203125
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,6144,64,8,128,1,float16,float16,0,2.214316749572754
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,6144,64,64,128,1,float16,float16,0,1.0629664421081544
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,6144,64,64,128,1,float16,fp8,0,0.9745120048522949
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,6144,64,8,128,1,fp8,fp8,0,2.1337488174438475
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,6144,64,1,128,1,float16,float16,0,1.1651200294494628
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,6144,64,64,128,1,fp8,fp8,0,1.5360752105712892
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,6144,64,1,128,1,float16,fp8,0,0.8635295867919922
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,6144,64,1,128,1,fp8,fp8,0,0.8829024314880372
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,6144,64,2,128,1,float16,fp8,0,0.857539176940918
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,6144,64,2,128,1,fp8,fp8,0,0.8618176460266114
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,6144,64,2,128,1,float16,float16,0,1.348151969909668
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,6144,64,4,128,1,float16,float16,0,0.9619343757629395
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,6144,64,4,128,1,float16,fp8,0,0.846225643157959
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,6144,64,4,128,1,fp8,fp8,0,0.8542511940002442
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,6144,64,64,128,1,float16,float16,0,0.5783584117889404
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,6144,64,8,128,1,float16,fp8,0,1.1861632347106934
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,6144,64,8,128,1,fp8,fp8,0,0.8449104309082032
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,6144,64,8,128,1,float16,float16,0,1.4737903594970703
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,6144,64,64,128,1,float16,fp8,0,0.5117568016052246
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,6144,64,64,128,1,fp8,fp8,0,0.548686408996582
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,6144,64,1,128,1,float16,float16,0,0.5128335952758789
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,6144,64,1,128,1,float16,fp8,0,0.7441711902618409
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,6144,64,1,128,1,fp8,fp8,0,0.46296000480651855
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,6144,64,2,128,1,float16,fp8,0,0.4591407775878906
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,6144,64,2,128,1,float16,float16,0,0.5319551944732666
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,6144,64,2,128,1,fp8,fp8,0,0.5243792057037353
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,6144,64,4,128,1,float16,float16,0,0.5255663871765137
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,6144,64,4,128,1,float16,fp8,0,0.4569727897644043
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,6144,64,4,128,1,fp8,fp8,0,0.46203041076660156
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,6144,64,8,128,1,float16,float16,0,0.5350944042205811
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,6144,64,8,128,1,float16,fp8,0,0.4633471965789795
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,6144,64,8,128,1,fp8,fp8,0,0.4560416221618652
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,4096,64,1,128,1,fp8,fp8,0,6.38502082824707
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,4096,64,1,128,1,float16,fp8,0,6.604524993896485
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,4096,64,2,128,1,float16,float16,0,10.142922973632812
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,4096,64,1,128,1,float16,float16,0,10.7265869140625
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,4096,64,2,128,1,float16,fp8,0,6.627528381347656
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,4096,64,2,128,1,fp8,fp8,0,6.703307342529297
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,4096,64,4,128,1,float16,fp8,0,7.2526191711425785
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,4096,64,4,128,1,float16,float16,0,10.845985412597656
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,4096,64,4,128,1,fp8,fp8,0,6.7817840576171875
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,4096,64,8,128,1,float16,float16,0,10.997713470458985
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,4096,64,8,128,1,float16,fp8,0,7.489566040039063
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,4096,64,64,128,1,float16,float16,0,6.28460807800293
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,4096,64,8,128,1,fp8,fp8,0,6.697100830078125
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,4096,64,64,128,1,float16,fp8,0,3.6828369140625
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,4096,64,64,128,1,fp8,fp8,0,3.6664974212646486
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,4096,64,1,128,1,float16,fp8,0,3.1802160263061525
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,4096,64,1,128,1,fp8,fp8,0,3.0685440063476563
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,4096,64,1,128,1,float16,float16,0,5.2656303405761715
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,4096,64,2,128,1,float16,fp8,0,3.1704128265380858
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,4096,64,2,128,1,float16,float16,0,5.101732635498047
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,4096,64,2,128,1,fp8,fp8,0,3.2413822174072267
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,4096,64,4,128,1,float16,float16,0,5.314724731445312
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,4096,64,4,128,1,float16,fp8,0,3.1221696853637697
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,4096,64,4,128,1,fp8,fp8,0,3.4857742309570314
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,4096,64,8,128,1,float16,float16,0,4.873603057861328
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,4096,64,8,128,1,fp8,fp8,0,3.1445423126220704
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,4096,64,64,128,1,float16,float16,0,2.448580741882324
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,4096,64,8,128,1,float16,fp8,0,3.215283203125
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,4096,64,64,128,1,float16,fp8,0,2.0530080795288086
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,4096,64,1,128,1,float16,fp8,0,1.5895312309265137
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,4096,64,64,128,1,fp8,fp8,0,1.7905935287475585
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,4096,64,1,128,1,fp8,fp8,0,1.5953935623168944
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,4096,64,1,128,1,float16,float16,0,2.923531150817871
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,4096,64,2,128,1,float16,float16,0,1.7607696533203125
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,4096,64,2,128,1,float16,fp8,0,1.5541680335998536
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,4096,64,2,128,1,fp8,fp8,0,1.661252784729004
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,4096,64,4,128,1,fp8,fp8,0,1.5506352424621581
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,4096,64,4,128,1,float16,fp8,0,1.8460304260253906
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,4096,64,4,128,1,float16,float16,0,2.8454959869384764
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,4096,64,8,128,1,float16,float16,0,1.8339824676513672
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,4096,64,8,128,1,float16,fp8,0,1.6914991378784179
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,4096,64,8,128,1,fp8,fp8,0,1.586571216583252
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,4096,64,64,128,1,float16,float16,0,1.5934224128723145
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,4096,64,64,128,1,float16,fp8,0,1.2888367652893067
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,4096,64,64,128,1,fp8,fp8,0,0.9412511825561524
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,4096,64,1,128,1,float16,float16,0,0.9529151916503906
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,4096,64,1,128,1,fp8,fp8,0,0.7981760025024414
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,4096,64,1,128,1,float16,fp8,0,1.201318359375
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,4096,64,2,128,1,float16,float16,0,0.8757391929626465
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,4096,64,2,128,1,float16,fp8,0,0.9790847778320313
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,4096,64,2,128,1,fp8,fp8,0,0.7965807914733887
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,4096,64,4,128,1,float16,float16,0,0.9884256362915039
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,4096,64,4,128,1,float16,fp8,0,1.1821488380432128
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,4096,64,4,128,1,fp8,fp8,0,0.8552960395812989
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,4096,64,8,128,1,float16,fp8,0,0.804964828491211
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,4096,64,8,128,1,fp8,fp8,0,0.82882080078125
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,4096,64,8,128,1,float16,float16,0,1.255244827270508
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,4096,64,64,128,1,float16,fp8,0,0.4884079933166504
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,4096,64,64,128,1,float16,float16,0,0.5434544086456299
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,4096,64,64,128,1,fp8,fp8,0,0.6610591888427735
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,4096,64,1,128,1,float16,fp8,0,0.4230656147003174
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,4096,64,1,128,1,float16,float16,0,0.4778111934661865
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,4096,64,1,128,1,fp8,fp8,0,0.5356527805328369
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,4096,64,2,128,1,float16,float16,0,0.4684912204742432
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,4096,64,2,128,1,float16,fp8,0,0.42856640815734864
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,4096,64,2,128,1,fp8,fp8,0,0.4991312026977539
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,4096,64,4,128,1,float16,fp8,0,0.4228511810302734
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,4096,64,4,128,1,float16,float16,0,0.4697904109954834
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,4096,64,4,128,1,fp8,fp8,0,0.423308801651001
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,4096,64,8,128,1,float16,float16,0,0.48224639892578125
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,4096,64,8,128,1,float16,fp8,0,0.4209727764129639
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,4096,64,64,128,1,float16,float16,0,0.30039200782775877
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,4096,64,64,128,1,float16,fp8,0,0.30517919063568116
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,4096,64,8,128,1,fp8,fp8,0,0.4228672027587891
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,4096,64,1,128,1,float16,float16,0,0.25973920822143554
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,4096,64,64,128,1,fp8,fp8,0,0.26659040451049804
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,4096,64,1,128,1,float16,fp8,0,0.23238399028778076
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,4096,64,1,128,1,fp8,fp8,0,0.23120639324188233
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,4096,64,2,128,1,float16,fp8,0,0.2335968017578125
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,4096,64,2,128,1,float16,float16,0,0.2633039951324463
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,4096,64,2,128,1,fp8,fp8,0,0.2309135913848877
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,4096,64,4,128,1,float16,fp8,0,0.2399616003036499
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,4096,64,4,128,1,float16,float16,0,0.26017439365386963
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,4096,64,4,128,1,fp8,fp8,0,0.23167519569396972
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,4096,64,8,128,1,float16,fp8,0,0.23372159004211426
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,4096,64,8,128,1,float16,float16,0,0.26525120735168456
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,4096,64,8,128,1,fp8,fp8,0,0.2317296028137207
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,3072,64,1,128,1,float16,fp8,0,3.7713249206542967
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,3072,64,1,128,1,fp8,fp8,0,3.632289505004883
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,3072,64,2,128,1,float16,float16,0,4.893059158325196
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,3072,64,1,128,1,float16,float16,0,5.827657699584961
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,3072,64,2,128,1,float16,fp8,0,3.782619094848633
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,3072,64,2,128,1,fp8,fp8,0,3.73359375
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,3072,64,4,128,1,float16,fp8,0,3.916254425048828
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,3072,64,4,128,1,float16,float16,0,5.830985641479492
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,3072,64,4,128,1,fp8,fp8,0,3.83337287902832
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,3072,64,8,128,1,float16,float16,0,5.956167984008789
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,3072,64,8,128,1,float16,fp8,0,4.031327819824218
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,3072,64,64,128,1,float16,float16,0,2.8849519729614257
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,3072,64,8,128,1,fp8,fp8,0,3.711985778808594
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,3072,64,64,128,1,float16,fp8,0,2.2673919677734373
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,3072,64,64,128,1,fp8,fp8,0,2.2462528228759764
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,3072,64,1,128,1,float16,fp8,0,1.842300796508789
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,3072,64,1,128,1,float16,float16,0,2.9373184204101563
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,3072,64,1,128,1,fp8,fp8,0,1.8432256698608398
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,3072,64,2,128,1,float16,fp8,0,1.8302831649780273
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,3072,64,2,128,1,float16,float16,0,2.5681312561035154
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,3072,64,4,128,1,float16,float16,0,1.989094352722168
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,3072,64,2,128,1,fp8,fp8,0,2.232129669189453
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,3072,64,4,128,1,float16,fp8,0,1.8563072204589843
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,3072,64,4,128,1,fp8,fp8,0,2.5918079376220704
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,3072,64,8,128,1,float16,fp8,0,1.9361200332641602
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,3072,64,8,128,1,fp8,fp8,0,1.918329620361328
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,3072,64,8,128,1,float16,float16,0,3.193462371826172
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,3072,64,64,128,1,float16,float16,0,1.3500991821289063
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,3072,64,64,128,1,float16,fp8,0,1.231447982788086
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,3072,64,64,128,1,fp8,fp8,0,1.205726432800293
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,3072,64,1,128,1,fp8,fp8,0,1.0037520408630372
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,3072,64,1,128,1,float16,float16,0,1.5268464088439941
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,3072,64,1,128,1,float16,fp8,0,1.3353504180908202
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,3072,64,2,128,1,float16,float16,0,1.0687711715698243
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,3072,64,2,128,1,float16,fp8,0,0.9789440155029296
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,3072,64,2,128,1,fp8,fp8,0,1.3569744110107422
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,3072,64,4,128,1,float16,float16,0,1.229742431640625
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,3072,64,4,128,1,float16,fp8,0,1.1355999946594237
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,3072,64,4,128,1,fp8,fp8,0,0.9382191658020019
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,3072,64,8,128,1,float16,float16,0,1.0377023696899415
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,3072,64,64,128,1,float16,float16,0,0.6606607913970948
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,3072,64,8,128,1,float16,fp8,0,1.1248415946960448
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,3072,64,8,128,1,fp8,fp8,0,1.0081104278564452
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,3072,64,64,128,1,float16,fp8,0,0.5926896095275879
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,3072,64,1,128,1,float16,float16,0,0.5784768104553223
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,3072,64,1,128,1,float16,fp8,0,0.49315519332885743
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,3072,64,64,128,1,fp8,fp8,0,0.7077023983001709
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,3072,64,1,128,1,fp8,fp8,0,0.5326432228088379
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,3072,64,2,128,1,float16,fp8,0,0.4923600196838379
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,3072,64,2,128,1,float16,float16,0,0.5892496109008789
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,3072,64,2,128,1,fp8,fp8,0,0.5520927906036377
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,3072,64,4,128,1,float16,float16,0,0.5578288078308106
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,3072,64,4,128,1,float16,fp8,0,0.4897808074951172
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,3072,64,4,128,1,fp8,fp8,0,0.5719744205474854
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,3072,64,8,128,1,float16,float16,0,0.555072021484375
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,3072,64,8,128,1,fp8,fp8,0,0.48827519416809084
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,3072,64,8,128,1,float16,fp8,0,0.5311776161193847
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,3072,64,64,128,1,float16,fp8,0,0.314903998374939
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,3072,64,64,128,1,float16,float16,0,0.37532958984375
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,3072,64,64,128,1,fp8,fp8,0,0.312006402015686
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,3072,64,1,128,1,fp8,fp8,0,0.27722079753875734
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,3072,64,1,128,1,float16,fp8,0,0.3096832036972046
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,3072,64,1,128,1,float16,float16,0,0.3108720064163208
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,3072,64,2,128,1,float16,fp8,0,0.2773632049560547
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,3072,64,2,128,1,float16,float16,0,0.2931328058242798
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,3072,64,2,128,1,fp8,fp8,0,0.26749439239501954
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,3072,64,4,128,1,float16,float16,0,0.2897567987442017
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,3072,64,4,128,1,float16,fp8,0,0.26507999897003176
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,3072,64,4,128,1,fp8,fp8,0,0.2659327983856201
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,3072,64,8,128,1,float16,float16,0,0.29489119052886964
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,3072,64,64,128,1,float16,float16,0,0.19289920330047608
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,3072,64,8,128,1,float16,fp8,0,0.2651776075363159
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,3072,64,8,128,1,fp8,fp8,0,0.26356480121612547
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,3072,64,1,128,1,float16,float16,0,0.16164480447769164
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,3072,64,64,128,1,fp8,fp8,0,0.17391040325164794
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,3072,64,64,128,1,float16,fp8,0,0.18056000471115113
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,3072,64,1,128,1,float16,fp8,0,0.14638079404830934
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,3072,64,1,128,1,fp8,fp8,0,0.1476207971572876
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,3072,64,2,128,1,float16,float16,0,0.16041120290756225
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,3072,64,2,128,1,float16,fp8,0,0.14721120595932008
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,3072,64,2,128,1,fp8,fp8,0,0.14803520441055298
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,3072,64,4,128,1,float16,float16,0,0.16082559823989867
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,3072,64,4,128,1,float16,fp8,0,0.14642080068588256
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,3072,64,4,128,1,fp8,fp8,0,0.14722880125045776
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,3072,64,8,128,1,float16,float16,0,0.16414239406585693
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,3072,64,8,128,1,float16,fp8,0,0.14645600318908691
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,3072,64,8,128,1,fp8,fp8,0,0.1466048002243042
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,2048,64,1,128,1,fp8,fp8,0,3.6378448486328123
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,2048,64,1,128,1,float16,fp8,0,3.67193603515625
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,2048,64,1,128,1,float16,float16,0,5.1587471008300785
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,2048,64,2,128,1,float16,fp8,0,3.7004878997802733
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,2048,64,2,128,1,fp8,fp8,0,3.6591392517089845
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,2048,64,2,128,1,float16,float16,0,5.4418590545654295
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,2048,64,4,128,1,float16,float16,0,5.875665664672852
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,2048,64,4,128,1,float16,fp8,0,3.6817630767822265
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,2048,64,4,128,1,fp8,fp8,0,3.748219299316406
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,2048,64,8,128,1,float16,float16,0,5.885662460327149
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,2048,64,64,128,1,float16,float16,0,2.9627664566040037
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,2048,64,8,128,1,float16,fp8,0,3.7061920166015625
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,2048,64,8,128,1,fp8,fp8,0,3.7293136596679686
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,2048,64,1,128,1,float16,float16,0,2.1353071212768553
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,2048,64,1,128,1,float16,fp8,0,1.8760751724243163
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,2048,64,64,128,1,fp8,fp8,0,2.3933040618896486
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,2048,64,64,128,1,float16,fp8,0,2.3708911895751954
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,2048,64,2,128,1,fp8,fp8,0,1.8147584915161132
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,2048,64,1,128,1,fp8,fp8,0,2.09789924621582
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,2048,64,2,128,1,float16,fp8,0,1.8147968292236327
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,2048,64,2,128,1,float16,float16,0,2.293854331970215
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,2048,64,4,128,1,float16,fp8,0,1.891632080078125
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,2048,64,4,128,1,float16,float16,0,1.9824512481689454
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,2048,64,4,128,1,fp8,fp8,0,1.9199792861938476
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,2048,64,8,128,1,float16,float16,0,3.1872671127319334
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,2048,64,64,128,1,float16,float16,0,1.409175968170166
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,2048,64,8,128,1,float16,fp8,0,1.8933408737182618
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,2048,64,8,128,1,fp8,fp8,0,2.3477760314941407
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,2048,64,1,128,1,float16,float16,0,0.9910016059875488
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,2048,64,64,128,1,float16,fp8,0,1.495644760131836
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,2048,64,64,128,1,fp8,fp8,0,1.3687775611877442
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,2048,64,1,128,1,float16,fp8,0,1.0806768417358399
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,2048,64,1,128,1,fp8,fp8,0,0.933471965789795
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,2048,64,2,128,1,float16,float16,0,1.0240896224975586
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,2048,64,2,128,1,float16,fp8,0,0.9427231788635254
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,2048,64,2,128,1,fp8,fp8,0,0.9337984085083008
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,2048,64,4,128,1,float16,float16,0,0.99891357421875
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,2048,64,4,128,1,fp8,fp8,0,0.919156837463379
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,2048,64,4,128,1,float16,fp8,0,1.1384032249450684
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,2048,64,8,128,1,float16,float16,0,1.0609071731567383
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,2048,64,8,128,1,float16,fp8,0,0.9293647766113281
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,2048,64,64,128,1,float16,float16,0,0.7359248161315918
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,2048,64,8,128,1,fp8,fp8,0,0.9235440254211426
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,2048,64,64,128,1,float16,fp8,0,0.7444528102874756
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,2048,64,1,128,1,float16,float16,0,0.5746335983276367
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,2048,64,64,128,1,fp8,fp8,0,0.6928192138671875
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,2048,64,1,128,1,float16,fp8,0,0.48186559677124025
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,2048,64,1,128,1,fp8,fp8,0,0.5259439945220947
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,2048,64,2,128,1,float16,fp8,0,0.47669281959533694
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,2048,64,2,128,1,float16,float16,0,0.5872128009796143
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,2048,64,2,128,1,fp8,fp8,0,0.48032479286193847
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,2048,64,4,128,1,float16,float16,0,0.5237120151519775
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,2048,64,4,128,1,float16,fp8,0,0.47925920486450196
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,2048,64,8,128,1,float16,float16,0,0.5327424049377442
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,2048,64,4,128,1,fp8,fp8,0,0.6351808071136474
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,2048,64,8,128,1,float16,fp8,0,0.47856640815734863
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,2048,64,8,128,1,fp8,fp8,0,0.47679362297058103
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,2048,64,64,128,1,float16,float16,0,0.3706063985824585
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,2048,64,64,128,1,float16,fp8,0,0.3904383897781372
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,2048,64,1,128,1,float16,float16,0,0.27281439304351807
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,2048,64,64,128,1,fp8,fp8,0,0.32076001167297363
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,2048,64,1,128,1,fp8,fp8,0,0.2627295970916748
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,2048,64,1,128,1,float16,fp8,0,0.3164112091064453
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,2048,64,2,128,1,float16,float16,0,0.27299520969390867
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,2048,64,2,128,1,float16,fp8,0,0.2592816114425659
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,2048,64,2,128,1,fp8,fp8,0,0.2552687883377075
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,2048,64,4,128,1,float16,float16,0,0.32846879959106445
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,2048,64,4,128,1,float16,fp8,0,0.2559439897537231
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,2048,64,4,128,1,fp8,fp8,0,0.25719680786132815
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,2048,64,8,128,1,float16,float16,0,0.28500640392303467
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,2048,64,64,128,1,float16,float16,0,0.1941920042037964
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,2048,64,8,128,1,fp8,fp8,0,0.25623359680175783
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,2048,64,8,128,1,float16,fp8,0,0.3168047904968262
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,2048,64,64,128,1,float16,fp8,0,0.17392959594726562
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,2048,64,1,128,1,float16,float16,0,0.1516047954559326
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,2048,64,64,128,1,fp8,fp8,0,0.174782395362854
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,2048,64,1,128,1,float16,fp8,0,0.13888319730758666
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,2048,64,1,128,1,fp8,fp8,0,0.13990720510482788
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,2048,64,2,128,1,float16,float16,0,0.15200639963150026
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,2048,64,2,128,1,float16,fp8,0,0.14029760360717775
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,2048,64,2,128,1,fp8,fp8,0,0.13918399810791016
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,2048,64,4,128,1,float16,float16,0,0.15406399965286255
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,2048,64,4,128,1,float16,fp8,0,0.14036799669265748
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,2048,64,4,128,1,fp8,fp8,0,0.1393728017807007
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,2048,64,8,128,1,float16,float16,0,0.15281280279159545
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,2048,64,8,128,1,float16,fp8,0,0.14150400161743165
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,2048,64,8,128,1,fp8,fp8,0,0.1397744059562683
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,2048,64,64,128,1,float16,fp8,0,0.09883360266685486
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,2048,64,64,128,1,float16,float16,0,0.11051520109176635
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,2048,64,64,128,1,fp8,fp8,0,0.09737600088119507
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,2048,64,1,128,1,float16,float16,0,0.08649920225143433
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,2048,64,1,128,1,float16,fp8,0,0.08086400032043457
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,2048,64,1,128,1,fp8,fp8,0,0.08028159737586975
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,2048,64,2,128,1,float16,float16,0,0.08651040196418762
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,2048,64,2,128,1,float16,fp8,0,0.08094559907913208
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,2048,64,2,128,1,fp8,fp8,0,0.08013120293617249
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,2048,64,4,128,1,float16,float16,0,0.08806080222129822
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,2048,64,4,128,1,float16,fp8,0,0.08022239804267883
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,2048,64,4,128,1,fp8,fp8,0,0.08024479746818543
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,2048,64,8,128,1,float16,float16,0,0.08816480040550231
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,2048,64,8,128,1,float16,fp8,0,0.08064159750938416
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,2048,64,8,128,1,fp8,fp8,0,0.08023840188980103
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1536,64,1,128,1,float16,fp8,0,2.2478511810302733
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1536,64,1,128,1,fp8,fp8,0,2.2515584945678713
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1536,64,1,128,1,float16,float16,0,3.039255905151367
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1536,64,2,128,1,float16,fp8,0,2.26407527923584
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1536,64,2,128,1,float16,float16,0,2.5091167449951173
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1536,64,2,128,1,fp8,fp8,0,2.331979179382324
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1536,64,4,128,1,float16,float16,0,2.561289596557617
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1536,64,4,128,1,float16,fp8,0,2.278387260437012
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1536,64,4,128,1,fp8,fp8,0,2.2530143737792967
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1536,64,8,128,1,float16,float16,0,2.45043830871582
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1536,64,8,128,1,float16,fp8,0,2.2453119277954103
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1536,64,64,128,1,float16,float16,0,1.6962976455688477
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1536,64,8,128,1,fp8,fp8,0,2.2670671463012697
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1536,64,64,128,1,float16,fp8,0,1.6360336303710938
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1536,64,1,128,1,float16,float16,0,1.4761311531066894
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1536,64,1,128,1,float16,fp8,0,1.1812479972839356
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1536,64,64,128,1,fp8,fp8,0,1.5583999633789063
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1536,64,1,128,1,fp8,fp8,0,1.1429471969604492
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1536,64,2,128,1,float16,fp8,0,1.140552043914795
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1536,64,2,128,1,fp8,fp8,0,1.137395191192627
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1536,64,2,128,1,float16,float16,0,1.4015263557434081
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1536,64,4,128,1,float16,float16,0,1.2207247734069824
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1536,64,4,128,1,float16,fp8,0,1.138150405883789
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1536,64,4,128,1,fp8,fp8,0,1.1332624435424805
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1536,64,8,128,1,float16,fp8,0,1.4615504264831543
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1536,64,8,128,1,float16,float16,0,1.7177663803100587
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1536,64,64,128,1,float16,float16,0,0.8691535949707031
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1536,64,8,128,1,fp8,fp8,0,1.143735980987549
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1536,64,64,128,1,float16,fp8,0,0.803219223022461
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1536,64,1,128,1,float16,fp8,0,0.588705587387085
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1536,64,1,128,1,float16,float16,0,0.625161600112915
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1536,64,64,128,1,fp8,fp8,0,1.1060319900512696
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1536,64,1,128,1,fp8,fp8,0,0.603550386428833
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1536,64,2,128,1,float16,float16,0,0.6251039981842041
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1536,64,2,128,1,float16,fp8,0,0.732313585281372
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1536,64,2,128,1,fp8,fp8,0,0.78439359664917
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1536,64,4,128,1,float16,float16,0,0.6301616191864013
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1536,64,4,128,1,float16,fp8,0,0.5836991786956787
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1536,64,8,128,1,float16,float16,0,0.6381855964660644
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1536,64,4,128,1,fp8,fp8,0,0.7204063892364502
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1536,64,8,128,1,float16,fp8,0,0.5815487861633301
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1536,64,8,128,1,fp8,fp8,0,0.5816112041473389
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1536,64,64,128,1,float16,float16,0,0.4476624011993408
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1536,64,64,128,1,float16,fp8,0,0.4009136199951172
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1536,64,64,128,1,fp8,fp8,0,0.4166704177856445
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1536,64,1,128,1,float16,float16,0,0.32999041080474856
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1536,64,1,128,1,float16,fp8,0,0.3054944038391113
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1536,64,1,128,1,fp8,fp8,0,0.3063807964324951
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1536,64,2,128,1,float16,float16,0,0.328601598739624
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1536,64,2,128,1,float16,fp8,0,0.30697600841522216
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1536,64,2,128,1,fp8,fp8,0,0.3040607929229736
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1536,64,4,128,1,float16,fp8,0,0.3076303958892822
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1536,64,4,128,1,float16,float16,0,0.32987198829650877
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1536,64,4,128,1,fp8,fp8,0,0.3070768117904663
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1536,64,8,128,1,float16,float16,0,0.3340496063232422
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1536,64,8,128,1,float16,fp8,0,0.3062527894973755
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1536,64,8,128,1,fp8,fp8,0,0.30349600315093994
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1536,64,64,128,1,float16,float16,0,0.23797121047973632
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1536,64,64,128,1,float16,fp8,0,0.21332800388336182
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1536,64,64,128,1,fp8,fp8,0,0.21348159313201903
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1536,64,1,128,1,float16,float16,0,0.17700959444046022
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1536,64,1,128,1,fp8,fp8,0,0.16403679847717284
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1536,64,1,128,1,float16,fp8,0,0.1648576021194458
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1536,64,2,128,1,float16,float16,0,0.17707359790802002
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1536,64,2,128,1,float16,fp8,0,0.1647104024887085
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1536,64,2,128,1,fp8,fp8,0,0.16374720335006715
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1536,64,4,128,1,float16,float16,0,0.17990399599075318
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1536,64,4,128,1,float16,fp8,0,0.16477279663085936
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1536,64,4,128,1,fp8,fp8,0,0.1637231945991516
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1536,64,8,128,1,float16,float16,0,0.1804111957550049
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1536,64,8,128,1,float16,fp8,0,0.16452800035476683
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1536,64,8,128,1,fp8,fp8,0,0.16547839641571044
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1536,64,64,128,1,float16,float16,0,0.12787519693374633
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1536,64,64,128,1,float16,fp8,0,0.11759840250015259
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1536,64,64,128,1,fp8,fp8,0,0.11741600036621094
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1536,64,1,128,1,float16,float16,0,0.09860000014305115
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1536,64,1,128,1,float16,fp8,0,0.09067999720573425
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1536,64,1,128,1,fp8,fp8,0,0.09010720252990723
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1536,64,2,128,1,float16,float16,0,0.09880959987640381
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1536,64,2,128,1,float16,fp8,0,0.09051200151443481
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1536,64,2,128,1,fp8,fp8,0,0.09130240082740784
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1536,64,4,128,1,float16,float16,0,0.0999184012413025
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1536,64,4,128,1,float16,fp8,0,0.09093120098114013
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1536,64,4,128,1,fp8,fp8,0,0.0906607985496521
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1536,64,8,128,1,float16,float16,0,0.1023743987083435
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1536,64,8,128,1,float16,fp8,0,0.090174400806427
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1536,64,64,128,1,float16,float16,0,0.07609120011329651
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1536,64,8,128,1,fp8,fp8,0,0.0903823971748352
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1536,64,64,128,1,float16,fp8,0,0.06790239810943603
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1536,64,64,128,1,fp8,fp8,0,0.0678816020488739
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1536,64,1,128,1,float16,float16,0,0.05971199870109558
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1536,64,1,128,1,float16,fp8,0,0.0557744026184082
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1536,64,1,128,1,fp8,fp8,0,0.05589759945869446
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1536,64,2,128,1,float16,float16,0,0.05912960171699524
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1536,64,2,128,1,float16,fp8,0,0.0555840015411377
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1536,64,2,128,1,fp8,fp8,0,0.055902397632598876
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1536,64,4,128,1,float16,float16,0,0.05968959927558899
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1536,64,4,128,1,float16,fp8,0,0.05590559840202332
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1536,64,4,128,1,fp8,fp8,0,0.05580959916114807
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1536,64,8,128,1,float16,float16,0,0.06046079993247986
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1536,64,8,128,1,float16,fp8,0,0.05551360249519348
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1536,64,8,128,1,fp8,fp8,0,0.05568000078201294
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,1024,64,1,128,1,float16,fp8,0,2.374843215942383
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,1024,64,1,128,1,fp8,fp8,0,2.3759632110595703
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,1024,64,1,128,1,float16,float16,0,2.459062385559082
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,1024,64,2,128,1,float16,fp8,0,2.3948127746582033
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,1024,64,2,128,1,float16,float16,0,2.4741775512695314
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,1024,64,2,128,1,fp8,fp8,0,2.5146480560302735
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,1024,64,4,128,1,float16,fp8,0,2.3712799072265627
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,1024,64,4,128,1,float16,float16,0,2.8591232299804688
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,1024,64,4,128,1,fp8,fp8,0,2.4347904205322264
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,1024,64,8,128,1,float16,float16,0,3.029337692260742
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1024,64,64,128,1,float16,float16,0,2.0073408126831054
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,1024,64,8,128,1,float16,fp8,0,2.407222366333008
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,1024,64,8,128,1,fp8,fp8,0,2.4096048355102537
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1024,64,64,128,1,float16,fp8,0,1.897315216064453
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1024,64,1,128,1,float16,float16,0,1.2638784408569337
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1024,64,1,128,1,float16,fp8,0,1.2133328437805175
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1024,64,64,128,1,fp8,fp8,0,1.7571535110473633
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1024,64,1,128,1,fp8,fp8,0,1.1990976333618164
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1024,64,2,128,1,float16,float16,0,1.3163616180419921
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1024,64,2,128,1,float16,fp8,0,1.206991958618164
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1024,64,2,128,1,fp8,fp8,0,1.199512004852295
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1024,64,4,128,1,float16,float16,0,1.2634287834167481
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1024,64,4,128,1,float16,fp8,0,1.548744010925293
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1024,64,4,128,1,fp8,fp8,0,1.2000176429748535
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1024,64,8,128,1,float16,float16,0,1.2883760452270507
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1024,64,8,128,1,float16,fp8,0,1.2010416030883788
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1024,64,64,128,1,float16,fp8,0,0.9232255935668945
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1024,64,64,128,1,float16,float16,0,0.9760704040527344
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1024,64,8,128,1,fp8,fp8,0,1.1960368156433105
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1024,64,64,128,1,fp8,fp8,0,0.8855216026306152
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1024,64,1,128,1,float16,float16,0,0.6318960189819336
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1024,64,1,128,1,fp8,fp8,0,0.6156271934509278
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1024,64,2,128,1,float16,float16,0,0.6379903793334961
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1024,64,1,128,1,float16,fp8,0,0.7517407894134521
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1024,64,2,128,1,float16,fp8,0,0.6107791900634766
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1024,64,2,128,1,fp8,fp8,0,0.614247989654541
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1024,64,4,128,1,float16,float16,0,0.6482111930847168
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1024,64,4,128,1,float16,fp8,0,0.6146527767181397
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1024,64,4,128,1,fp8,fp8,0,0.6547711849212646
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1024,64,8,128,1,float16,float16,0,0.6704304218292236
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1024,64,8,128,1,fp8,fp8,0,0.609884786605835
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1024,64,8,128,1,float16,fp8,0,0.8071120262145997
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1024,64,64,128,1,float16,float16,0,0.499726390838623
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1024,64,64,128,1,float16,fp8,0,0.4441487789154053
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1024,64,1,128,1,float16,float16,0,0.42204961776733396
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1024,64,64,128,1,fp8,fp8,0,0.4452511787414551
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1024,64,1,128,1,float16,fp8,0,0.32060320377349855
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1024,64,1,128,1,fp8,fp8,0,0.31880319118499756
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1024,64,2,128,1,fp8,fp8,0,0.32097439765930175
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1024,64,2,128,1,float16,float16,0,0.3640912055969238
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1024,64,2,128,1,float16,fp8,0,0.31921279430389404
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1024,64,4,128,1,float16,float16,0,0.33925440311431887
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1024,64,4,128,1,float16,fp8,0,0.3210752010345459
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1024,64,4,128,1,fp8,fp8,0,0.3608319997787476
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1024,64,8,128,1,float16,float16,0,0.3506272077560425
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1024,64,8,128,1,float16,fp8,0,0.3165760040283203
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1024,64,8,128,1,fp8,fp8,0,0.3151808023452759
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1024,64,64,128,1,float16,float16,0,0.27059359550476075
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1024,64,64,128,1,float16,fp8,0,0.23371520042419433
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1024,64,64,128,1,fp8,fp8,0,0.23416800498962403
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1024,64,1,128,1,float16,fp8,0,0.18986079692840577
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1024,64,1,128,1,float16,float16,0,0.17723360061645507
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1024,64,2,128,1,float16,float16,0,0.17874239683151244
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1024,64,1,128,1,fp8,fp8,0,0.16828160285949706
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1024,64,2,128,1,float16,fp8,0,0.16821919679641723
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1024,64,4,128,1,float16,float16,0,0.18062880039215087
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1024,64,4,128,1,float16,fp8,0,0.16820000410079955
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1024,64,2,128,1,fp8,fp8,0,0.16845760345458985
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1024,64,4,128,1,fp8,fp8,0,0.16801919937133789
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1024,64,8,128,1,float16,float16,0,0.1861407995223999
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1024,64,8,128,1,fp8,fp8,0,0.16808160543441772
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1024,64,8,128,1,float16,fp8,0,0.1687824010848999
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1024,64,64,128,1,float16,float16,0,0.1389008045196533
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1024,64,64,128,1,float16,fp8,0,0.12692480087280272
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1024,64,64,128,1,fp8,fp8,0,0.12547999620437622
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1024,64,1,128,1,float16,float16,0,0.10043840408325196
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1024,64,1,128,1,float16,fp8,0,0.0919535994529724
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1024,64,1,128,1,fp8,fp8,0,0.09116320013999939
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1024,64,2,128,1,float16,float16,0,0.10022079944610596
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1024,64,2,128,1,float16,fp8,0,0.09204800128936767
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1024,64,2,128,1,fp8,fp8,0,0.0909168004989624
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1024,64,4,128,1,float16,float16,0,0.10127040147781372
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1024,64,4,128,1,float16,fp8,0,0.09277120232582092
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1024,64,4,128,1,fp8,fp8,0,0.09196799993515015
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1024,64,8,128,1,float16,float16,0,0.10347679853439332
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1024,64,8,128,1,float16,fp8,0,0.0938975989818573
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1024,64,8,128,1,fp8,fp8,0,0.09204959869384766
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1024,64,64,128,1,float16,fp8,0,0.07093600034713746
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1024,64,64,128,1,float16,float16,0,0.08006719946861267
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1024,64,64,128,1,fp8,fp8,0,0.07154240012168885
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1024,64,1,128,1,float16,float16,0,0.057132798433303836
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1024,64,1,128,1,fp8,fp8,0,0.05349439978599548
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1024,64,1,128,1,float16,fp8,0,0.05365279912948608
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1024,64,2,128,1,float16,float16,0,0.05742560029029846
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1024,64,2,128,1,float16,fp8,0,0.05362880229949951
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1024,64,2,128,1,fp8,fp8,0,0.053636801242828366
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1024,64,4,128,1,float16,float16,0,0.05774719715118408
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1024,64,4,128,1,float16,fp8,0,0.05343040227890015
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1024,64,4,128,1,fp8,fp8,0,0.05391839742660522
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1024,64,8,128,1,float16,float16,0,0.058878397941589354
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1024,64,8,128,1,float16,fp8,0,0.05344799757003784
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1024,64,64,128,1,float16,float16,0,0.045454400777816775
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1024,64,8,128,1,fp8,fp8,0,0.05406399965286255
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1024,64,64,128,1,float16,fp8,0,0.042926400899887085
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1024,64,64,128,1,fp8,fp8,0,0.04318720102310181
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1024,64,1,128,1,float16,fp8,0,0.03502399921417236
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1024,64,1,128,1,float16,float16,0,0.0373744010925293
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1024,64,1,128,1,fp8,fp8,0,0.03501920104026794
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1024,64,2,128,1,float16,float16,0,0.03704800009727478
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1024,64,2,128,1,float16,fp8,0,0.035132798552513125
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1024,64,2,128,1,fp8,fp8,0,0.03526720106601715
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1024,64,4,128,1,float16,float16,0,0.03706560134887695
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1024,64,4,128,1,fp8,fp8,0,0.03499839901924133
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1024,64,4,128,1,float16,fp8,0,0.0353983998298645
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1024,64,8,128,1,float16,float16,0,0.03712959885597229
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1024,64,8,128,1,float16,fp8,0,0.0350383996963501
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1024,64,8,128,1,fp8,fp8,0,0.035123199224472046
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,512,64,1,128,1,float16,fp8,0,1.764683151245117
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,512,64,1,128,1,float16,float16,0,1.7665903091430664
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,512,64,1,128,1,fp8,fp8,0,1.7676143646240234
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,512,64,2,128,1,float16,float16,0,1.7690704345703125
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,512,64,2,128,1,float16,fp8,0,1.8022560119628905
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,512,64,2,128,1,fp8,fp8,0,1.7613279342651367
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,512,64,4,128,1,float16,float16,0,1.842425537109375
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,512,64,4,128,1,fp8,fp8,0,1.7571504592895508
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,512,64,4,128,1,float16,fp8,0,1.9195743560791017
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,512,64,8,128,1,float16,float16,0,1.9816255569458008
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,512,64,64,128,1,float16,float16,0,1.5666720390319824
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,512,64,8,128,1,fp8,fp8,0,1.753950309753418
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,512,64,8,128,1,float16,fp8,0,1.8605615615844726
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,512,64,64,128,1,float16,fp8,0,1.5505663871765136
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,512,64,1,128,1,float16,float16,0,0.8961456298828125
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,512,64,1,128,1,float16,fp8,0,0.8935343742370605
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,512,64,1,128,1,fp8,fp8,0,0.8970208168029785
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,512,64,64,128,1,fp8,fp8,0,1.4025775909423828
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,512,64,2,128,1,float16,float16,0,0.8924624443054199
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,512,64,2,128,1,float16,fp8,0,0.8895983695983887
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,512,64,2,128,1,fp8,fp8,0,0.8940320014953613
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,512,64,4,128,1,float16,float16,0,0.9188976287841797
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,512,64,4,128,1,float16,fp8,0,1.1157440185546874
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,512,64,4,128,1,fp8,fp8,0,0.8876704216003418
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,512,64,8,128,1,float16,float16,0,0.9612607955932617
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,512,64,8,128,1,float16,fp8,0,0.8859711647033691
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,512,64,8,128,1,fp8,fp8,0,0.8856703758239746
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,512,64,64,128,1,float16,fp8,0,0.708403205871582
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,512,64,64,128,1,float16,float16,0,0.7938496112823487
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,512,64,64,128,1,fp8,fp8,0,0.7497951984405518
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,512,64,1,128,1,float16,float16,0,0.45939998626708983
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,512,64,1,128,1,float16,fp8,0,0.459884786605835
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,512,64,1,128,1,fp8,fp8,0,0.4588047981262207
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,512,64,2,128,1,float16,float16,0,0.4627711772918701
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,512,64,2,128,1,float16,fp8,0,0.45464000701904295
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,512,64,2,128,1,fp8,fp8,0,0.4575056076049805
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,512,64,4,128,1,float16,float16,0,0.4720016002655029
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,512,64,4,128,1,float16,fp8,0,0.4548511981964111
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,512,64,4,128,1,fp8,fp8,0,0.4542240142822266
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,512,64,8,128,1,float16,float16,0,0.4954063892364502
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,512,64,8,128,1,float16,fp8,0,0.4518608093261719
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,512,64,8,128,1,fp8,fp8,0,0.45268797874450684
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,512,64,64,128,1,float16,float16,0,0.4121376037597656
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,512,64,64,128,1,float16,fp8,0,0.3628864049911499
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,512,64,64,128,1,fp8,fp8,0,0.3625936031341553
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,512,64,1,128,1,float16,float16,0,0.24300639629364013
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,512,64,1,128,1,float16,fp8,0,0.23593599796295167
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,512,64,1,128,1,fp8,fp8,0,0.23801279067993164
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,512,64,2,128,1,float16,float16,0,0.24060161113739015
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,512,64,2,128,1,float16,fp8,0,0.2370687961578369
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,512,64,2,128,1,fp8,fp8,0,0.23545598983764648
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,512,64,4,128,1,float16,float16,0,0.24742560386657714
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,512,64,4,128,1,float16,fp8,0,0.23585278987884523
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,512,64,4,128,1,fp8,fp8,0,0.23647840023040773
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,512,64,8,128,1,float16,float16,0,0.254640007019043
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,512,64,8,128,1,float16,fp8,0,0.2356271982192993
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,512,64,64,128,1,float16,float16,0,0.21580801010131836
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,512,64,8,128,1,fp8,fp8,0,0.23446240425109863
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,512,64,64,128,1,float16,fp8,0,0.18932319879531861
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,512,64,1,128,1,float16,float16,0,0.12854880094528198
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,512,64,64,128,1,fp8,fp8,0,0.19005279541015624
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,512,64,1,128,1,float16,fp8,0,0.12441920042037964
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,512,64,1,128,1,fp8,fp8,0,0.1250208020210266
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,512,64,2,128,1,float16,float16,0,0.13031200170516968
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,512,64,2,128,1,float16,fp8,0,0.12398560047149658
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,512,64,2,128,1,fp8,fp8,0,0.12442400455474853
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,512,64,4,128,1,float16,float16,0,0.13206720352172852
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,512,64,4,128,1,float16,fp8,0,0.12473920583724976
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,512,64,4,128,1,fp8,fp8,0,0.12401440143585205
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,512,64,8,128,1,float16,float16,0,0.13751519918441774
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,512,64,8,128,1,float16,fp8,0,0.12368799448013305
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,512,64,8,128,1,fp8,fp8,0,0.12421280145645142
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,512,64,64,128,1,float16,float16,0,0.11176320314407348
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,512,64,64,128,1,float16,fp8,0,0.10066560506820679
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,512,64,1,128,1,float16,float16,0,0.0723792016506195
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,512,64,64,128,1,fp8,fp8,0,0.10224800109863282
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,512,64,1,128,1,float16,fp8,0,0.06779839992523193
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,512,64,1,128,1,fp8,fp8,0,0.06838880181312561
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,512,64,2,128,1,float16,float16,0,0.07217599749565125
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,512,64,2,128,1,float16,fp8,0,0.0678384006023407
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,512,64,2,128,1,fp8,fp8,0,0.06908159852027893
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,512,64,4,128,1,float16,float16,0,0.07374240159988403
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,512,64,4,128,1,float16,fp8,0,0.06827840209007263
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,512,64,4,128,1,fp8,fp8,0,0.06992800235748291
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,512,64,8,128,1,float16,float16,0,0.07557119727134705
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,512,64,8,128,1,float16,fp8,0,0.06832640171051026
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,512,64,8,128,1,fp8,fp8,0,0.07003200054168701
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,512,64,64,128,1,float16,float16,0,0.0638256013393402
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,512,64,64,128,1,float16,fp8,0,0.057387202978134155
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,512,64,1,128,1,float16,float16,0,0.04105600118637085
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,512,64,1,128,1,float16,fp8,0,0.03924480080604553
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,512,64,64,128,1,fp8,fp8,0,0.05747680068016052
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,512,64,1,128,1,fp8,fp8,0,0.03927519917488098
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,512,64,2,128,1,float16,float16,0,0.041140800714492796
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,512,64,2,128,1,float16,fp8,0,0.039980798959732056
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,512,64,4,128,1,float16,float16,0,0.04126720130443573
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,512,64,2,128,1,fp8,fp8,0,0.03925760090351105
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,512,64,4,128,1,float16,fp8,0,0.04015200138092041
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,512,64,4,128,1,fp8,fp8,0,0.039192000031471254
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,512,64,8,128,1,float16,float16,0,0.04244000017642975
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,512,64,8,128,1,float16,fp8,0,0.04004800021648407
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,512,64,8,128,1,fp8,fp8,0,0.03918400108814239
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,512,64,64,128,1,float16,float16,0,0.035139200091362
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,512,64,64,128,1,float16,fp8,0,0.03329600095748901
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,512,64,64,128,1,fp8,fp8,0,0.03355680108070373
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,512,64,1,128,1,float16,fp8,0,0.02680320143699646
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,512,64,1,128,1,float16,float16,0,0.026902401447296144
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,512,64,1,128,1,fp8,fp8,0,0.026830399036407472
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,512,64,2,128,1,float16,float16,0,0.026819199323654175
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,512,64,2,128,1,float16,fp8,0,0.026715201139450074
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,512,64,2,128,1,fp8,fp8,0,0.026841598749160766
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,512,64,4,128,1,float16,float16,0,0.026807999610900878
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,512,64,4,128,1,float16,fp8,0,0.02675360143184662
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,512,64,4,128,1,fp8,fp8,0,0.026782399415969847
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,512,64,8,128,1,float16,float16,0,0.02717440128326416
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,512,64,8,128,1,float16,fp8,0,0.026752001047134398
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,512,64,64,128,1,float16,float16,0,0.02065120041370392
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,512,64,8,128,1,fp8,fp8,0,0.026812800765037538
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,512,64,64,128,1,float16,fp8,0,0.02083680033683777
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,512,64,64,128,1,fp8,fp8,0,0.02072480022907257
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,512,64,1,128,1,float16,float16,0,0.017990399897098542
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,512,64,1,128,1,float16,fp8,0,0.01772480010986328
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,512,64,1,128,1,fp8,fp8,0,0.016574400663375854
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,512,64,2,128,1,float16,float16,0,0.017788800597190856
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,512,64,2,128,1,float16,fp8,0,0.01812639981508255
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,512,64,2,128,1,fp8,fp8,0,0.016582399606704712
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,512,64,4,128,1,float16,float16,0,0.018305599689483643
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,512,64,4,128,1,float16,fp8,0,0.01818400025367737
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,512,64,4,128,1,fp8,fp8,0,0.016864000260829924
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,512,64,8,128,1,float16,float16,0,0.018649600446224213
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,512,64,8,128,1,float16,fp8,0,0.01804639995098114
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,512,64,8,128,1,fp8,fp8,0,0.016631999611854555
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,256,64,1,128,1,float16,float16,0,0.7154863834381103
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,256,64,1,128,1,fp8,fp8,0,0.7298448085784912
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,256,64,1,128,1,float16,fp8,0,0.725545597076416
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,256,64,2,128,1,float16,float16,0,0.7135280132293701
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,256,64,2,128,1,float16,fp8,0,0.7275087833404541
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,256,64,2,128,1,fp8,fp8,0,0.7233280181884766
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,256,64,4,128,1,float16,float16,0,0.7336863994598388
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,256,64,4,128,1,float16,fp8,0,0.7248223781585693
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,256,64,4,128,1,fp8,fp8,0,0.7211391925811768
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,256,64,8,128,1,float16,float16,0,0.779744005203247
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,256,64,8,128,1,float16,fp8,0,0.7212719917297363
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,256,64,8,128,1,fp8,fp8,0,0.7215695858001709
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,256,64,64,128,1,float16,float16,0,0.6967391967773438
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,256,64,1,128,1,float16,float16,0,0.37056479454040525
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,256,64,1,128,1,float16,fp8,0,0.37081279754638674
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,256,64,64,128,1,float16,fp8,0,0.621457576751709
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,256,64,64,128,1,fp8,fp8,0,0.6159167766571045
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,256,64,1,128,1,fp8,fp8,0,0.3719072103500366
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,256,64,2,128,1,float16,float16,0,0.36752479076385497
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,256,64,2,128,1,float16,fp8,0,0.3729599952697754
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,256,64,2,128,1,fp8,fp8,0,0.3701920032501221
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,256,64,4,128,1,float16,float16,0,0.3784480094909668
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,256,64,4,128,1,float16,fp8,0,0.36953599452972413
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,256,64,4,128,1,fp8,fp8,0,0.3724735975265503
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,256,64,8,128,1,float16,float16,0,0.3953248023986816
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,256,64,8,128,1,float16,fp8,0,0.36940960884094237
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,256,64,8,128,1,fp8,fp8,0,0.36659040451049807
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,256,64,64,128,1,float16,float16,0,0.35575840473175047
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,256,64,64,128,1,float16,fp8,0,0.31699199676513673
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,256,64,1,128,1,float16,float16,0,0.19130239486694336
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,256,64,64,128,1,fp8,fp8,0,0.3168720006942749
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,256,64,1,128,1,float16,fp8,0,0.1927839994430542
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,256,64,1,128,1,fp8,fp8,0,0.19088319540023804
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,256,64,2,128,1,float16,float16,0,0.19148160219192506
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,256,64,2,128,1,float16,fp8,0,0.1917616009712219
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,256,64,2,128,1,fp8,fp8,0,0.1907423973083496
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,256,64,4,128,1,float16,float16,0,0.19592479467391968
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,256,64,4,128,1,float16,fp8,0,0.19126559495925904
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,256,64,4,128,1,fp8,fp8,0,0.19003520011901856
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,256,64,8,128,1,float16,float16,0,0.20624160766601562
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,256,64,8,128,1,float16,fp8,0,0.19116640090942383
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,256,64,8,128,1,fp8,fp8,0,0.19060959815979003
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,256,64,64,128,1,float16,float16,0,0.18800640106201172
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,256,64,64,128,1,float16,fp8,0,0.16574079990386964
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,256,64,1,128,1,float16,float16,0,0.10295679569244384
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,256,64,1,128,1,float16,fp8,0,0.10352319478988647
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,256,64,64,128,1,fp8,fp8,0,0.16634559631347656
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,256,64,1,128,1,fp8,fp8,0,0.10203679800033569
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,256,64,2,128,1,float16,fp8,0,0.10296640396118165
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,256,64,2,128,1,float16,float16,0,0.10419520139694213
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,256,64,2,128,1,fp8,fp8,0,0.1016975998878479
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,256,64,4,128,1,float16,float16,0,0.10519520044326783
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,256,64,4,128,1,float16,fp8,0,0.10367039442062378
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,256,64,4,128,1,fp8,fp8,0,0.10257760286331177
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,256,64,8,128,1,float16,float16,0,0.11102559566497802
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,256,64,8,128,1,float16,fp8,0,0.10313119888305664
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,256,64,8,128,1,fp8,fp8,0,0.10266400575637817
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,256,64,64,128,1,float16,float16,0,0.09656479954719543
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,256,64,64,128,1,float16,fp8,0,0.08823999762535095
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,256,64,1,128,1,float16,float16,0,0.05748479962348938
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,256,64,64,128,1,fp8,fp8,0,0.0882207989692688
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,256,64,1,128,1,float16,fp8,0,0.05509120225906372
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,256,64,1,128,1,fp8,fp8,0,0.05528479814529419
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,256,64,2,128,1,float16,float16,0,0.05775840282440185
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,256,64,2,128,1,float16,fp8,0,0.054979199171066286
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,256,64,2,128,1,fp8,fp8,0,0.05557439923286438
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,256,64,4,128,1,float16,float16,0,0.058422398567199704
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,256,64,4,128,1,float16,fp8,0,0.055801600217819214
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,256,64,4,128,1,fp8,fp8,0,0.0555184006690979
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,256,64,8,128,1,float16,float16,0,0.06089760065078735
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,256,64,8,128,1,float16,fp8,0,0.05558879971504212
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,256,64,8,128,1,fp8,fp8,0,0.055511999130249026
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,256,64,64,128,1,float16,float16,0,0.056678402423858645
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,256,64,64,128,1,float16,fp8,0,0.05098239779472351
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,256,64,64,128,1,fp8,fp8,0,0.05137280225753784
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,256,64,1,128,1,float16,float16,0,0.033435198664665225
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,256,64,1,128,1,float16,fp8,0,0.033348798751831055
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,256,64,1,128,1,fp8,fp8,0,0.03342719972133636
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,256,64,2,128,1,float16,float16,0,0.033313599228858945
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,256,64,2,128,1,float16,fp8,0,0.033257600665092465
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,256,64,4,128,1,float16,float16,0,0.035078400373458864
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,256,64,2,128,1,fp8,fp8,0,0.033251199126243594
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,256,64,4,128,1,float16,fp8,0,0.033847999572753903
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,256,64,4,128,1,fp8,fp8,0,0.03316799998283386
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,256,64,8,128,1,float16,float16,0,0.03555839955806732
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,256,64,8,128,1,float16,fp8,0,0.033195200562477115
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,256,64,8,128,1,fp8,fp8,0,0.033180800080299375
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,256,64,64,128,1,float16,float16,0,0.02894560098648071
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,256,64,64,128,1,float16,fp8,0,0.028832000494003297
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,256,64,64,128,1,fp8,fp8,0,0.028915199637413024
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,256,64,1,128,1,float16,float16,0,0.020695999264717102
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,256,64,1,128,1,float16,fp8,0,0.020632000267505647
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,256,64,1,128,1,fp8,fp8,0,0.02072319984436035
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,256,64,2,128,1,float16,float16,0,0.020640000700950623
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,256,64,2,128,1,float16,fp8,0,0.020644800364971162
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,256,64,2,128,1,fp8,fp8,0,0.020694400370121
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,256,64,4,128,1,float16,float16,0,0.020632000267505647
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,256,64,4,128,1,float16,fp8,0,0.020718400180339814
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,256,64,8,128,1,float16,float16,0,0.021491199731826782
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,256,64,4,128,1,fp8,fp8,0,0.020846399664878845
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,256,64,64,128,1,float16,float16,0,0.018515199422836304
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,256,64,8,128,1,fp8,fp8,0,0.020667199790477753
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,256,64,8,128,1,float16,fp8,0,0.020684799551963805
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,256,64,64,128,1,float16,fp8,0,0.01868479996919632
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,256,64,64,128,1,fp8,fp8,0,0.018617600202560425
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,256,64,1,128,1,float16,float16,0,0.014486399292945863
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,256,64,1,128,1,float16,fp8,0,0.014627200365066529
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,256,64,1,128,1,fp8,fp8,0,0.014523200690746307
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,256,64,2,128,1,float16,float16,0,0.0144896000623703
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,256,64,2,128,1,float16,fp8,0,0.014659200608730317
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,256,64,2,128,1,fp8,fp8,0,0.01451680064201355
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,256,64,4,128,1,float16,float16,0,0.014504000544548035
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,256,64,4,128,1,float16,fp8,0,0.0147024005651474
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,256,64,4,128,1,fp8,fp8,0,0.014531199634075165
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,256,64,8,128,1,float16,float16,0,0.014574399590492249
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,256,64,8,128,1,float16,fp8,0,0.014572800695896148
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,256,64,8,128,1,fp8,fp8,0,0.014560000598430633
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,256,64,64,128,1,float16,float16,0,0.01650400012731552
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,256,64,64,128,1,float16,fp8,0,0.016487999260425566
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,256,64,1,128,1,float16,float16,0,0.014519999921321868
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,256,64,64,128,1,fp8,fp8,0,0.0165583997964859
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,256,64,2,128,1,float16,float16,0,0.014519999921321868
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,256,64,1,128,1,float16,fp8,0,0.014644800126552582
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,256,64,1,128,1,fp8,fp8,0,0.014539200067520141
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,256,64,2,128,1,float16,fp8,0,0.014558400213718414
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,256,64,2,128,1,fp8,fp8,0,0.014483200013637542
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,256,64,4,128,1,float16,float16,0,0.014574399590492249
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,256,64,4,128,1,float16,fp8,0,0.01454399973154068
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,256,64,4,128,1,fp8,fp8,0,0.014502400159835815
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,256,64,8,128,1,float16,float16,0,0.014686399698257446
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,256,64,8,128,1,float16,fp8,0,0.014459200203418732
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,256,64,8,128,1,fp8,fp8,0,0.014504000544548035
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,128,64,1,128,1,float16,float16,0,0.4491392135620117
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,128,64,1,128,1,float16,fp8,0,0.45699357986450195
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,128,64,1,128,1,fp8,fp8,0,0.4584383964538574
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,128,64,2,128,1,float16,float16,0,0.4482384204864502
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,128,64,2,128,1,float16,fp8,0,0.45694398880004883
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,128,64,2,128,1,fp8,fp8,0,0.45696320533752444
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,128,64,4,128,1,float16,float16,0,0.4565264225006104
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,128,64,4,128,1,float16,fp8,0,0.45496320724487305
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,128,64,4,128,1,fp8,fp8,0,0.4556416034698486
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,128,64,8,128,1,float16,fp8,0,0.4537487983703613
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,128,64,8,128,1,fp8,fp8,0,0.4537775993347168
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,128,64,8,128,1,float16,float16,0,0.47626562118530275
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,128,64,64,128,1,float16,fp8,0,0.356278395652771
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,128,64,64,128,1,float16,float16,0,0.3922544002532959
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,128,64,64,128,1,fp8,fp8,0,0.3586848020553589
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,128,64,1,128,1,float16,float16,0,0.23235039710998534
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,128,64,1,128,1,fp8,fp8,0,0.23388800621032715
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,128,64,1,128,1,float16,fp8,0,0.23404960632324218
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,128,64,2,128,1,float16,float16,0,0.23136639595031738
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,128,64,2,128,1,fp8,fp8,0,0.23360960483551024
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,128,64,2,128,1,float16,fp8,0,0.23405280113220214
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,128,64,4,128,1,float16,float16,0,0.23563039302825928
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,128,64,4,128,1,float16,fp8,0,0.23267199993133544
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,128,64,4,128,1,fp8,fp8,0,0.23391358852386473
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,128,64,8,128,1,float16,float16,0,0.24509758949279786
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,128,64,8,128,1,float16,fp8,0,0.23226559162139893
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,128,64,8,128,1,fp8,fp8,0,0.23363039493560792
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,128,64,64,128,1,float16,float16,0,0.20371038913726808
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,128,64,64,128,1,float16,fp8,0,0.18369120359420776
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,128,64,64,128,1,fp8,fp8,0,0.18483999967575074
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,128,64,1,128,1,float16,float16,0,0.12288320064544678
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,128,64,1,128,1,float16,fp8,0,0.12123359441757202
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,128,64,1,128,1,fp8,fp8,0,0.1214143991470337
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,128,64,2,128,1,float16,float16,0,0.12246240377426147
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,128,64,2,128,1,float16,fp8,0,0.12118240594863891
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,128,64,2,128,1,fp8,fp8,0,0.12141280174255371
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,128,64,4,128,1,float16,float16,0,0.12371519804000855
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,128,64,4,128,1,float16,fp8,0,0.12125760316848755
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,128,64,4,128,1,fp8,fp8,0,0.12143360376358033
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,128,64,8,128,1,float16,float16,0,0.12958879470825196
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,128,64,8,128,1,float16,fp8,0,0.12126400470733642
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,128,64,8,128,1,fp8,fp8,0,0.12193119525909424
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,128,64,64,128,1,float16,float16,0,0.10398240089416504
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,128,64,64,128,1,float16,fp8,0,0.0971567988395691
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,128,64,64,128,1,fp8,fp8,0,0.09860320091247558
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,128,64,1,128,1,float16,float16,0,0.06706079840660095
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,128,64,1,128,1,float16,fp8,0,0.06574079990386963
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,128,64,1,128,1,fp8,fp8,0,0.06590080261230469
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,128,64,2,128,1,float16,float16,0,0.06798880100250244
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,128,64,2,128,1,float16,fp8,0,0.06589919924736024
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,128,64,2,128,1,fp8,fp8,0,0.06611199975013733
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,128,64,4,128,1,float16,float16,0,0.0696287989616394
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,128,64,4,128,1,float16,fp8,0,0.06572960019111633
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,128,64,4,128,1,fp8,fp8,0,0.06600319743156433
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,128,64,8,128,1,float16,float16,0,0.07112320065498352
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,128,64,64,128,1,float16,float16,0,0.058011198043823244
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,128,64,8,128,1,fp8,fp8,0,0.06585919857025146
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,128,64,64,128,1,float16,fp8,0,0.05350559949874878
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,128,64,8,128,1,float16,fp8,0,0.06625279784202576
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,128,64,1,128,1,float16,float16,0,0.03643040060997009
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,128,64,1,128,1,float16,fp8,0,0.03684160113334656
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,128,64,64,128,1,fp8,fp8,0,0.0534991979598999
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,128,64,1,128,1,fp8,fp8,0,0.03720960021018982
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,128,64,2,128,1,float16,float16,0,0.03700799942016601
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,128,64,2,128,1,float16,fp8,0,0.03697920143604279
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,128,64,2,128,1,fp8,fp8,0,0.037003201246261594
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,128,64,4,128,1,float16,float16,0,0.03709439933300018
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,128,64,4,128,1,float16,fp8,0,0.037089601159095764
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,128,64,4,128,1,fp8,fp8,0,0.03693279922008515
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,128,64,8,128,1,float16,float16,0,0.039211198687553406
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,128,64,8,128,1,float16,fp8,0,0.03714239895343781
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,128,64,8,128,1,fp8,fp8,0,0.03699199855327606
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,128,64,64,128,1,float16,float16,0,0.031171199679374696
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,128,64,64,128,1,float16,fp8,0,0.031147199869155883
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,128,64,1,128,1,float16,float16,0,0.022753599286079406
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,128,64,1,128,1,float16,fp8,0,0.02282080054283142
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,128,64,64,128,1,fp8,fp8,0,0.03089280128479004
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,128,64,1,128,1,fp8,fp8,0,0.024553599953651428
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,128,64,2,128,1,float16,float16,0,0.022729599475860597
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,128,64,2,128,1,float16,fp8,0,0.02280319929122925
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,128,64,2,128,1,fp8,fp8,0,0.022838400304317476
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,128,64,4,128,1,float16,float16,0,0.02480800002813339
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,128,64,4,128,1,float16,fp8,0,0.022697600722312927
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,128,64,4,128,1,fp8,fp8,0,0.022708800435066224
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,128,64,8,128,1,float16,fp8,0,0.02470560073852539
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,128,64,8,128,1,float16,float16,0,0.024766400456428528
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,128,64,8,128,1,fp8,fp8,0,0.02274399995803833
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,128,64,64,128,1,float16,float16,0,0.018644799292087556
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,128,64,64,128,1,float16,fp8,0,0.020559999346733093
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,128,64,64,128,1,fp8,fp8,0,0.020623999834060668
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,128,64,1,128,1,float16,float16,0,0.01478080004453659
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,128,64,1,128,1,float16,fp8,0,0.016531200706958772
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,128,64,1,128,1,fp8,fp8,0,0.016545599699020384
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,128,64,2,128,1,float16,float16,0,0.01632159948348999
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,128,64,2,128,1,float16,fp8,0,0.01459839940071106
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,128,64,2,128,1,fp8,fp8,0,0.016575999557971954
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,128,64,4,128,1,float16,fp8,0,0.014851200580596923
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,128,64,4,128,1,float16,float16,0,0.016575999557971954
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,128,64,4,128,1,fp8,fp8,0,0.01645440012216568
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,128,64,8,128,1,float16,float16,0,0.016646400094032288
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,128,64,8,128,1,float16,fp8,0,0.0165583997964859
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,128,64,8,128,1,fp8,fp8,0,0.016468800604343414
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,128,64,64,128,1,float16,float16,0,0.014577600359916686
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,128,64,64,128,1,fp8,fp8,0,0.014440000057220459
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,128,64,64,128,1,float16,fp8,0,0.014444799721240997
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,128,64,1,128,1,float16,float16,0,0.012483199685811996
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,128,64,1,128,1,float16,fp8,0,0.012361600250005721
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,128,64,1,128,1,fp8,fp8,0,0.012392000108957291
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,128,64,2,128,1,float16,float16,0,0.012462399899959564
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,128,64,2,128,1,float16,fp8,0,0.012388800084590913
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,128,64,4,128,1,float16,float16,0,0.012510399520397186
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,128,64,2,128,1,fp8,fp8,0,0.012415999919176102
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,128,64,4,128,1,float16,fp8,0,0.012355200201272964
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,128,64,4,128,1,fp8,fp8,0,0.012476799637079239
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,128,64,8,128,1,float16,float16,0,0.012380799651145935
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,128,64,8,128,1,float16,fp8,0,0.012443199753761292
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,128,64,8,128,1,fp8,fp8,0,0.012425599992275238
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,128,64,64,128,1,float16,float16,0,0.012488000094890594
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,128,64,64,128,1,float16,fp8,0,0.012465599924325943
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,128,64,64,128,1,fp8,fp8,0,0.012484800070524216
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,128,64,1,128,1,float16,float16,0,0.011771199852228164
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,128,64,1,128,1,float16,fp8,0,0.012068799883127212
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,128,64,1,128,1,fp8,fp8,0,0.010460799932479859
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,128,64,2,128,1,float16,float16,0,0.0118367999792099
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,128,64,2,128,1,fp8,fp8,0,0.010473600029945374
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,128,64,2,128,1,float16,fp8,0,0.012348800152540206
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,128,64,4,128,1,float16,float16,0,0.012299200147390365
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,128,64,4,128,1,fp8,fp8,0,0.012358400225639343
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,128,64,4,128,1,float16,fp8,0,0.01058880016207695
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,128,64,8,128,1,float16,float16,0,0.012348800152540206
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,128,64,8,128,1,float16,fp8,0,0.012414400279521943
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,128,64,8,128,1,fp8,fp8,0,0.010609599947929382
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,64,64,1,128,1,float16,float16,0,0.37512640953063964
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,64,64,1,128,1,fp8,fp8,0,0.3731775999069214
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,64,64,1,128,1,float16,fp8,0,0.3745552062988281
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,64,64,2,128,1,float16,float16,0,0.3738176107406616
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,64,64,2,128,1,fp8,fp8,0,0.37406558990478517
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,64,64,2,128,1,float16,fp8,0,0.3727184057235718
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,64,64,4,128,1,float16,float16,0,0.3783519983291626
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,64,64,4,128,1,float16,fp8,0,0.37341439723968506
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,64,64,4,128,1,fp8,fp8,0,0.37216479778289796
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,64,64,8,128,1,float16,fp8,0,0.37205278873443604
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,64,64,8,128,1,fp8,fp8,0,0.3714495897293091
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,64,64,8,128,1,float16,float16,0,0.3881999969482422
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,64,64,64,128,1,float16,fp8,0,0.25206561088562013
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,64,64,64,128,1,float16,float16,0,0.2757872104644775
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,64,64,64,128,1,fp8,fp8,0,0.2525199890136719
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,64,64,1,128,1,float16,float16,0,0.19322880506515502
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,64,64,1,128,1,float16,fp8,0,0.1918928027153015
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,64,64,1,128,1,fp8,fp8,0,0.19099199771881104
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,64,64,2,128,1,float16,float16,0,0.193123197555542
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,64,64,2,128,1,float16,fp8,0,0.19242719411849976
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,64,64,2,128,1,fp8,fp8,0,0.1907871961593628
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,64,64,4,128,1,float16,float16,0,0.1954095959663391
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,64,64,4,128,1,float16,fp8,0,0.19165120124816895
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,64,64,4,128,1,fp8,fp8,0,0.1903872013092041
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,64,64,8,128,1,float16,float16,0,0.20044960975646972
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,64,64,8,128,1,float16,fp8,0,0.19207520484924318
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,64,64,8,128,1,fp8,fp8,0,0.19021120071411132
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,64,64,64,128,1,float16,float16,0,0.138428795337677
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,64,64,64,128,1,float16,fp8,0,0.1322175979614258
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,64,64,64,128,1,fp8,fp8,0,0.131494402885437
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,64,64,1,128,1,float16,float16,0,0.10270240306854247
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,64,64,1,128,1,float16,fp8,0,0.10063199996948242
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,64,64,1,128,1,fp8,fp8,0,0.09893440008163452
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,64,64,2,128,1,float16,float16,0,0.10237280130386353
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,64,64,2,128,1,float16,fp8,0,0.10070240497589111
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,64,64,2,128,1,fp8,fp8,0,0.09893919825553894
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,64,64,4,128,1,float16,float16,0,0.10316959619522095
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,64,64,4,128,1,float16,fp8,0,0.1004080057144165
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,64,64,4,128,1,fp8,fp8,0,0.09896799921989441
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,64,64,8,128,1,float16,float16,0,0.10672960281372071
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,64,64,8,128,1,float16,fp8,0,0.10008319616317748
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,64,64,64,128,1,float16,float16,0,0.0754527986049652
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,64,64,8,128,1,fp8,fp8,0,0.09991199970245361
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,64,64,64,128,1,float16,fp8,0,0.07108960151672364
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,64,64,64,128,1,fp8,fp8,0,0.06991199851036071
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,64,64,1,128,1,float16,float16,0,0.05564640164375305
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,64,64,1,128,1,fp8,fp8,0,0.053737598657608035
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,64,64,1,128,1,float16,fp8,0,0.055289602279663085
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,64,64,2,128,1,float16,float16,0,0.05548160076141358
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,64,64,2,128,1,float16,fp8,0,0.053827202320098876
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,64,64,2,128,1,fp8,fp8,0,0.05388479828834534
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,64,64,4,128,1,float16,fp8,0,0.05390239953994751
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,64,64,4,128,1,float16,float16,0,0.05623199939727783
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,64,64,4,128,1,fp8,fp8,0,0.053699201345443724
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,64,64,8,128,1,float16,float16,0,0.057897597551345825
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,64,64,8,128,1,float16,fp8,0,0.05362719893455505
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,64,64,64,128,1,float16,float16,0,0.04110080003738403
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,64,64,64,128,1,float16,fp8,0,0.039113599061965945
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,64,64,8,128,1,fp8,fp8,0,0.05403040051460266
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,64,64,64,128,1,fp8,fp8,0,0.03905119895935059
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,64,64,1,128,1,float16,float16,0,0.031198400259017944
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,64,64,1,128,1,float16,fp8,0,0.030980798602104186
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,64,64,1,128,1,fp8,fp8,0,0.03091520071029663
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,64,64,2,128,1,float16,float16,0,0.031651198863983154
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,64,64,2,128,1,float16,fp8,0,0.03140319883823395
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,64,64,2,128,1,fp8,fp8,0,0.030900800228118898
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,64,64,4,128,1,float16,fp8,0,0.030982398986816408
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,64,64,4,128,1,fp8,fp8,0,0.03091520071029663
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,64,64,4,128,1,float16,float16,0,0.03295199871063233
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,64,64,8,128,1,float16,float16,0,0.03302719891071319
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,64,64,8,128,1,float16,fp8,0,0.03091520071029663
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,64,64,8,128,1,fp8,fp8,0,0.031060799956321716
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,64,64,64,128,1,float16,float16,0,0.024726399779319765
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,64,64,64,128,1,fp8,fp8,0,0.02481440007686615
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,64,64,64,128,1,float16,fp8,0,0.024747200310230255
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,64,64,1,128,1,float16,float16,0,0.020875200629234314
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,64,64,1,128,1,float16,fp8,0,0.020761600136756896
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,64,64,1,128,1,fp8,fp8,0,0.020638400316238405
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,64,64,2,128,1,float16,float16,0,0.02088959962129593
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,64,64,2,128,1,float16,fp8,0,0.020747199654579163
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,64,64,2,128,1,fp8,fp8,0,0.020656000077724456
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,64,64,4,128,1,float16,float16,0,0.020902399718761445
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,64,64,4,128,1,float16,fp8,0,0.02080159932374954
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,64,64,8,128,1,float16,float16,0,0.02194399982690811
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,64,64,4,128,1,fp8,fp8,0,0.020630399882793426
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,64,64,8,128,1,float16,fp8,0,0.020710399746894835
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,64,64,8,128,1,fp8,fp8,0,0.02072480022907257
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,64,64,64,128,1,float16,float16,0,0.016553600132465363
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,64,64,64,128,1,float16,fp8,0,0.016612799465656282
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,64,64,64,128,1,fp8,fp8,0,0.01658719927072525
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,64,64,1,128,1,float16,float16,0,0.014608000218868256
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,64,64,1,128,1,float16,fp8,0,0.014523200690746307
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,64,64,1,128,1,fp8,fp8,0,0.014643199741840363
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,64,64,2,128,1,float16,float16,0,0.014532800018787383
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,64,64,2,128,1,float16,fp8,0,0.014580799639225006
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,64,64,2,128,1,fp8,fp8,0,0.014499199390411378
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,64,64,4,128,1,float16,float16,0,0.014449599385261535
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,64,64,4,128,1,float16,fp8,0,0.014507199823856353
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,64,64,4,128,1,fp8,fp8,0,0.014494399726390838
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,64,64,8,128,1,float16,float16,0,0.01451680064201355
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,64,64,8,128,1,float16,fp8,0,0.01449120044708252
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,64,64,8,128,1,fp8,fp8,0,0.014521600306034088
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,64,64,64,128,1,float16,float16,0,0.012571200728416443
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,64,64,64,128,1,float16,fp8,0,0.01239359974861145
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,64,64,1,128,1,float16,float16,0,0.010452800244092942
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,64,64,64,128,1,fp8,fp8,0,0.012449599802494049
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,64,64,1,128,1,float16,fp8,0,0.010521599650382995
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,64,64,2,128,1,float16,float16,0,0.01056160032749176
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,64,64,1,128,1,fp8,fp8,0,0.010427200049161912
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,64,64,2,128,1,float16,fp8,0,0.010543999820947647
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,64,64,2,128,1,fp8,fp8,0,0.012358400225639343
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,64,64,4,128,1,float16,float16,0,0.010435199737548828
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,64,64,4,128,1,float16,fp8,0,0.010491199791431427
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,64,64,4,128,1,fp8,fp8,0,0.011992000043392181
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,64,64,8,128,1,float16,float16,0,0.010425599664449692
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,64,64,8,128,1,float16,fp8,0,0.011905600130558015
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,64,64,8,128,1,fp8,fp8,0,0.011740799993276596
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,64,64,64,128,1,float16,float16,0,0.012491200119256973
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,64,64,64,128,1,float16,fp8,0,0.011843200027942657
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,64,64,64,128,1,fp8,fp8,0,0.011107199639081956
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,64,64,1,128,1,float16,float16,0,0.010416000336408614
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,64,64,1,128,1,float16,fp8,0,0.010582400113344192
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,64,64,1,128,1,fp8,fp8,0,0.010542400181293488
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,64,64,2,128,1,float16,float16,0,0.010416000336408614
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,64,64,2,128,1,fp8,fp8,0,0.010496000200510025
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,64,64,2,128,1,float16,fp8,0,0.010516799986362457
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,64,64,4,128,1,float16,float16,0,0.010424000024795533
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,64,64,4,128,1,float16,fp8,0,0.010545600205659866
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,64,64,4,128,1,fp8,fp8,0,0.01048159971833229
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,64,64,8,128,1,float16,float16,0,0.010532800108194351
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,64,64,8,128,1,float16,fp8,0,0.010531199723482132
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,64,64,8,128,1,fp8,fp8,0,0.010620799660682679
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,32,64,1,128,1,float16,float16,0,0.3334752082824707
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,32,64,1,128,1,float16,fp8,0,0.33009920120239256
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,32,64,1,128,1,fp8,fp8,0,0.32796320915222166
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,32,64,2,128,1,float16,float16,0,0.33275680541992186
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,32,64,2,128,1,float16,fp8,0,0.33015999794006345
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,32,64,2,128,1,fp8,fp8,0,0.3282927989959717
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,32,64,4,128,1,float16,fp8,0,0.32988638877868653
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,32,64,4,128,1,float16,float16,0,0.3348623991012573
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,32,64,4,128,1,fp8,fp8,0,0.32760798931121826
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,32,64,8,128,1,float16,fp8,0,0.32838399410247804
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,32,64,8,128,1,float16,float16,0,0.3397104024887085
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,32,64,8,128,1,fp8,fp8,0,0.3270319938659668
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,32,64,64,128,1,float16,fp8,0,0.20103518962860106
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,32,64,64,128,1,float16,float16,0,0.20940639972686767
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,32,64,64,128,1,fp8,fp8,0,0.19984159469604493
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,32,64,1,128,1,float16,float16,0,0.1746127963066101
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,32,64,1,128,1,float16,fp8,0,0.16826560497283935
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,32,64,1,128,1,fp8,fp8,0,0.16871360540390015
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,32,64,2,128,1,float16,float16,0,0.17492159605026245
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,32,64,2,128,1,float16,fp8,0,0.16847360134124756
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,32,64,2,128,1,fp8,fp8,0,0.1686959981918335
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,32,64,4,128,1,float16,float16,0,0.17553119659423827
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,32,64,4,128,1,float16,fp8,0,0.1687600016593933
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,32,64,4,128,1,fp8,fp8,0,0.1688207983970642
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,32,64,8,128,1,float16,float16,0,0.17789599895477295
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,32,64,8,128,1,float16,fp8,0,0.16849440336227417
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,32,64,8,128,1,fp8,fp8,0,0.16889760494232178
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,32,64,64,128,1,float16,float16,0,0.11163359880447388
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,32,64,64,128,1,float16,fp8,0,0.10482239723205566
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,32,64,64,128,1,fp8,fp8,0,0.10475200414657593
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,32,64,1,128,1,float16,float16,0,0.09051679968833923
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,32,64,1,128,1,fp8,fp8,0,0.08827840089797974
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,32,64,1,128,1,float16,fp8,0,0.08848000168800355
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,32,64,2,128,1,float16,float16,0,0.0908128023147583
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,32,64,2,128,1,float16,fp8,0,0.08836320042610168
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,32,64,2,128,1,fp8,fp8,0,0.08856800198554993
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,32,64,4,128,1,float16,float16,0,0.09125120043754578
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,32,64,4,128,1,float16,fp8,0,0.08829920291900635
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,32,64,8,128,1,float16,float16,0,0.09381279945373536
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,32,64,4,128,1,fp8,fp8,0,0.08864160180091858
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,32,64,8,128,1,float16,fp8,0,0.0883072018623352
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,32,64,8,128,1,fp8,fp8,0,0.08836960196495056
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,32,64,64,128,1,float16,fp8,0,0.055559998750686644
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,32,64,64,128,1,float16,float16,0,0.059894400835037234
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,32,64,1,128,1,float16,float16,0,0.049572798609733584
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,32,64,64,128,1,fp8,fp8,0,0.05568959712982178
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,32,64,1,128,1,float16,fp8,0,0.04818240106105805
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,32,64,1,128,1,fp8,fp8,0,0.047777599096298216
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,32,64,2,128,1,float16,float16,0,0.05037760138511658
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,32,64,2,128,1,fp8,fp8,0,0.04795520007610321
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,32,64,2,128,1,float16,fp8,0,0.04954400062561035
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,32,64,4,128,1,float16,float16,0,0.051016002893447876
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,32,64,4,128,1,float16,fp8,0,0.04930399954319
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,32,64,4,128,1,fp8,fp8,0,0.049532800912857056
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,32,64,8,128,1,float16,float16,0,0.05151360034942627
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,32,64,8,128,1,float16,fp8,0,0.049297600984573364
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,32,64,64,128,1,float16,float16,0,0.03298400044441223
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,32,64,8,128,1,fp8,fp8,0,0.049481600522994995
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,32,64,64,128,1,float16,fp8,0,0.033051198720932005
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,32,64,64,128,1,fp8,fp8,0,0.03305279910564422
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,32,64,1,128,1,float16,fp8,0,0.028785601258277893
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,32,64,1,128,1,float16,float16,0,0.029291200637817382
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,32,64,1,128,1,fp8,fp8,0,0.028947201371192933
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,32,64,2,128,1,float16,float16,0,0.02963840067386627
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,32,64,2,128,1,float16,fp8,0,0.029003199934959412
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,32,64,2,128,1,fp8,fp8,0,0.02887359857559204
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,32,64,4,128,1,float16,float16,0,0.029902398586273193
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,32,64,4,128,1,float16,fp8,0,0.029080000519752503
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,32,64,4,128,1,fp8,fp8,0,0.028916800022125246
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,32,64,8,128,1,float16,float16,0,0.030862399935722352
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,32,64,8,128,1,fp8,fp8,0,0.02877599895000458
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,32,64,64,128,1,float16,float16,0,0.021859200298786165
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,32,64,8,128,1,float16,fp8,0,0.028942400217056276
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,32,64,64,128,1,float16,fp8,0,0.02067999988794327
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,32,64,64,128,1,fp8,fp8,0,0.020824000239372253
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,32,64,1,128,1,float16,float16,0,0.0188400000333786
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,32,64,1,128,1,float16,fp8,0,0.018651199340820313
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,32,64,1,128,1,fp8,fp8,0,0.01867839992046356
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,32,64,2,128,1,float16,float16,0,0.018799999356269838
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,32,64,2,128,1,float16,fp8,0,0.018699200451374055
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,32,64,2,128,1,fp8,fp8,0,0.018731200695037843
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,32,64,4,128,1,float16,fp8,0,0.018620799481868743
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,32,64,4,128,1,float16,float16,0,0.018806399405002595
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,32,64,4,128,1,fp8,fp8,0,0.018753600120544434
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,32,64,8,128,1,float16,float16,0,0.01930239945650101
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,32,64,8,128,1,fp8,fp8,0,0.01987839937210083
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,32,64,8,128,1,float16,fp8,0,0.018775999546051025
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,32,64,64,128,1,float16,float16,0,0.014529600739479065
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,32,64,64,128,1,float16,fp8,0,0.01451839953660965
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,32,64,64,128,1,fp8,fp8,0,0.014683200418949128
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,32,64,1,128,1,float16,float16,0,0.014504000544548035
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,32,64,1,128,1,float16,fp8,0,0.014396800100803376
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,32,64,1,128,1,fp8,fp8,0,0.014569599926471711
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,32,64,2,128,1,float16,float16,0,0.013447999954223633
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,32,64,2,128,1,float16,fp8,0,0.014480000734329224
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,32,64,2,128,1,fp8,fp8,0,0.01454399973154068
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,32,64,4,128,1,float16,float16,0,0.014097599685192109
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,32,64,4,128,1,float16,fp8,0,0.014435200393199921
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,32,64,4,128,1,fp8,fp8,0,0.014507199823856353
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,32,64,8,128,1,float16,fp8,0,0.014532800018787383
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,32,64,8,128,1,fp8,fp8,0,0.014524799585342408
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,32,64,8,128,1,float16,float16,0,0.01446399986743927
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,32,64,64,128,1,float16,float16,0,0.012680000066757202
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,32,64,64,128,1,fp8,fp8,0,0.010435199737548828
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,32,64,64,128,1,float16,fp8,0,0.010436800122261048
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,32,64,1,128,1,float16,float16,0,0.010593599826097488
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,32,64,1,128,1,float16,fp8,0,0.010436800122261048
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,32,64,1,128,1,fp8,fp8,0,0.010441599786281586
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,32,64,2,128,1,float16,float16,0,0.010543999820947647
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,32,64,2,128,1,float16,fp8,0,0.010444799810647965
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,32,64,2,128,1,fp8,fp8,0,0.01051200032234192
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,32,64,4,128,1,float16,float16,0,0.010596799850463866
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,32,64,4,128,1,float16,fp8,0,0.010499200224876404
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,32,64,4,128,1,fp8,fp8,0,0.010451199859380722
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,32,64,8,128,1,float16,float16,0,0.010500799864530563
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,32,64,8,128,1,float16,fp8,0,0.010409600287675857
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,32,64,8,128,1,fp8,fp8,0,0.010414399951696397
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,32,64,64,128,1,float16,float16,0,0.012467200309038163
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,32,64,64,128,1,float16,fp8,0,0.01050880029797554
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,32,64,64,128,1,fp8,fp8,0,0.01043360009789467
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,32,64,1,128,1,float16,float16,0,0.010489600151777268
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,32,64,1,128,1,float16,fp8,0,0.01048159971833229
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,32,64,1,128,1,fp8,fp8,0,0.010454399883747101
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,32,64,2,128,1,float16,fp8,0,0.010446400195360184
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,32,64,2,128,1,float16,float16,0,0.01048159971833229
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,32,64,2,128,1,fp8,fp8,0,0.010409600287675857
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,32,64,4,128,1,float16,float16,0,0.01040479987859726
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,32,64,4,128,1,float16,fp8,0,0.010500799864530563
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,32,64,4,128,1,fp8,fp8,0,0.010411199927330018
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,32,64,8,128,1,float16,float16,0,0.010420800000429154
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,32,64,8,128,1,float16,fp8,0,0.010416000336408614
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,32,64,8,128,1,fp8,fp8,0,0.010422399640083313
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,16,64,1,128,1,float16,float16,0,0.3225631952285767
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,16,64,1,128,1,float16,fp8,0,0.3097007989883423
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,16,64,1,128,1,fp8,fp8,0,0.30931520462036133
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,16,64,2,128,1,float16,float16,0,0.32223680019378664
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,16,64,2,128,1,float16,fp8,0,0.3095792055130005
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,16,64,2,128,1,fp8,fp8,0,0.30937440395355226
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,16,64,4,128,1,float16,float16,0,0.32504959106445314
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,16,64,4,128,1,float16,fp8,0,0.30938880443572997
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,16,64,4,128,1,fp8,fp8,0,0.30962719917297366
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,16,64,8,128,1,float16,fp8,0,0.30943999290466306
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,16,64,8,128,1,float16,float16,0,0.3264591932296753
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,16,64,8,128,1,fp8,fp8,0,0.30898399353027345
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,16,64,64,128,1,float16,float16,0,0.18600800037384033
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,16,64,64,128,1,float16,fp8,0,0.17494239807128906
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,16,64,64,128,1,fp8,fp8,0,0.17445119619369506
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,16,64,1,128,1,float16,float16,0,0.16655199527740477
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,16,64,1,128,1,fp8,fp8,0,0.15807679891586304
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,16,64,1,128,1,float16,fp8,0,0.1582927942276001
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,16,64,2,128,1,float16,float16,0,0.1663439989089966
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,16,64,2,128,1,float16,fp8,0,0.1580512046813965
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,16,64,2,128,1,fp8,fp8,0,0.15817760229110717
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,16,64,4,128,1,float16,float16,0,0.16739679574966432
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,16,64,4,128,1,float16,fp8,0,0.1580672025680542
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,16,64,4,128,1,fp8,fp8,0,0.15890560150146485
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,16,64,8,128,1,float16,float16,0,0.16862080097198487
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,16,64,8,128,1,float16,fp8,0,0.1580448031425476
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,16,64,8,128,1,fp8,fp8,0,0.15839200019836425
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,16,64,64,128,1,float16,float16,0,0.09680479764938354
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,16,64,64,128,1,float16,fp8,0,0.0923471987247467
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,16,64,64,128,1,fp8,fp8,0,0.09132159948348999
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,16,64,1,128,1,float16,float16,0,0.0884607970714569
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,16,64,1,128,1,float16,fp8,0,0.08231520056724548
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,16,64,1,128,1,fp8,fp8,0,0.08392159938812256
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,16,64,2,128,1,float16,fp8,0,0.08229600191116333
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,16,64,2,128,1,float16,float16,0,0.08844959735870361
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,16,64,2,128,1,fp8,fp8,0,0.08392000198364258
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,16,64,4,128,1,float16,float16,0,0.08924639821052552
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,16,64,4,128,1,float16,fp8,0,0.08373919725418091
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,16,64,4,128,1,fp8,fp8,0,0.08412479758262634
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,16,64,8,128,1,float16,float16,0,0.08959519863128662
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,16,64,8,128,1,float16,fp8,0,0.08403519988059997
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,16,64,64,128,1,float16,float16,0,0.05281760096549988
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,16,64,64,128,1,float16,fp8,0,0.05137119889259338
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,16,64,8,128,1,fp8,fp8,0,0.08429440259933471
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,16,64,1,128,1,float16,float16,0,0.04936319887638092
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,16,64,1,128,1,float16,fp8,0,0.047286400198936464
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,16,64,64,128,1,fp8,fp8,0,0.04992640018463135
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,16,64,1,128,1,fp8,fp8,0,0.047295999526977536
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,16,64,2,128,1,float16,float16,0,0.049646401405334474
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,16,64,2,128,1,float16,fp8,0,0.04707199931144714
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,16,64,2,128,1,fp8,fp8,0,0.04622719883918762
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,16,64,4,128,1,float16,float16,0,0.04951040148735046
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,16,64,4,128,1,float16,fp8,0,0.04719679951667786
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,16,64,4,128,1,fp8,fp8,0,0.04596160054206848
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,16,64,8,128,1,float16,float16,0,0.04949600100517273
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,16,64,8,128,1,float16,fp8,0,0.047244799137115476
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,16,64,8,128,1,fp8,fp8,0,0.04588159918785095
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,16,64,64,128,1,float16,float16,0,0.031014400720596313
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,16,64,64,128,1,float16,fp8,0,0.02886880040168762
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,16,64,1,128,1,float16,float16,0,0.028999999165534973
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,16,64,64,128,1,fp8,fp8,0,0.028891199827194215
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,16,64,1,128,1,float16,fp8,0,0.02680639922618866
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,16,64,1,128,1,fp8,fp8,0,0.02701280117034912
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,16,64,2,128,1,float16,float16,0,0.02886880040168762
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,16,64,2,128,1,float16,fp8,0,0.026812800765037538
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,16,64,2,128,1,fp8,fp8,0,0.027003198862075806
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,16,64,4,128,1,float16,float16,0,0.028942400217056276
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,16,64,4,128,1,float16,fp8,0,0.026819199323654175
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,16,64,4,128,1,fp8,fp8,0,0.027006399631500245
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,16,64,8,128,1,float16,float16,0,0.02895520031452179
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,16,64,8,128,1,float16,fp8,0,0.0267984002828598
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,16,64,64,128,1,float16,float16,0,0.020768000185489653
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,16,64,8,128,1,fp8,fp8,0,0.02727999985218048
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,16,64,64,128,1,float16,fp8,0,0.01870400011539459
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,16,64,1,128,1,float16,float16,0,0.01886560022830963
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,16,64,64,128,1,fp8,fp8,0,0.02011680006980896
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,16,64,1,128,1,float16,fp8,0,0.018646399676799773
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,16,64,2,128,1,float16,float16,0,0.018779200315475465
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,16,64,1,128,1,fp8,fp8,0,0.01907680034637451
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,16,64,2,128,1,float16,fp8,0,0.01860959976911545
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,16,64,2,128,1,fp8,fp8,0,0.018603199720382692
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,16,64,4,128,1,float16,float16,0,0.018913599848747253
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,16,64,4,128,1,float16,fp8,0,0.018747200071811677
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,16,64,8,128,1,float16,float16,0,0.0186256006360054
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,16,64,4,128,1,fp8,fp8,0,0.018648000061511995
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,16,64,8,128,1,float16,fp8,0,0.018692800402641298
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,16,64,8,128,1,fp8,fp8,0,0.018692800402641298
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16,64,64,128,1,float16,float16,0,0.014550399780273438
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16,64,64,128,1,fp8,fp8,0,0.014496000111103058
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16,64,64,128,1,float16,fp8,0,0.014448000490665436
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16,64,1,128,1,float16,float16,0,0.014446400105953217
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16,64,1,128,1,float16,fp8,0,0.012476799637079239
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16,64,2,128,1,float16,float16,0,0.014531199634075165
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16,64,2,128,1,fp8,fp8,0,0.014056000113487243
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16,64,2,128,1,float16,fp8,0,0.014552000164985656
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16,64,1,128,1,fp8,fp8,0,0.013600000739097595
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16,64,4,128,1,float16,float16,0,0.014508800208568573
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16,64,4,128,1,fp8,fp8,0,0.012992000579833985
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16,64,4,128,1,float16,fp8,0,0.013711999356746673
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16,64,8,128,1,float16,float16,0,0.014486399292945863
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16,64,8,128,1,float16,fp8,0,0.013171200454235078
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16,64,64,128,1,float16,float16,0,0.012465599924325943
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16,64,8,128,1,fp8,fp8,0,0.013760000467300415
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16,64,64,128,1,float16,fp8,0,0.01050880029797554
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16,64,64,128,1,fp8,fp8,0,0.010451199859380722
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16,64,1,128,1,float16,float16,0,0.01040479987859726
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16,64,1,128,1,float16,fp8,0,0.010555200278759003
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16,64,1,128,1,fp8,fp8,0,0.010449600219726563
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16,64,2,128,1,float16,float16,0,0.010435199737548828
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16,64,2,128,1,float16,fp8,0,0.010521599650382995
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16,64,2,128,1,fp8,fp8,0,0.010443200170993806
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16,64,4,128,1,float16,float16,0,0.010475199669599533
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16,64,4,128,1,float16,fp8,0,0.010494399815797806
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16,64,4,128,1,fp8,fp8,0,0.010505600273609162
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16,64,8,128,1,float16,float16,0,0.010422399640083313
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16,64,8,128,1,fp8,fp8,0,0.010425599664449692
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16,64,64,128,1,float16,float16,0,0.012432000041007996
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16,64,8,128,1,float16,fp8,0,0.010513599961996078
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16,64,64,128,1,float16,fp8,0,0.010435199737548828
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16,64,64,128,1,fp8,fp8,0,0.010412800312042236
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16,64,1,128,1,float16,float16,0,0.010476800054311753
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16,64,1,128,1,float16,fp8,0,0.010451199859380722
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16,64,1,128,1,fp8,fp8,0,0.01043360009789467
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16,64,2,128,1,float16,float16,0,0.010606399923563003
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16,64,2,128,1,float16,fp8,0,0.010475199669599533
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16,64,2,128,1,fp8,fp8,0,0.0103472001850605
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16,64,4,128,1,float16,float16,0,0.01063840016722679
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16,64,4,128,1,float16,fp8,0,0.010284800082445145
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16,64,4,128,1,fp8,fp8,0,0.010451199859380722
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16,64,8,128,1,float16,float16,0,0.010542400181293488
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16,64,8,128,1,float16,fp8,0,0.010387200117111205
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16,64,8,128,1,fp8,fp8,0,0.010398399829864503
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,1,64,1,128,1,float16,float16,0,0.3179152011871338
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,1,64,1,128,1,fp8,fp8,0,0.2975167989730835
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,1,64,1,128,1,float16,fp8,0,0.297161602973938
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,1,64,2,128,1,float16,float16,0,0.3179408073425293
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,1,64,2,128,1,float16,fp8,0,0.29770081043243407
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,1,64,2,128,1,fp8,fp8,0,0.29723999500274656
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,1,64,4,128,1,float16,float16,0,0.31778559684753416
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,1,64,4,128,1,float16,fp8,0,0.29737439155578616
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,1,64,4,128,1,fp8,fp8,0,0.2973167896270752
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,1,64,8,128,1,float16,float16,0,0.3179744005203247
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,1,64,8,128,1,float16,fp8,0,0.297379207611084
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,1,64,8,128,1,fp8,fp8,0,0.29731359481811526
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,1,64,64,128,1,float16,float16,0,0.1662976026535034
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,1,64,64,128,1,float16,fp8,0,0.15390559434890747
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,1,64,64,128,1,fp8,fp8,0,0.15399680137634278
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,1,64,1,128,1,float16,float16,0,0.16430879831314088
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,1,64,1,128,1,float16,fp8,0,0.1537775993347168
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,1,64,1,128,1,fp8,fp8,0,0.15380640029907228
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,1,64,2,128,1,float16,float16,0,0.16420639753341676
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,1,64,2,128,1,float16,fp8,0,0.1537616014480591
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,1,64,2,128,1,fp8,fp8,0,0.15374079942703248
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,1,64,4,128,1,float16,float16,0,0.16420799493789673
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,1,64,4,128,1,float16,fp8,0,0.15394079685211182
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,1,64,4,128,1,fp8,fp8,0,0.15385279655456544
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,1,64,8,128,1,float16,float16,0,0.16405119895935058
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,1,64,8,128,1,float16,fp8,0,0.15383360385894776
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,1,64,8,128,1,fp8,fp8,0,0.15377919673919677
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1,64,64,128,1,float16,float16,0,0.0883791983127594
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1,64,64,128,1,float16,fp8,0,0.08212479948997498
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1,64,64,128,1,fp8,fp8,0,0.08217599987983704
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1,64,1,128,1,float16,float16,0,0.08830559849739075
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1,64,1,128,1,float16,fp8,0,0.08167840242385864
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1,64,1,128,1,fp8,fp8,0,0.08206080198287964
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1,64,2,128,1,float16,fp8,0,0.08218719959259033
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1,64,2,128,1,fp8,fp8,0,0.08214399814605713
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1,64,2,128,1,float16,float16,0,0.08834559917449951
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1,64,4,128,1,float16,float16,0,0.08825439810752869
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1,64,4,128,1,float16,fp8,0,0.08211680054664612
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1,64,4,128,1,fp8,fp8,0,0.08211359977722169
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1,64,8,128,1,float16,float16,0,0.08835039734840393
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1,64,8,128,1,float16,fp8,0,0.08212000131607056
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1,64,8,128,1,fp8,fp8,0,0.08220319747924805
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1,64,64,128,1,float16,float16,0,0.05100160241127014
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1,64,64,128,1,float16,fp8,0,0.045270401239395144
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1,64,64,128,1,fp8,fp8,0,0.04547359943389893
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1,64,1,128,1,float16,float16,0,0.04944800138473511
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1,64,1,128,1,float16,fp8,0,0.04525760114192963
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1,64,1,128,1,fp8,fp8,0,0.04532319903373718
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1,64,2,128,1,float16,float16,0,0.04943839907646179
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1,64,2,128,1,float16,fp8,0,0.04524640142917633
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1,64,2,128,1,fp8,fp8,0,0.04529280066490173
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1,64,4,128,1,fp8,fp8,0,0.045228800177574156
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1,64,4,128,1,float16,fp8,0,0.04527199864387512
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1,64,4,128,1,float16,float16,0,0.04938719868659973
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1,64,8,128,1,float16,float16,0,0.04939680099487305
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1,64,8,128,1,float16,fp8,0,0.045239999890327454
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1,64,8,128,1,fp8,fp8,0,0.04520640075206757
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1,64,64,128,1,float16,float16,0,0.030875200033187868
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1,64,1,128,1,float16,float16,0,0.02880159914493561
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1,64,64,128,1,fp8,fp8,0,0.026840001344680786
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1,64,64,128,1,float16,fp8,0,0.027011200785636902
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1,64,1,128,1,float16,fp8,0,0.026910400390625
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1,64,1,128,1,fp8,fp8,0,0.026788800954818726
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1,64,2,128,1,float16,float16,0,0.02889760136604309
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1,64,2,128,1,float16,fp8,0,0.026894399523735048
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1,64,2,128,1,fp8,fp8,0,0.02688640058040619
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1,64,4,128,1,float16,float16,0,0.02886880040168762
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1,64,4,128,1,float16,fp8,0,0.026915198564529418
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1,64,4,128,1,fp8,fp8,0,0.026943999528884887
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1,64,8,128,1,float16,float16,0,0.0289247989654541
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1,64,8,128,1,float16,fp8,0,0.02688319981098175
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1,64,8,128,1,fp8,fp8,0,0.026956799626350402
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1,64,64,128,1,float16,float16,0,0.02065120041370392
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1,64,64,128,1,float16,fp8,0,0.018697600066661834
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1,64,64,128,1,fp8,fp8,0,0.018751999735832213
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1,64,1,128,1,float16,float16,0,0.018651199340820313
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1,64,1,128,1,float16,fp8,0,0.018668800592422485
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1,64,2,128,1,float16,float16,0,0.018607999384403228
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1,64,1,128,1,fp8,fp8,0,0.01878879964351654
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1,64,2,128,1,float16,fp8,0,0.018691200017929076
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1,64,2,128,1,fp8,fp8,0,0.018592000007629395
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1,64,4,128,1,float16,float16,0,0.01881439983844757
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1,64,4,128,1,float16,fp8,0,0.018726399540901183
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1,64,4,128,1,fp8,fp8,0,0.01863519996404648
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1,64,8,128,1,float16,float16,0,0.01876319944858551
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1,64,8,128,1,float16,fp8,0,0.01873600035905838
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1,64,8,128,1,fp8,fp8,0,0.01854880005121231
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1,64,64,128,1,float16,fp8,0,0.014524799585342408
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1,64,64,128,1,float16,float16,0,0.01656640022993088
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1,64,64,128,1,fp8,fp8,0,0.014500799775123595
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1,64,1,128,1,float16,float16,0,0.014455999433994293
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1,64,1,128,1,float16,fp8,0,0.013363200426101684
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1,64,1,128,1,fp8,fp8,0,0.012583999335765839
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1,64,2,128,1,float16,float16,0,0.014473600685596466
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1,64,2,128,1,float16,fp8,0,0.014724799990653991
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1,64,2,128,1,fp8,fp8,0,0.012564800679683685
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1,64,4,128,1,float16,float16,0,0.014443199336528777
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1,64,4,128,1,float16,fp8,0,0.013655999302864074
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1,64,4,128,1,fp8,fp8,0,0.012507200241088867
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1,64,8,128,1,float16,float16,0,0.014564800262451171
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1,64,8,128,1,float16,fp8,0,0.014120000600814819
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1,64,8,128,1,fp8,fp8,0,0.012455999851226807
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1,64,64,128,1,float16,float16,0,0.012664000689983367
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1,64,64,128,1,float16,fp8,0,0.010441599786281586
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1,64,1,128,1,float16,float16,0,0.01058880016207695
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1,64,64,128,1,fp8,fp8,0,0.0104592002928257
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1,64,1,128,1,float16,fp8,0,0.010494399815797806
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1,64,1,128,1,fp8,fp8,0,0.010502400249242783
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1,64,2,128,1,float16,float16,0,0.01064319983124733
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1,64,2,128,1,float16,fp8,0,0.010507199913263321
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1,64,2,128,1,fp8,fp8,0,0.010539200156927109
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1,64,4,128,1,float16,float16,0,0.010460799932479859
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1,64,4,128,1,float16,fp8,0,0.010585600137710571
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1,64,4,128,1,fp8,fp8,0,0.010503999888896942
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1,64,8,128,1,float16,float16,0,0.010475199669599533
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1,64,8,128,1,float16,fp8,0,0.01055999994277954
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1,64,8,128,1,fp8,fp8,0,0.010473600029945374
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1,64,64,128,1,float16,float16,0,0.012414400279521943
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1,64,64,128,1,float16,fp8,0,0.010579200088977813
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1,64,1,128,1,float16,float16,0,0.010391999781131745
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1,64,64,128,1,fp8,fp8,0,0.010505600273609162
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1,64,1,128,1,float16,fp8,0,0.010590399801731109
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1,64,1,128,1,fp8,fp8,0,0.010395199805498124
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1,64,2,128,1,float16,float16,0,0.010491199791431427
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1,64,2,128,1,float16,fp8,0,0.010539200156927109
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1,64,2,128,1,fp8,fp8,0,0.01040000021457672
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1,64,4,128,1,float16,float16,0,0.010473600029945374
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1,64,4,128,1,float16,fp8,0,0.010503999888896942
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1,64,8,128,1,float16,float16,0,0.010532800108194351
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1,64,4,128,1,fp8,fp8,0,0.010438399761915207
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1,64,8,128,1,float16,fp8,0,0.010476800054311753
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1,64,8,128,1,fp8,fp8,0,0.010363200306892395
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,16384,48,1,128,1,fp8,fp8,0,16.651515197753906
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,16384,48,1,128,1,float16,fp8,0,17.640611267089845
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,16384,48,1,128,1,float16,float16,0,27.275473022460936
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,16384,48,2,128,1,float16,float16,0,27.132330322265624
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,16384,48,2,128,1,float16,fp8,0,18.488087463378907
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,16384,48,2,128,1,fp8,fp8,0,19.176847839355467
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,16384,48,4,128,1,float16,fp8,0,17.124961853027344
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,16384,48,4,128,1,fp8,fp8,0,17.87670135498047
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,16384,48,4,128,1,float16,float16,0,28.524862670898436
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,16384,48,8,128,1,float16,fp8,0,18.006562805175783
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,16384,48,8,128,1,float16,float16,0,30.553201293945314
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,16384,48,8,128,1,fp8,fp8,0,18.330667114257814
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16384,48,48,128,1,float16,fp8,0,8.634561920166016
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16384,48,48,128,1,fp8,fp8,0,9.140560150146484
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16384,48,1,128,1,float16,float16,0,14.742256164550781
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16384,48,1,128,1,float16,fp8,0,8.209630584716797
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16384,48,1,128,1,fp8,fp8,0,8.730865478515625
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16384,48,2,128,1,float16,fp8,0,8.748827362060547
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16384,48,2,128,1,fp8,fp8,0,9.468993377685546
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16384,48,2,128,1,float16,float16,0,14.453529357910156
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16384,48,4,128,1,float16,fp8,0,9.623095703125
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16384,48,4,128,1,float16,float16,0,14.704568481445312
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16384,48,4,128,1,fp8,fp8,0,8.586353302001953
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16384,48,8,128,1,float16,fp8,0,8.559339141845703
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16384,48,8,128,1,fp8,fp8,0,8.824944305419923
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16384,48,8,128,1,float16,float16,0,15.356742858886719
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16384,48,48,128,1,float16,fp8,0,4.531289672851562
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16384,48,48,128,1,fp8,fp8,0,4.69276008605957
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16384,48,1,128,1,float16,float16,0,7.538038635253907
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16384,48,1,128,1,float16,fp8,0,4.557899093627929
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16384,48,1,128,1,fp8,fp8,0,4.391140747070312
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16384,48,2,128,1,float16,float16,0,7.535259246826172
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16384,48,2,128,1,float16,fp8,0,4.37041130065918
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16384,48,2,128,1,fp8,fp8,0,4.295375823974609
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16384,48,4,128,1,float16,fp8,0,4.131884765625
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16384,48,4,128,1,float16,float16,0,6.916659545898438
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16384,48,4,128,1,fp8,fp8,0,4.223175811767578
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16384,48,8,128,1,float16,float16,0,7.364281463623047
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16384,48,8,128,1,float16,fp8,0,4.342614364624024
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16384,48,8,128,1,fp8,fp8,0,4.435300827026367
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16384,48,48,128,1,float16,fp8,0,2.288540840148926
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16384,48,48,128,1,fp8,fp8,0,2.1946287155151367
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16384,48,1,128,1,float16,float16,0,3.016537666320801
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16384,48,1,128,1,float16,fp8,0,2.212246322631836
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16384,48,1,128,1,fp8,fp8,0,2.066147232055664
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16384,48,2,128,1,float16,float16,0,2.7447887420654298
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16384,48,2,128,1,float16,fp8,0,2.356787109375
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16384,48,2,128,1,fp8,fp8,0,2.099403190612793
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16384,48,4,128,1,float16,float16,0,2.81759033203125
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16384,48,4,128,1,float16,fp8,0,2.250966453552246
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16384,48,4,128,1,fp8,fp8,0,2.163934326171875
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16384,48,8,128,1,float16,float16,0,2.920235252380371
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16384,48,8,128,1,float16,fp8,0,2.430878448486328
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16384,48,8,128,1,fp8,fp8,0,2.1270095825195314
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,12288,48,1,128,1,float16,float16,0,16.942352294921875
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,12288,48,1,128,1,float16,fp8,0,10.006334686279297
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,12288,48,1,128,1,fp8,fp8,0,10.027180480957032
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16384,48,48,128,1,float16,float16,0,2.7881471633911135
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16384,48,48,128,1,float16,float16,0,13.421919250488282
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16384,48,48,128,1,float16,float16,0,6.398355102539062
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,12288,48,2,128,1,float16,float16,0,17.133688354492186
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,12288,48,2,128,1,float16,fp8,0,9.905496215820312
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,12288,48,2,128,1,fp8,fp8,0,9.381934356689452
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,12288,48,4,128,1,float16,fp8,0,9.877361297607422
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,12288,48,4,128,1,fp8,fp8,0,10.03676986694336
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,12288,48,4,128,1,float16,float16,0,15.202005004882812
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,12288,48,8,128,1,float16,fp8,0,10.33691864013672
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,12288,48,48,128,1,float16,float16,0,8.802310180664062
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,12288,48,8,128,1,fp8,fp8,0,11.377750396728516
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,12288,48,8,128,1,float16,float16,0,16.721775817871094
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,12288,48,48,128,1,float16,fp8,0,5.468689727783203
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,12288,48,48,128,1,fp8,fp8,0,5.0163616180419925
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,12288,48,1,128,1,float16,fp8,0,4.687542343139649
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,12288,48,1,128,1,fp8,fp8,0,5.065204620361328
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,12288,48,1,128,1,float16,float16,0,8.73304443359375
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,12288,48,2,128,1,float16,fp8,0,4.729927825927734
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,12288,48,2,128,1,float16,float16,0,7.512000274658203
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,12288,48,2,128,1,fp8,fp8,0,5.0589599609375
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,12288,48,4,128,1,float16,fp8,0,4.85460319519043
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,12288,48,4,128,1,float16,float16,0,8.588504028320312
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,12288,48,4,128,1,fp8,fp8,0,4.78667984008789
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,12288,48,8,128,1,float16,float16,0,8.483102416992187
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,12288,48,8,128,1,float16,fp8,0,4.746116638183594
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,12288,48,48,128,1,float16,float16,0,3.318756866455078
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,12288,48,8,128,1,fp8,fp8,0,5.201446533203125
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,12288,48,48,128,1,fp8,fp8,0,2.5862112045288086
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,12288,48,48,128,1,float16,fp8,0,2.7933231353759767
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,12288,48,1,128,1,float16,fp8,0,2.5064815521240233
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,12288,48,1,128,1,float16,float16,0,4.184105682373047
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,12288,48,1,128,1,fp8,fp8,0,2.531737518310547
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,12288,48,2,128,1,float16,float16,0,3.6202606201171874
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,12288,48,2,128,1,float16,fp8,0,2.6138320922851563
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,12288,48,2,128,1,fp8,fp8,0,2.5694847106933594
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,12288,48,4,128,1,float16,float16,0,3.291223907470703
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,12288,48,4,128,1,float16,fp8,0,2.499750328063965
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,12288,48,4,128,1,fp8,fp8,0,2.500912094116211
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,12288,48,8,128,1,float16,float16,0,3.7786609649658205
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,12288,48,48,128,1,float16,float16,0,1.6021743774414063
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,12288,48,8,128,1,float16,fp8,0,2.5911136627197267
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,12288,48,8,128,1,fp8,fp8,0,2.580948829650879
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,12288,48,48,128,1,float16,fp8,0,1.511415958404541
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,12288,48,48,128,1,fp8,fp8,0,1.6772111892700194
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,12288,48,1,128,1,float16,float16,0,1.428004837036133
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,12288,48,1,128,1,float16,fp8,0,1.2422032356262207
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,12288,48,1,128,1,fp8,fp8,0,1.218727970123291
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,12288,48,2,128,1,float16,float16,0,1.3972496032714843
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,12288,48,2,128,1,fp8,fp8,0,1.23056640625
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,12288,48,2,128,1,float16,fp8,0,1.6072320938110352
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,12288,48,4,128,1,float16,float16,0,1.3570655822753905
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,12288,48,4,128,1,fp8,fp8,0,1.219172763824463
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,12288,48,8,128,1,float16,float16,0,1.4006159782409668
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,12288,48,4,128,1,float16,fp8,0,1.8416431427001954
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,12288,48,8,128,1,float16,fp8,0,1.3558223724365235
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,12288,48,8,128,1,fp8,fp8,0,1.2198047637939453
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,10240,48,1,128,1,float16,fp8,0,7.311089324951172
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,10240,48,1,128,1,fp8,fp8,0,6.822684478759766
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,10240,48,1,128,1,float16,float16,0,11.57720947265625
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,10240,48,2,128,1,float16,float16,0,11.493998718261718
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,10240,48,2,128,1,fp8,fp8,0,6.926113891601562
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,10240,48,2,128,1,float16,fp8,0,7.502454376220703
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,10240,48,4,128,1,float16,fp8,0,7.092062377929688
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,10240,48,4,128,1,fp8,fp8,0,7.082039642333984
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,10240,48,4,128,1,float16,float16,0,11.521678161621093
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,10240,48,8,128,1,float16,fp8,0,7.300911712646484
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,10240,48,8,128,1,float16,float16,0,12.98021240234375
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,10240,48,48,128,1,float16,float16,0,5.627009582519531
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,10240,48,8,128,1,fp8,fp8,0,7.096598052978516
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,10240,48,48,128,1,float16,fp8,0,3.6807086944580076
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,10240,48,48,128,1,fp8,fp8,0,3.9801967620849608
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,10240,48,1,128,1,float16,fp8,0,3.3203582763671875
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,10240,48,1,128,1,float16,float16,0,5.518558502197266
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,10240,48,1,128,1,fp8,fp8,0,3.543467330932617
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,10240,48,2,128,1,float16,fp8,0,3.2815216064453123
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,10240,48,2,128,1,float16,float16,0,5.795076751708985
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,10240,48,2,128,1,fp8,fp8,0,3.626724624633789
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,10240,48,4,128,1,float16,float16,0,5.172555160522461
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,10240,48,4,128,1,fp8,fp8,0,3.3281375885009767
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,10240,48,4,128,1,float16,fp8,0,3.509859085083008
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,10240,48,48,128,1,float16,float16,0,2.123200035095215
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,10240,48,8,128,1,float16,fp8,0,3.2700977325439453
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,10240,48,8,128,1,fp8,fp8,0,3.4043121337890625
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,10240,48,48,128,1,float16,fp8,0,2.235411262512207
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,10240,48,48,128,1,fp8,fp8,0,1.8068592071533203
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,10240,48,8,128,1,float16,float16,0,5.803139114379883
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,10240,48,1,128,1,float16,float16,0,2.482371139526367
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,10240,48,1,128,1,fp8,fp8,0,1.6761360168457031
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,10240,48,1,128,1,float16,fp8,0,1.7570991516113281
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,10240,48,2,128,1,float16,float16,0,2.806705665588379
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,10240,48,2,128,1,float16,fp8,0,1.946156883239746
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,10240,48,2,128,1,fp8,fp8,0,1.7535072326660157
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,10240,48,4,128,1,float16,float16,0,2.3413440704345705
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,10240,48,4,128,1,fp8,fp8,0,1.667198371887207
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,10240,48,4,128,1,float16,fp8,0,2.099558448791504
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,10240,48,8,128,1,float16,float16,0,1.891326332092285
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,10240,48,8,128,1,float16,fp8,0,2.290096092224121
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,10240,48,48,128,1,float16,float16,0,1.1241616249084472
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,10240,48,8,128,1,fp8,fp8,0,1.6754064559936523
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,10240,48,48,128,1,float16,fp8,0,1.122913646697998
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,10240,48,1,128,1,float16,fp8,0,0.8854960441589356
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,10240,48,48,128,1,fp8,fp8,0,1.0407487869262695
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,10240,48,1,128,1,fp8,fp8,0,0.8846223831176758
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,10240,48,1,128,1,float16,float16,0,1.2402463912963868
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,10240,48,2,128,1,float16,fp8,0,0.8770895957946777
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,10240,48,2,128,1,float16,float16,0,0.9891327857971192
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,10240,48,2,128,1,fp8,fp8,0,0.8871600151062011
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,10240,48,4,128,1,float16,float16,0,1.289902400970459
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,10240,48,4,128,1,float16,fp8,0,0.8756655693054199
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,10240,48,4,128,1,fp8,fp8,0,0.9862064361572266
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,10240,48,8,128,1,float16,float16,0,1.0135279655456544
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,10240,48,8,128,1,fp8,fp8,0,0.8805760383605957
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,10240,48,8,128,1,float16,fp8,0,1.3387231826782227
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,8192,48,1,128,1,float16,fp8,0,9.174995422363281
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,8192,48,1,128,1,fp8,fp8,0,9.796083068847656
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,8192,48,1,128,1,float16,float16,0,14.97497100830078
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,8192,48,2,128,1,float16,float16,0,14.539752197265624
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,8192,48,2,128,1,float16,fp8,0,9.639472198486327
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,8192,48,2,128,1,fp8,fp8,0,9.791529846191406
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,8192,48,4,128,1,float16,fp8,0,9.326403045654297
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,8192,48,4,128,1,fp8,fp8,0,9.610892486572265
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,8192,48,4,128,1,float16,float16,0,15.173968505859374
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,8192,48,8,128,1,float16,fp8,0,9.067246246337891
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,8192,48,8,128,1,float16,float16,0,16.887741088867188
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,8192,48,48,128,1,float16,float16,0,8.194904327392578
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,8192,48,8,128,1,fp8,fp8,0,9.648480224609376
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,8192,48,48,128,1,float16,fp8,0,4.9059806823730465
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,8192,48,48,128,1,fp8,fp8,0,5.239678573608399
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,8192,48,1,128,1,float16,fp8,0,4.629312133789062
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,8192,48,1,128,1,fp8,fp8,0,4.492118453979492
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,8192,48,1,128,1,float16,float16,0,7.279222106933593
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,8192,48,2,128,1,float16,fp8,0,4.641988754272461
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,8192,48,2,128,1,fp8,fp8,0,4.502068710327149
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,8192,48,2,128,1,float16,float16,0,7.8335212707519535
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,8192,48,4,128,1,float16,float16,0,7.574980926513672
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,8192,48,4,128,1,float16,fp8,0,4.645264053344727
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,8192,48,4,128,1,fp8,fp8,0,4.527174377441407
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,8192,48,48,128,1,float16,float16,0,3.172279930114746
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,8192,48,8,128,1,fp8,fp8,0,4.6989295959472654
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,8192,48,8,128,1,float16,fp8,0,4.510603332519532
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,8192,48,8,128,1,float16,float16,0,8.13364486694336
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,8192,48,48,128,1,float16,fp8,0,2.6721296310424805
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,8192,48,48,128,1,fp8,fp8,0,2.4104639053344727
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,8192,48,1,128,1,float16,float16,0,3.2771793365478517
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,8192,48,1,128,1,float16,fp8,0,2.5886640548706055
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,8192,48,1,128,1,fp8,fp8,0,2.167737579345703
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,8192,48,2,128,1,float16,float16,0,3.127025604248047
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,8192,48,2,128,1,fp8,fp8,0,2.4605791091918947
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,8192,48,2,128,1,float16,fp8,0,2.4071168899536133
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,8192,48,4,128,1,float16,float16,0,2.8767679214477537
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,8192,48,4,128,1,fp8,fp8,0,2.371059226989746
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,8192,48,8,128,1,float16,float16,0,2.4409759521484373
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,8192,48,4,128,1,float16,fp8,0,2.529092788696289
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,8192,48,8,128,1,float16,fp8,0,2.4538543701171873
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,8192,48,48,128,1,float16,fp8,0,1.2148143768310546
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,8192,48,48,128,1,float16,float16,0,2.0985391616821287
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,8192,48,8,128,1,fp8,fp8,0,2.3397167205810545
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,8192,48,48,128,1,fp8,fp8,0,1.2333503723144532
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,8192,48,1,128,1,float16,float16,0,1.2763792037963868
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,8192,48,1,128,1,fp8,fp8,0,1.1322735786437987
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,8192,48,1,128,1,float16,fp8,0,1.5687808036804198
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,8192,48,2,128,1,float16,float16,0,1.3762864112854003
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,8192,48,2,128,1,float16,fp8,0,1.3755151748657226
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,8192,48,2,128,1,fp8,fp8,0,1.133903980255127
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,8192,48,4,128,1,float16,float16,0,1.298523235321045
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,8192,48,4,128,1,float16,fp8,0,1.3205904006958007
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,8192,48,4,128,1,fp8,fp8,0,1.2394736289978028
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,8192,48,8,128,1,float16,float16,0,1.3076191902160645
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,8192,48,8,128,1,fp8,fp8,0,1.1669183731079102
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,8192,48,8,128,1,float16,fp8,0,1.438974380493164
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,8192,48,48,128,1,float16,fp8,0,1.0194704055786132
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,8192,48,48,128,1,fp8,fp8,0,0.6578192234039306
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,8192,48,1,128,1,float16,float16,0,0.7482560157775879
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,8192,48,1,128,1,float16,fp8,0,0.6497663974761962
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,8192,48,1,128,1,fp8,fp8,0,0.6039487838745117
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,8192,48,48,128,1,float16,float16,0,0.728115177154541
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,8192,48,2,128,1,float16,float16,0,0.8051263809204101
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,8192,48,2,128,1,float16,fp8,0,0.5954512119293213
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,8192,48,2,128,1,fp8,fp8,0,0.5991151809692383
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,8192,48,4,128,1,float16,float16,0,0.811575984954834
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,8192,48,4,128,1,float16,fp8,0,0.5996143817901611
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,8192,48,4,128,1,fp8,fp8,0,0.5987936019897461
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,8192,48,8,128,1,float16,fp8,0,0.5996032238006592
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,8192,48,8,128,1,fp8,fp8,0,0.599118423461914
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,8192,48,8,128,1,float16,float16,0,0.7922063827514648
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,6144,48,1,128,1,float16,fp8,0,5.1223102569580075
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,6144,48,1,128,1,fp8,fp8,0,5.2111774444580075
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,6144,48,1,128,1,float16,float16,0,8.104657745361328
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,6144,48,2,128,1,float16,fp8,0,5.201545715332031
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,6144,48,2,128,1,fp8,fp8,0,5.298118209838867
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,6144,48,2,128,1,float16,float16,0,8.092052459716797
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,6144,48,4,128,1,float16,float16,0,8.240153503417968
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,6144,48,4,128,1,float16,fp8,0,5.11444320678711
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,6144,48,4,128,1,fp8,fp8,0,5.252705764770508
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,6144,48,48,128,1,float16,float16,0,3.6594688415527346
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,6144,48,8,128,1,float16,float16,0,8.856377410888673
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,6144,48,8,128,1,float16,fp8,0,5.190121459960937
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,6144,48,8,128,1,fp8,fp8,0,5.395163345336914
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,6144,48,48,128,1,float16,fp8,0,2.865585517883301
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,6144,48,48,128,1,fp8,fp8,0,2.962481689453125
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,6144,48,1,128,1,float16,fp8,0,2.694540786743164
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,6144,48,1,128,1,float16,float16,0,3.323191833496094
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,6144,48,1,128,1,fp8,fp8,0,2.6635583877563476
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,6144,48,2,128,1,float16,fp8,0,2.781447982788086
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,6144,48,2,128,1,float16,float16,0,3.4206432342529296
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,6144,48,2,128,1,fp8,fp8,0,2.9032272338867187
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,6144,48,4,128,1,float16,float16,0,4.246755218505859
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,6144,48,4,128,1,float16,fp8,0,2.793231964111328
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,6144,48,8,128,1,float16,float16,0,3.360950469970703
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,6144,48,4,128,1,fp8,fp8,0,3.332564926147461
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,6144,48,8,128,1,fp8,fp8,0,2.7013504028320314
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,6144,48,48,128,1,float16,fp8,0,1.5470591545104981
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,6144,48,8,128,1,float16,fp8,0,2.70830078125
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,6144,48,48,128,1,float16,float16,0,2.1610591888427733
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,6144,48,48,128,1,fp8,fp8,0,1.544547176361084
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,6144,48,1,128,1,float16,fp8,0,1.4094736099243164
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,6144,48,1,128,1,float16,float16,0,1.6298368453979493
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,6144,48,1,128,1,fp8,fp8,0,1.5798975944519043
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,6144,48,2,128,1,float16,fp8,0,1.2824928283691406
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,6144,48,2,128,1,float16,float16,0,1.4366607666015625
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,6144,48,2,128,1,fp8,fp8,0,1.661412811279297
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,6144,48,4,128,1,float16,float16,0,1.4089152336120605
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,6144,48,4,128,1,float16,fp8,0,1.281057643890381
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,6144,48,4,128,1,fp8,fp8,0,1.5161664009094238
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,6144,48,8,128,1,float16,float16,0,1.5720527648925782
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,6144,48,8,128,1,float16,fp8,0,1.404199981689453
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,6144,48,8,128,1,fp8,fp8,0,1.2865424156188965
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,6144,48,48,128,1,float16,float16,0,0.8423888206481933
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,6144,48,48,128,1,float16,fp8,0,1.014516830444336
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,6144,48,48,128,1,fp8,fp8,0,0.7489232063293457
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,6144,48,1,128,1,float16,float16,0,0.7508351802825928
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,6144,48,1,128,1,float16,fp8,0,0.6889967918395996
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,6144,48,2,128,1,float16,float16,0,0.7446832180023193
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,6144,48,1,128,1,fp8,fp8,0,0.9404272079467774
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,6144,48,2,128,1,float16,fp8,0,0.6817008018493652
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,6144,48,2,128,1,fp8,fp8,0,0.6889616012573242
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,6144,48,4,128,1,float16,float16,0,0.7494160175323487
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,6144,48,4,128,1,float16,fp8,0,0.6789760112762451
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,6144,48,4,128,1,fp8,fp8,0,0.7859024047851563
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,6144,48,8,128,1,float16,float16,0,0.7688096046447754
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,6144,48,48,128,1,float16,float16,0,0.45378718376159666
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,6144,48,8,128,1,float16,fp8,0,0.7744927883148194
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,6144,48,48,128,1,float16,fp8,0,0.5134431838989257
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,6144,48,8,128,1,fp8,fp8,0,0.673041582107544
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,6144,48,48,128,1,fp8,fp8,0,0.40772318840026855
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,6144,48,1,128,1,float16,float16,0,0.4136335849761963
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,6144,48,1,128,1,fp8,fp8,0,0.36952641010284426
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,6144,48,1,128,1,float16,fp8,0,0.477945613861084
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,6144,48,2,128,1,float16,float16,0,0.4158207893371582
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,6144,48,2,128,1,float16,fp8,0,0.36968801021575926
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,6144,48,2,128,1,fp8,fp8,0,0.37092161178588867
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,6144,48,4,128,1,float16,float16,0,0.41974239349365233
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,6144,48,4,128,1,float16,fp8,0,0.37198400497436523
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,6144,48,4,128,1,fp8,fp8,0,0.3696192026138306
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,6144,48,8,128,1,float16,float16,0,0.4181215763092041
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,6144,48,8,128,1,float16,fp8,0,0.3749200105667114
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,6144,48,8,128,1,fp8,fp8,0,0.3718767881393433
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,4096,48,1,128,1,fp8,fp8,0,4.84741439819336
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,4096,48,1,128,1,float16,fp8,0,5.035055923461914
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,4096,48,2,128,1,float16,float16,0,7.122030639648438
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,4096,48,1,128,1,float16,float16,0,7.947869110107422
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,4096,48,2,128,1,float16,fp8,0,4.9734447479248045
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,4096,48,2,128,1,fp8,fp8,0,5.078815841674805
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,4096,48,4,128,1,float16,fp8,0,4.803879928588867
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,4096,48,4,128,1,float16,float16,0,8.087086486816407
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,4096,48,4,128,1,fp8,fp8,0,4.965779113769531
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,4096,48,8,128,1,float16,fp8,0,5.002374267578125
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,4096,48,8,128,1,float16,float16,0,8.612059020996094
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,4096,48,48,128,1,float16,float16,0,4.179542541503906
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,4096,48,8,128,1,fp8,fp8,0,5.133438491821289
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,4096,48,48,128,1,float16,fp8,0,2.7654512405395506
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,4096,48,48,128,1,fp8,fp8,0,3.268753433227539
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,4096,48,1,128,1,float16,fp8,0,2.5424671173095703
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,4096,48,1,128,1,fp8,fp8,0,2.4003215789794923
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,4096,48,1,128,1,float16,float16,0,3.9636558532714843
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,4096,48,2,128,1,float16,float16,0,3.2617729187011717
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,4096,48,2,128,1,float16,fp8,0,2.4426687240600584
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,4096,48,2,128,1,fp8,fp8,0,2.6568031311035156
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,4096,48,4,128,1,float16,fp8,0,2.446886444091797
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,4096,48,4,128,1,fp8,fp8,0,2.5106319427490233
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,4096,48,4,128,1,float16,float16,0,4.37402229309082
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,4096,48,8,128,1,float16,float16,0,3.53175048828125
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,4096,48,8,128,1,float16,fp8,0,2.539081573486328
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,4096,48,8,128,1,fp8,fp8,0,2.391761589050293
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,4096,48,48,128,1,float16,float16,0,2.38098087310791
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,4096,48,48,128,1,fp8,fp8,0,1.4088879585266114
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,4096,48,48,128,1,float16,fp8,0,1.5105199813842773
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,4096,48,1,128,1,float16,float16,0,1.3274383544921875
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,4096,48,1,128,1,float16,fp8,0,1.9009391784667968
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,4096,48,1,128,1,fp8,fp8,0,1.236945629119873
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,4096,48,2,128,1,float16,float16,0,1.3271615982055665
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,4096,48,2,128,1,float16,fp8,0,1.7331327438354491
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,4096,48,2,128,1,fp8,fp8,0,1.3411727905273438
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,4096,48,4,128,1,float16,float16,0,1.4046192169189453
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,4096,48,4,128,1,float16,fp8,0,1.4108976364135741
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,4096,48,4,128,1,fp8,fp8,0,1.2253888130187989
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,4096,48,8,128,1,float16,float16,0,1.3576080322265625
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,4096,48,48,128,1,float16,float16,0,0.7982287883758545
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,4096,48,8,128,1,fp8,fp8,0,1.2568479537963868
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,4096,48,8,128,1,float16,fp8,0,1.663862419128418
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,4096,48,48,128,1,float16,fp8,0,1.0651439666748046
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,4096,48,48,128,1,fp8,fp8,0,0.7284399986267089
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,4096,48,1,128,1,float16,float16,0,0.7032127857208252
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,4096,48,1,128,1,float16,fp8,0,0.674283218383789
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,4096,48,1,128,1,fp8,fp8,0,0.6962656021118164
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,4096,48,2,128,1,float16,float16,0,0.6914576053619385
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,4096,48,2,128,1,float16,fp8,0,0.6534736156463623
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,4096,48,2,128,1,fp8,fp8,0,0.6402416229248047
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,4096,48,4,128,1,float16,float16,0,0.7232207775115966
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,4096,48,4,128,1,float16,fp8,0,0.6402224063873291
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,4096,48,4,128,1,fp8,fp8,0,0.6904128074645997
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,4096,48,8,128,1,float16,float16,0,0.7149680137634278
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,4096,48,48,128,1,float16,float16,0,0.4303152084350586
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,4096,48,8,128,1,fp8,fp8,0,0.6410848140716553
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,4096,48,8,128,1,float16,fp8,0,0.8123151779174804
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,4096,48,48,128,1,float16,fp8,0,0.458403205871582
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,4096,48,48,128,1,fp8,fp8,0,0.3867727994918823
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,4096,48,1,128,1,float16,float16,0,0.38309440612792967
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,4096,48,1,128,1,float16,fp8,0,0.3556240081787109
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,4096,48,1,128,1,fp8,fp8,0,0.43186559677124026
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,4096,48,2,128,1,float16,float16,0,0.3780287981033325
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,4096,48,2,128,1,float16,fp8,0,0.34491040706634524
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,4096,48,2,128,1,fp8,fp8,0,0.356278395652771
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,4096,48,4,128,1,float16,float16,0,0.39384000301361083
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,4096,48,4,128,1,float16,fp8,0,0.34458720684051514
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,4096,48,4,128,1,fp8,fp8,0,0.3423007965087891
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,4096,48,8,128,1,float16,float16,0,0.38876640796661377
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,4096,48,48,128,1,float16,float16,0,0.23790080547332765
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,4096,48,8,128,1,float16,fp8,0,0.3445280075073242
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,4096,48,8,128,1,fp8,fp8,0,0.3436352014541626
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,4096,48,48,128,1,float16,fp8,0,0.2174015998840332
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,4096,48,48,128,1,fp8,fp8,0,0.21545920372009278
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,4096,48,1,128,1,float16,float16,0,0.21375679969787598
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,4096,48,1,128,1,float16,fp8,0,0.19437919855117797
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,4096,48,1,128,1,fp8,fp8,0,0.19212160110473633
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,4096,48,2,128,1,float16,float16,0,0.20825119018554689
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,4096,48,2,128,1,float16,fp8,0,0.19562239646911622
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,4096,48,2,128,1,fp8,fp8,0,0.1955664038658142
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,4096,48,4,128,1,float16,float16,0,0.2136512041091919
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,4096,48,4,128,1,float16,fp8,0,0.19188480377197265
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,4096,48,4,128,1,fp8,fp8,0,0.19410719871520996
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,4096,48,8,128,1,float16,float16,0,0.2178352117538452
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,4096,48,8,128,1,float16,fp8,0,0.1910272002220154
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,4096,48,8,128,1,fp8,fp8,0,0.19092799425125123
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,3072,48,1,128,1,float16,fp8,0,2.8981840133666994
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,3072,48,1,128,1,fp8,fp8,0,2.8830240249633787
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,3072,48,1,128,1,float16,float16,0,3.9483871459960938
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,3072,48,2,128,1,float16,float16,0,4.008606338500977
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,3072,48,2,128,1,float16,fp8,0,2.908460807800293
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,3072,48,2,128,1,fp8,fp8,0,2.984836769104004
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,3072,48,4,128,1,float16,fp8,0,2.906372833251953
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,3072,48,4,128,1,float16,float16,0,4.621206283569336
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,3072,48,4,128,1,fp8,fp8,0,2.9172224044799804
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,3072,48,8,128,1,float16,float16,0,4.298676681518555
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,3072,48,8,128,1,float16,fp8,0,2.8981456756591797
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,3072,48,48,128,1,float16,float16,0,1.9151039123535156
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,3072,48,8,128,1,fp8,fp8,0,2.9289615631103514
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,3072,48,48,128,1,fp8,fp8,0,1.7629119873046875
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,3072,48,48,128,1,float16,fp8,0,1.9761791229248047
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,3072,48,1,128,1,float16,float16,0,2.0437103271484376
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,3072,48,1,128,1,float16,fp8,0,1.4853856086730957
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,3072,48,1,128,1,fp8,fp8,0,1.470315170288086
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,3072,48,2,128,1,float16,float16,0,1.7814815521240235
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,3072,48,2,128,1,fp8,fp8,0,1.486729621887207
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,3072,48,2,128,1,float16,fp8,0,2.171703910827637
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,3072,48,4,128,1,float16,float16,0,1.5560928344726563
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,3072,48,4,128,1,float16,fp8,0,1.7626655578613282
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,3072,48,4,128,1,fp8,fp8,0,1.4768848419189453
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,3072,48,8,128,1,float16,float16,0,1.5970319747924804
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,3072,48,8,128,1,float16,fp8,0,1.5791248321533202
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,3072,48,48,128,1,float16,float16,0,0.9873295783996582
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,3072,48,8,128,1,fp8,fp8,0,1.604902458190918
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,3072,48,48,128,1,fp8,fp8,0,0.9063424110412598
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,3072,48,48,128,1,float16,fp8,0,1.1908304214477539
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,3072,48,1,128,1,float16,float16,0,0.9377632141113281
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,3072,48,1,128,1,float16,fp8,0,0.7959695816040039
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,3072,48,1,128,1,fp8,fp8,0,0.7588912010192871
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,3072,48,2,128,1,float16,float16,0,1.0549967765808106
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,3072,48,2,128,1,float16,fp8,0,0.807852840423584
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,3072,48,2,128,1,fp8,fp8,0,0.7583807945251465
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,3072,48,4,128,1,float16,fp8,0,0.7936960220336914
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,3072,48,4,128,1,fp8,fp8,0,0.7535744190216065
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,3072,48,4,128,1,float16,float16,0,1.0484736442565918
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,3072,48,8,128,1,float16,float16,0,0.8286591529846191
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,3072,48,8,128,1,float16,fp8,0,0.7556384086608887
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,3072,48,8,128,1,fp8,fp8,0,1.1202159881591798
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,3072,48,48,128,1,float16,fp8,0,0.4640399932861328
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,3072,48,48,128,1,float16,float16,0,0.73613600730896
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,3072,48,1,128,1,float16,float16,0,0.42891678810119627
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,3072,48,48,128,1,fp8,fp8,0,0.4800384044647217
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,3072,48,1,128,1,float16,fp8,0,0.5879024028778076
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,3072,48,1,128,1,fp8,fp8,0,0.39699840545654297
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,3072,48,2,128,1,float16,float16,0,0.43938560485839845
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,3072,48,2,128,1,fp8,fp8,0,0.39716639518737795
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,3072,48,2,128,1,float16,fp8,0,0.5881711959838867
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,3072,48,4,128,1,float16,float16,0,0.4410719871520996
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,3072,48,4,128,1,float16,fp8,0,0.3957871913909912
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,3072,48,4,128,1,fp8,fp8,0,0.45391039848327636
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,3072,48,8,128,1,float16,float16,0,0.45599517822265623
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,3072,48,8,128,1,float16,fp8,0,0.39440319538116453
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,3072,48,48,128,1,float16,float16,0,0.2853503942489624
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,3072,48,8,128,1,fp8,fp8,0,0.4025279998779297
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,3072,48,48,128,1,float16,fp8,0,0.25127520561218264
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,3072,48,1,128,1,float16,fp8,0,0.22054240703582764
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,3072,48,1,128,1,float16,float16,0,0.2464672088623047
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,3072,48,48,128,1,fp8,fp8,0,0.2513472080230713
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,3072,48,1,128,1,fp8,fp8,0,0.21548960208892823
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,3072,48,2,128,1,float16,float16,0,0.23631041049957274
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,3072,48,2,128,1,float16,fp8,0,0.22574241161346437
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,3072,48,2,128,1,fp8,fp8,0,0.21815838813781738
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,3072,48,4,128,1,float16,float16,0,0.23475360870361328
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,3072,48,4,128,1,float16,fp8,0,0.21716160774230958
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,3072,48,4,128,1,fp8,fp8,0,0.21967680454254152
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,3072,48,8,128,1,float16,float16,0,0.24218719005584716
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,3072,48,8,128,1,float16,fp8,0,0.21753919124603271
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,3072,48,48,128,1,float16,float16,0,0.156550395488739
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,3072,48,8,128,1,fp8,fp8,0,0.2176975965499878
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,3072,48,48,128,1,fp8,fp8,0,0.14375200271606445
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,3072,48,48,128,1,float16,fp8,0,0.14251519441604615
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,3072,48,1,128,1,float16,float16,0,0.13409600257873536
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,3072,48,1,128,1,float16,fp8,0,0.125380802154541
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,3072,48,1,128,1,fp8,fp8,0,0.12387200593948364
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,3072,48,2,128,1,float16,fp8,0,0.1258784055709839
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,3072,48,2,128,1,float16,float16,0,0.13378080129623413
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,3072,48,2,128,1,fp8,fp8,0,0.12376639842987061
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,3072,48,4,128,1,float16,float16,0,0.13219360113143921
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,3072,48,4,128,1,float16,fp8,0,0.12516160011291505
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,3072,48,8,128,1,float16,float16,0,0.13622560501098632
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,3072,48,4,128,1,fp8,fp8,0,0.12436800003051758
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,3072,48,8,128,1,float16,fp8,0,0.12328319549560547
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,3072,48,8,128,1,fp8,fp8,0,0.12462400197982788
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,2048,48,1,128,1,float16,fp8,0,2.9304895401000977
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,2048,48,1,128,1,fp8,fp8,0,2.929540824890137
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,2048,48,1,128,1,float16,float16,0,3.614972686767578
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,2048,48,2,128,1,float16,float16,0,3.9616512298583983
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,2048,48,2,128,1,float16,fp8,0,2.929607963562012
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,2048,48,2,128,1,fp8,fp8,0,2.9360912322998045
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,2048,48,4,128,1,float16,fp8,0,2.9971263885498045
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,2048,48,4,128,1,float16,float16,0,4.660921478271485
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,2048,48,4,128,1,fp8,fp8,0,2.9287471771240234
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,2048,48,8,128,1,float16,float16,0,4.022451019287109
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,2048,48,8,128,1,float16,fp8,0,2.945806312561035
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,2048,48,48,128,1,float16,float16,0,2.208135986328125
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,2048,48,8,128,1,fp8,fp8,0,2.94345760345459
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,2048,48,48,128,1,fp8,fp8,0,1.8385679244995117
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,2048,48,48,128,1,float16,fp8,0,1.9679040908813477
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,2048,48,1,128,1,float16,float16,0,1.6181407928466798
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,2048,48,1,128,1,float16,fp8,0,1.4815808296203614
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,2048,48,1,128,1,fp8,fp8,0,1.4868608474731446
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,2048,48,2,128,1,float16,float16,0,1.5180591583251952
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,2048,48,2,128,1,fp8,fp8,0,1.4780879974365235
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,2048,48,2,128,1,float16,fp8,0,1.6236928939819335
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,2048,48,4,128,1,float16,float16,0,1.567240047454834
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,2048,48,4,128,1,float16,fp8,0,1.5827119827270508
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,2048,48,4,128,1,fp8,fp8,0,1.474169635772705
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,2048,48,8,128,1,float16,float16,0,1.5883968353271485
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,2048,48,8,128,1,fp8,fp8,0,1.4772432327270508
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,2048,48,8,128,1,float16,fp8,0,1.6543920516967774
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,2048,48,48,128,1,float16,float16,0,1.0237215995788573
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,2048,48,48,128,1,float16,fp8,0,0.9740688323974609
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,2048,48,1,128,1,float16,float16,0,0.7997312068939209
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,2048,48,48,128,1,fp8,fp8,0,0.9330831527709961
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,2048,48,1,128,1,fp8,fp8,0,0.7639647960662842
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,2048,48,1,128,1,float16,fp8,0,1.020683193206787
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,2048,48,2,128,1,float16,fp8,0,0.7563360214233399
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,2048,48,2,128,1,float16,float16,0,0.8198384284973145
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,2048,48,2,128,1,fp8,fp8,0,0.7587664127349854
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,2048,48,4,128,1,float16,float16,0,0.7997968196868896
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,2048,48,4,128,1,float16,fp8,0,0.757316780090332
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,2048,48,4,128,1,fp8,fp8,0,0.8896639823913575
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,2048,48,8,128,1,float16,float16,0,0.8198687553405761
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,2048,48,8,128,1,float16,fp8,0,0.758790397644043
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,2048,48,48,128,1,float16,float16,0,0.5349904060363769
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,2048,48,8,128,1,fp8,fp8,0,0.7550543785095215
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,2048,48,48,128,1,float16,fp8,0,0.4833536148071289
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,2048,48,48,128,1,fp8,fp8,0,0.4854752063751221
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,2048,48,1,128,1,float16,float16,0,0.4183951854705811
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,2048,48,1,128,1,float16,fp8,0,0.3959536075592041
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,2048,48,1,128,1,fp8,fp8,0,0.3955888032913208
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,2048,48,2,128,1,float16,float16,0,0.4175680160522461
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,2048,48,2,128,1,float16,fp8,0,0.3953183889389038
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,2048,48,2,128,1,fp8,fp8,0,0.39528799057006836
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,2048,48,4,128,1,float16,float16,0,0.4467440128326416
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,2048,48,4,128,1,float16,fp8,0,0.3955984115600586
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,2048,48,4,128,1,fp8,fp8,0,0.3940016031265259
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,2048,48,8,128,1,float16,fp8,0,0.39364640712738036
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,2048,48,8,128,1,float16,float16,0,0.47175040245056155
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,2048,48,8,128,1,fp8,fp8,0,0.39226880073547366
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,2048,48,48,128,1,float16,float16,0,0.280187201499939
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,2048,48,48,128,1,float16,fp8,0,0.2890912055969238
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,2048,48,48,128,1,fp8,fp8,0,0.2576128005981445
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,2048,48,1,128,1,float16,float16,0,0.22582559585571288
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,2048,48,1,128,1,float16,fp8,0,0.24212799072265626
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,2048,48,1,128,1,fp8,fp8,0,0.21211040019989014
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,2048,48,2,128,1,float16,float16,0,0.22533280849456788
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,2048,48,2,128,1,float16,fp8,0,0.2128592014312744
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,2048,48,2,128,1,fp8,fp8,0,0.2140239953994751
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,2048,48,4,128,1,float16,float16,0,0.22888000011444093
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,2048,48,4,128,1,float16,fp8,0,0.21324479579925537
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,2048,48,4,128,1,fp8,fp8,0,0.2121504068374634
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,2048,48,8,128,1,float16,float16,0,0.23255519866943358
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,2048,48,8,128,1,float16,fp8,0,0.21238560676574708
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,2048,48,8,128,1,fp8,fp8,0,0.21154561042785644
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,2048,48,48,128,1,float16,float16,0,0.15446399450302123
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,2048,48,48,128,1,float16,fp8,0,0.14252159595489503
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,2048,48,48,128,1,fp8,fp8,0,0.1442415952682495
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,2048,48,1,128,1,float16,float16,0,0.1250704050064087
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,2048,48,1,128,1,float16,fp8,0,0.1182144045829773
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,2048,48,1,128,1,fp8,fp8,0,0.11839040517807006
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,2048,48,2,128,1,float16,float16,0,0.12596479654312134
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,2048,48,2,128,1,float16,fp8,0,0.11852799654006958
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,2048,48,2,128,1,fp8,fp8,0,0.11678400039672851
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,2048,48,4,128,1,float16,float16,0,0.1275488018989563
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,2048,48,4,128,1,float16,fp8,0,0.11811200380325318
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,2048,48,4,128,1,fp8,fp8,0,0.11753280162811279
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,2048,48,8,128,1,float16,float16,0,0.13080639839172364
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,2048,48,8,128,1,float16,fp8,0,0.11745760440826417
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,2048,48,8,128,1,fp8,fp8,0,0.11745920181274414
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,2048,48,48,128,1,float16,float16,0,0.09247199892997741
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,2048,48,48,128,1,float16,fp8,0,0.08294879794120788
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,2048,48,48,128,1,fp8,fp8,0,0.08262720108032226
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,2048,48,1,128,1,float16,float16,0,0.07577279806137086
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,2048,48,1,128,1,float16,fp8,0,0.07155200242996215
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,2048,48,1,128,1,fp8,fp8,0,0.07147840261459351
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,2048,48,2,128,1,float16,float16,0,0.07555199861526489
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,2048,48,2,128,1,float16,fp8,0,0.07131040096282959
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,2048,48,4,128,1,float16,float16,0,0.07593920230865478
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,2048,48,2,128,1,fp8,fp8,0,0.07160159945487976
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,2048,48,4,128,1,float16,fp8,0,0.07121440172195434
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,2048,48,4,128,1,fp8,fp8,0,0.07182239890098571
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,2048,48,8,128,1,float16,float16,0,0.07625920176506043
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,2048,48,8,128,1,float16,fp8,0,0.0716048002243042
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,2048,48,8,128,1,fp8,fp8,0,0.07152479887008667
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1536,48,1,128,1,float16,fp8,0,1.867211151123047
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1536,48,1,128,1,fp8,fp8,0,1.8656576156616211
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1536,48,1,128,1,float16,float16,0,2.099603271484375
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1536,48,2,128,1,float16,float16,0,1.8692224502563477
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1536,48,2,128,1,float16,fp8,0,1.8631664276123048
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1536,48,2,128,1,fp8,fp8,0,1.8615215301513672
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1536,48,4,128,1,float16,float16,0,2.614313507080078
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1536,48,4,128,1,float16,fp8,0,1.8946063995361329
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1536,48,4,128,1,fp8,fp8,0,1.8610544204711914
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1536,48,8,128,1,float16,float16,0,2.208683204650879
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1536,48,8,128,1,fp8,fp8,0,1.8567728042602538
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1536,48,8,128,1,float16,fp8,0,2.2751583099365233
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1536,48,48,128,1,float16,float16,0,1.326193618774414
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1536,48,48,128,1,float16,fp8,0,1.3798159599304198
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1536,48,48,128,1,fp8,fp8,0,1.210643196105957
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1536,48,1,128,1,float16,float16,0,0.9683792114257812
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1536,48,1,128,1,float16,fp8,0,1.2453856468200684
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1536,48,1,128,1,fp8,fp8,0,0.9487808227539063
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1536,48,2,128,1,float16,float16,0,0.952940845489502
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1536,48,2,128,1,float16,fp8,0,0.9434576034545898
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1536,48,2,128,1,fp8,fp8,0,0.9485808372497558
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1536,48,4,128,1,float16,float16,0,0.9795951843261719
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1536,48,4,128,1,float16,fp8,0,0.9512415885925293
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1536,48,4,128,1,fp8,fp8,0,0.9422800064086914
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1536,48,8,128,1,float16,float16,0,1.0148320198059082
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1536,48,8,128,1,float16,fp8,0,0.9732751846313477
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1536,48,48,128,1,float16,float16,0,0.6754735946655274
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1536,48,8,128,1,fp8,fp8,0,0.9416624069213867
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1536,48,48,128,1,float16,fp8,0,0.6323599815368652
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1536,48,1,128,1,float16,float16,0,0.5033999919891358
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1536,48,48,128,1,fp8,fp8,0,0.6815552234649658
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1536,48,1,128,1,float16,fp8,0,0.4863999843597412
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1536,48,2,128,1,float16,float16,0,0.5036015987396241
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1536,48,1,128,1,fp8,fp8,0,0.4992991924285889
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1536,48,2,128,1,float16,fp8,0,0.4876863956451416
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1536,48,2,128,1,fp8,fp8,0,0.4863999843597412
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1536,48,4,128,1,float16,fp8,0,0.4883584022521973
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1536,48,4,128,1,float16,float16,0,0.5294079780578613
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1536,48,4,128,1,fp8,fp8,0,0.4877488136291504
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1536,48,8,128,1,float16,float16,0,0.5195248126983643
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1536,48,8,128,1,float16,fp8,0,0.485595178604126
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1536,48,48,128,1,float16,float16,0,0.3517568111419678
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1536,48,8,128,1,fp8,fp8,0,0.5254591941833496
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1536,48,48,128,1,float16,fp8,0,0.3236128091812134
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1536,48,1,128,1,float16,float16,0,0.2915359973907471
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1536,48,48,128,1,fp8,fp8,0,0.3243056058883667
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1536,48,1,128,1,float16,fp8,0,0.2563136100769043
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1536,48,1,128,1,fp8,fp8,0,0.25634720325469973
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1536,48,2,128,1,float16,float16,0,0.2678112030029297
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1536,48,2,128,1,float16,fp8,0,0.25634241104125977
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1536,48,2,128,1,fp8,fp8,0,0.256057596206665
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1536,48,4,128,1,float16,float16,0,0.2702575922012329
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1536,48,4,128,1,float16,fp8,0,0.25552639961242674
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1536,48,4,128,1,fp8,fp8,0,0.2555567979812622
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1536,48,8,128,1,float16,float16,0,0.2781152009963989
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1536,48,8,128,1,float16,fp8,0,0.25578720569610597
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1536,48,48,128,1,float16,float16,0,0.18991199731826783
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1536,48,8,128,1,fp8,fp8,0,0.25440800189971924
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1536,48,48,128,1,float16,fp8,0,0.17432639598846436
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1536,48,1,128,1,float16,float16,0,0.14592959880828857
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1536,48,48,128,1,fp8,fp8,0,0.17510080337524414
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1536,48,1,128,1,fp8,fp8,0,0.1394736051559448
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1536,48,1,128,1,float16,fp8,0,0.140283203125
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1536,48,2,128,1,float16,float16,0,0.1476032018661499
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1536,48,2,128,1,float16,fp8,0,0.1389631986618042
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1536,48,4,128,1,float16,float16,0,0.14901119470596313
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1536,48,2,128,1,fp8,fp8,0,0.140664005279541
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1536,48,4,128,1,float16,fp8,0,0.1396944046020508
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1536,48,4,128,1,fp8,fp8,0,0.14078400135040284
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1536,48,8,128,1,float16,float16,0,0.14994239807128906
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1536,48,8,128,1,float16,fp8,0,0.13890880346298218
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1536,48,8,128,1,fp8,fp8,0,0.14059040546417237
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1536,48,48,128,1,float16,float16,0,0.10393279790878296
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1536,48,48,128,1,float16,fp8,0,0.09850239753723145
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1536,48,48,128,1,fp8,fp8,0,0.09944159984588623
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1536,48,1,128,1,float16,float16,0,0.08140799999237061
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1536,48,1,128,1,float16,fp8,0,0.07932800054550171
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1536,48,1,128,1,fp8,fp8,0,0.08140479922294616
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1536,48,2,128,1,float16,float16,0,0.08087360262870788
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1536,48,2,128,1,float16,fp8,0,0.07906720042228699
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1536,48,2,128,1,fp8,fp8,0,0.08111199736595154
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1536,48,4,128,1,float16,float16,0,0.08263360261917115
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1536,48,4,128,1,float16,fp8,0,0.07899519801139832
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1536,48,4,128,1,fp8,fp8,0,0.08110560178756714
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1536,48,8,128,1,float16,float16,0,0.08473119735717774
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1536,48,8,128,1,float16,fp8,0,0.07880799770355225
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1536,48,8,128,1,fp8,fp8,0,0.07958239912986756
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1536,48,48,128,1,float16,float16,0,0.060020798444747926
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1536,48,48,128,1,float16,fp8,0,0.058340799808502194
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1536,48,48,128,1,fp8,fp8,0,0.058595198392868045
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1536,48,1,128,1,float16,float16,0,0.05148959755897522
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1536,48,1,128,1,float16,fp8,0,0.050128000974655154
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1536,48,1,128,1,fp8,fp8,0,0.050316798686981204
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1536,48,2,128,1,float16,float16,0,0.05170559883117676
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1536,48,2,128,1,float16,fp8,0,0.0499455988407135
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1536,48,2,128,1,fp8,fp8,0,0.04997119903564453
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1536,48,4,128,1,float16,fp8,0,0.049721598625183105
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1536,48,4,128,1,float16,float16,0,0.051744002103805545
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1536,48,4,128,1,fp8,fp8,0,0.049979200959205626
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1536,48,8,128,1,float16,float16,0,0.053376001119613645
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1536,48,8,128,1,fp8,fp8,0,0.050678402185440063
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1536,48,8,128,1,float16,fp8,0,0.05087360143661499
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,1024,48,1,128,1,float16,float16,0,2.127816009521484
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,1024,48,1,128,1,float16,fp8,0,2.016494369506836
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,1024,48,1,128,1,fp8,fp8,0,2.016009521484375
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,1024,48,2,128,1,float16,float16,0,2.005569648742676
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,1024,48,2,128,1,fp8,fp8,0,2.0146432876586915
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,1024,48,4,128,1,float16,float16,0,2.0182783126831056
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,1024,48,2,128,1,float16,fp8,0,2.1130655288696287
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,1024,48,4,128,1,float16,fp8,0,2.1372032165527344
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,1024,48,4,128,1,fp8,fp8,0,2.01265926361084
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,1024,48,8,128,1,float16,fp8,0,2.0017871856689453
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,1024,48,8,128,1,float16,float16,0,2.504787254333496
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1024,48,48,128,1,float16,float16,0,1.4835359573364257
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1024,48,48,128,1,float16,fp8,0,1.377355194091797
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,1024,48,8,128,1,fp8,fp8,0,2.011003112792969
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1024,48,48,128,1,fp8,fp8,0,1.5529888153076172
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1024,48,1,128,1,float16,float16,0,1.05600004196167
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1024,48,1,128,1,float16,fp8,0,1.0199695587158204
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1024,48,1,128,1,fp8,fp8,0,1.0226960182189941
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1024,48,2,128,1,float16,float16,0,1.0024831771850586
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1024,48,2,128,1,float16,fp8,0,1.0316960334777832
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1024,48,2,128,1,fp8,fp8,0,1.0184032440185546
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1024,48,4,128,1,float16,float16,0,1.032795238494873
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1024,48,4,128,1,float16,fp8,0,1.0207663536071778
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1024,48,4,128,1,fp8,fp8,0,1.0185503959655762
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1024,48,8,128,1,float16,float16,0,1.0718768119812012
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1024,48,8,128,1,float16,fp8,0,1.0614000320434571
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1024,48,8,128,1,fp8,fp8,0,1.0163151741027832
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1024,48,48,128,1,float16,float16,0,0.7573328018188477
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1024,48,48,128,1,float16,fp8,0,0.7180560111999512
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1024,48,1,128,1,float16,float16,0,0.5189727783203125
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1024,48,48,128,1,fp8,fp8,0,0.6985904216766358
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1024,48,1,128,1,float16,fp8,0,0.5248432159423828
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1024,48,1,128,1,fp8,fp8,0,0.5256832122802735
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1024,48,2,128,1,float16,float16,0,0.5170400142669678
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1024,48,2,128,1,float16,fp8,0,0.5231296062469483
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1024,48,2,128,1,fp8,fp8,0,0.5218624114990235
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1024,48,4,128,1,float16,float16,0,0.5302127838134766
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1024,48,4,128,1,float16,fp8,0,0.5207488059997558
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1024,48,4,128,1,fp8,fp8,0,0.5220384120941162
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1024,48,8,128,1,float16,float16,0,0.5494624137878418
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1024,48,8,128,1,float16,fp8,0,0.5165023803710938
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1024,48,8,128,1,fp8,fp8,0,0.519320011138916
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1024,48,48,128,1,float16,float16,0,0.39032320976257323
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1024,48,48,128,1,float16,fp8,0,0.36199519634246824
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1024,48,48,128,1,fp8,fp8,0,0.3608223915100098
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1024,48,1,128,1,float16,fp8,0,0.271561598777771
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1024,48,1,128,1,float16,float16,0,0.2776639938354492
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1024,48,1,128,1,fp8,fp8,0,0.27147679328918456
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1024,48,2,128,1,float16,float16,0,0.2741616010665894
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1024,48,2,128,1,float16,fp8,0,0.28033759593963625
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1024,48,2,128,1,fp8,fp8,0,0.2707360029220581
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1024,48,4,128,1,float16,float16,0,0.27860159873962403
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1024,48,4,128,1,float16,fp8,0,0.2807823896408081
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1024,48,4,128,1,fp8,fp8,0,0.2709264039993286
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1024,48,8,128,1,float16,float16,0,0.28958721160888673
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1024,48,8,128,1,float16,fp8,0,0.2735152006149292
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1024,48,8,128,1,fp8,fp8,0,0.2698224067687988
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1024,48,48,128,1,float16,float16,0,0.2048799991607666
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1024,48,48,128,1,float16,fp8,0,0.19142240285873413
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1024,48,1,128,1,float16,float16,0,0.14804480075836182
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1024,48,1,128,1,float16,fp8,0,0.14686720371246337
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1024,48,48,128,1,fp8,fp8,0,0.191428804397583
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1024,48,2,128,1,float16,float16,0,0.14985280036926268
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1024,48,1,128,1,fp8,fp8,0,0.14528319835662842
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1024,48,2,128,1,float16,fp8,0,0.1455407977104187
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1024,48,2,128,1,fp8,fp8,0,0.14629600048065186
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1024,48,4,128,1,float16,float16,0,0.15037920475006103
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1024,48,4,128,1,float16,fp8,0,0.14462239742279054
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1024,48,4,128,1,fp8,fp8,0,0.14581600427627564
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1024,48,8,128,1,float16,float16,0,0.15438879728317262
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1024,48,8,128,1,float16,fp8,0,0.14498080015182496
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1024,48,48,128,1,float16,float16,0,0.11285120248794556
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1024,48,8,128,1,fp8,fp8,0,0.1456768035888672
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1024,48,48,128,1,float16,fp8,0,0.10475039482116699
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1024,48,1,128,1,float16,float16,0,0.0828320026397705
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1024,48,1,128,1,float16,fp8,0,0.08007680177688599
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1024,48,48,128,1,fp8,fp8,0,0.10515199899673462
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1024,48,1,128,1,fp8,fp8,0,0.08099520206451416
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1024,48,2,128,1,float16,float16,0,0.082014399766922
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1024,48,2,128,1,float16,fp8,0,0.08036159873008727
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1024,48,2,128,1,fp8,fp8,0,0.08149440288543701
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1024,48,4,128,1,float16,float16,0,0.08372640013694763
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1024,48,4,128,1,float16,fp8,0,0.0804095983505249
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1024,48,4,128,1,fp8,fp8,0,0.08061280250549316
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1024,48,8,128,1,float16,float16,0,0.08746880292892456
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1024,48,8,128,1,float16,fp8,0,0.08056319952011108
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1024,48,8,128,1,fp8,fp8,0,0.0814736008644104
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1024,48,48,128,1,float16,fp8,0,0.060222399234771726
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1024,48,48,128,1,float16,float16,0,0.06595519781112671
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1024,48,48,128,1,fp8,fp8,0,0.060395199060440066
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1024,48,1,128,1,float16,float16,0,0.04983200132846832
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1024,48,1,128,1,float16,fp8,0,0.0493120014667511
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1024,48,1,128,1,fp8,fp8,0,0.04948959946632385
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1024,48,2,128,1,float16,float16,0,0.04978080093860626
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1024,48,2,128,1,float16,fp8,0,0.04962239861488342
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1024,48,2,128,1,fp8,fp8,0,0.04957599937915802
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1024,48,4,128,1,float16,float16,0,0.0503216028213501
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1024,48,4,128,1,float16,fp8,0,0.04930239915847778
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1024,48,4,128,1,fp8,fp8,0,0.04941279888153076
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1024,48,8,128,1,float16,float16,0,0.051601600646972653
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1024,48,8,128,1,float16,fp8,0,0.04945760071277618
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1024,48,8,128,1,fp8,fp8,0,0.04949600100517273
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1024,48,48,128,1,float16,float16,0,0.039150398969650266
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1024,48,48,128,1,float16,fp8,0,0.04022560119628906
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1024,48,48,128,1,fp8,fp8,0,0.039201599359512326
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1024,48,1,128,1,float16,float16,0,0.03507040143013
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1024,48,1,128,1,float16,fp8,0,0.03506399989128113
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1024,48,1,128,1,fp8,fp8,0,0.034964799880981445
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1024,48,2,128,1,float16,float16,0,0.03498240113258362
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1024,48,2,128,1,fp8,fp8,0,0.03490079939365387
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1024,48,4,128,1,float16,float16,0,0.035132798552513125
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1024,48,2,128,1,float16,fp8,0,0.03494719862937927
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1024,48,4,128,1,fp8,fp8,0,0.03485920131206512
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1024,48,4,128,1,float16,fp8,0,0.035236799716949464
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1024,48,8,128,1,float16,float16,0,0.03580960035324097
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1024,48,8,128,1,float16,fp8,0,0.03496319949626923
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1024,48,8,128,1,fp8,fp8,0,0.03489919900894165
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,512,48,1,128,1,float16,float16,0,1.4862624168395997
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,512,48,1,128,1,float16,fp8,0,1.5494223594665528
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,512,48,2,128,1,float16,float16,0,1.4778847694396973
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,512,48,1,128,1,fp8,fp8,0,1.5544655799865723
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,512,48,2,128,1,fp8,fp8,0,1.5431952476501465
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,512,48,4,128,1,float16,fp8,0,1.5480511665344239
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,512,48,4,128,1,float16,float16,0,1.6233247756958007
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,512,48,2,128,1,float16,fp8,0,1.6676591873168944
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,512,48,4,128,1,fp8,fp8,0,1.5416624069213867
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,512,48,8,128,1,float16,float16,0,1.5871824264526366
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,512,48,8,128,1,fp8,fp8,0,1.5456751823425292
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,512,48,8,128,1,float16,fp8,0,1.5911423683166503
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,512,48,1,128,1,float16,float16,0,0.7546463966369629
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,512,48,48,128,1,float16,float16,0,1.2176336288452148
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,512,48,48,128,1,fp8,fp8,0,1.1370112419128418
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,512,48,48,128,1,float16,fp8,0,1.263862419128418
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,512,48,1,128,1,float16,fp8,0,0.8197903633117676
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,512,48,2,128,1,float16,float16,0,0.7528800010681153
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,512,48,1,128,1,fp8,fp8,0,0.8288288116455078
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,512,48,2,128,1,float16,fp8,0,0.7845168113708496
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,512,48,2,128,1,fp8,fp8,0,0.7858911991119385
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,512,48,4,128,1,float16,float16,0,0.7711760044097901
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,512,48,4,128,1,float16,fp8,0,0.7822624206542969
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,512,48,4,128,1,fp8,fp8,0,0.7837391853332519
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,512,48,8,128,1,float16,float16,0,0.8078175544738769
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,512,48,8,128,1,float16,fp8,0,0.778329610824585
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,512,48,8,128,1,fp8,fp8,0,0.781718397140503
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,512,48,48,128,1,float16,float16,0,0.6190415859222412
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,512,48,48,128,1,float16,fp8,0,0.5780672073364258
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,512,48,48,128,1,fp8,fp8,0,0.5768288135528564
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,512,48,1,128,1,float16,float16,0,0.3900079965591431
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,512,48,1,128,1,float16,fp8,0,0.40177597999572756
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,512,48,1,128,1,fp8,fp8,0,0.40289921760559083
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,512,48,2,128,1,float16,float16,0,0.3892064094543457
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,512,48,2,128,1,fp8,fp8,0,0.4021327972412109
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,512,48,2,128,1,float16,fp8,0,0.40102081298828124
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,512,48,4,128,1,float16,float16,0,0.398417592048645
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,512,48,4,128,1,float16,fp8,0,0.4012415885925293
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,512,48,4,128,1,fp8,fp8,0,0.39999520778656006
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,512,48,8,128,1,float16,float16,0,0.4182752132415771
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,512,48,48,128,1,float16,float16,0,0.3200992107391357
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,512,48,8,128,1,fp8,fp8,0,0.3979120016098022
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,512,48,8,128,1,float16,fp8,0,0.3999583959579468
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,512,48,1,128,1,float16,float16,0,0.2026304006576538
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,512,48,48,128,1,float16,fp8,0,0.299073600769043
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,512,48,48,128,1,fp8,fp8,0,0.2982239961624146
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,512,48,1,128,1,float16,fp8,0,0.2091536045074463
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,512,48,1,128,1,fp8,fp8,0,0.2111936092376709
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,512,48,2,128,1,float16,float16,0,0.2045056104660034
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,512,48,2,128,1,float16,fp8,0,0.20915040969848633
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,512,48,2,128,1,fp8,fp8,0,0.20979681015014648
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,512,48,4,128,1,float16,float16,0,0.20889759063720703
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,512,48,4,128,1,float16,fp8,0,0.20888640880584716
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,512,48,4,128,1,fp8,fp8,0,0.21018240451812745
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,512,48,8,128,1,float16,float16,0,0.2189552068710327
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,512,48,8,128,1,float16,fp8,0,0.2081007957458496
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,512,48,8,128,1,fp8,fp8,0,0.2082848072052002
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,512,48,48,128,1,float16,float16,0,0.1677248001098633
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,512,48,1,128,1,float16,float16,0,0.11278719902038574
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,512,48,48,128,1,float16,fp8,0,0.15843360424041747
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,512,48,1,128,1,float16,fp8,0,0.1125040054321289
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,512,48,48,128,1,fp8,fp8,0,0.1597599983215332
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,512,48,1,128,1,fp8,fp8,0,0.11398240327835082
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,512,48,2,128,1,float16,float16,0,0.11104320287704468
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,512,48,2,128,1,float16,fp8,0,0.11245759725570678
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,512,48,2,128,1,fp8,fp8,0,0.11371519565582275
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,512,48,4,128,1,float16,float16,0,0.11408799886703491
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,512,48,4,128,1,float16,fp8,0,0.11353600025177002
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,512,48,4,128,1,fp8,fp8,0,0.11176639795303345
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,512,48,8,128,1,float16,fp8,0,0.11291199922561646
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,512,48,8,128,1,fp8,fp8,0,0.11155999898910522
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,512,48,8,128,1,float16,float16,0,0.1198464035987854
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,512,48,48,128,1,float16,float16,0,0.09151359796524047
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,512,48,48,128,1,float16,fp8,0,0.0863856017589569
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,512,48,1,128,1,float16,float16,0,0.061843198537826535
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,512,48,48,128,1,fp8,fp8,0,0.08526880145072938
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,512,48,1,128,1,float16,fp8,0,0.06158080101013184
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,512,48,1,128,1,fp8,fp8,0,0.061559998989105226
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,512,48,2,128,1,float16,float16,0,0.061768001317977904
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,512,48,2,128,1,float16,fp8,0,0.061694401502609256
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,512,48,2,128,1,fp8,fp8,0,0.061750400066375735
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,512,48,4,128,1,float16,float16,0,0.06372640132904053
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,512,48,4,128,1,fp8,fp8,0,0.06167680025100708
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,512,48,4,128,1,float16,fp8,0,0.062212800979614256
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,512,48,8,128,1,float16,float16,0,0.06604160070419311
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,512,48,8,128,1,float16,fp8,0,0.061689597368240354
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,512,48,8,128,1,fp8,fp8,0,0.061607998609542844
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,512,48,48,128,1,float16,float16,0,0.05415840148925781
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,512,48,48,128,1,float16,fp8,0,0.04945760071277618
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,512,48,48,128,1,fp8,fp8,0,0.04932479858398438
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,512,48,1,128,1,float16,float16,0,0.03713920116424561
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,512,48,1,128,1,float16,fp8,0,0.038196799159049985
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,512,48,1,128,1,fp8,fp8,0,0.038468798995018004
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,512,48,2,128,1,float16,float16,0,0.03734880089759827
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,512,48,2,128,1,float16,fp8,0,0.03707840144634247
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,512,48,4,128,1,float16,float16,0,0.03893280029296875
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,512,48,2,128,1,fp8,fp8,0,0.037678399682044984
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,512,48,4,128,1,float16,fp8,0,0.0372624009847641
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,512,48,4,128,1,fp8,fp8,0,0.03704800009727478
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,512,48,8,128,1,float16,float16,0,0.03918719887733459
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,512,48,8,128,1,float16,fp8,0,0.0373744010925293
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,512,48,8,128,1,fp8,fp8,0,0.037363201379776
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,512,48,48,128,1,float16,float16,0,0.030019199848175047
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,512,48,48,128,1,float16,fp8,0,0.03304480016231537
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,512,48,1,128,1,float16,float16,0,0.026862400770187377
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,512,48,48,128,1,fp8,fp8,0,0.03267520070075989
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,512,48,1,128,1,float16,fp8,0,0.026894399523735048
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,512,48,1,128,1,fp8,fp8,0,0.026817598938941957
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,512,48,2,128,1,float16,float16,0,0.026791998744010927
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,512,48,2,128,1,fp8,fp8,0,0.02682879865169525
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,512,48,2,128,1,float16,fp8,0,0.02675040066242218
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,512,48,4,128,1,float16,float16,0,0.0268640011548996
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,512,48,4,128,1,float16,fp8,0,0.02677280008792877
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,512,48,4,128,1,fp8,fp8,0,0.026868799328804018
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,512,48,8,128,1,float16,float16,0,0.02714880108833313
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,512,48,8,128,1,float16,fp8,0,0.02680320143699646
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,512,48,8,128,1,fp8,fp8,0,0.026766398549079896
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,512,48,48,128,1,float16,float16,0,0.02062239944934845
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,512,48,48,128,1,float16,fp8,0,0.020803199708461763
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,512,48,48,128,1,fp8,fp8,0,0.020790399610996248
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,512,48,1,128,1,float16,float16,0,0.018624000251293182
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,512,48,1,128,1,float16,fp8,0,0.01855839937925339
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,512,48,1,128,1,fp8,fp8,0,0.01863519996404648
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,512,48,2,128,1,float16,float16,0,0.018723200261592864
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,512,48,2,128,1,float16,fp8,0,0.018595199286937713
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,512,48,2,128,1,fp8,fp8,0,0.018671999871730804
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,512,48,4,128,1,float16,fp8,0,0.01857759952545166
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,512,48,4,128,1,float16,float16,0,0.01881760060787201
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,512,48,8,128,1,float16,float16,0,0.01863040030002594
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,512,48,4,128,1,fp8,fp8,0,0.018676799535751343
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,512,48,8,128,1,fp8,fp8,0,0.018592000007629395
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,512,48,8,128,1,float16,fp8,0,0.018700799345970152
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,256,48,1,128,1,float16,float16,0,0.6155663967132569
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,256,48,1,128,1,float16,fp8,0,0.6600639820098877
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,256,48,1,128,1,fp8,fp8,0,0.6572175979614258
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,256,48,2,128,1,float16,float16,0,0.6125184059143066
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,256,48,2,128,1,float16,fp8,0,0.6588543891906739
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,256,48,2,128,1,fp8,fp8,0,0.656276798248291
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,256,48,4,128,1,float16,float16,0,0.6312784194946289
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,256,48,4,128,1,float16,fp8,0,0.6575647830963135
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,256,48,4,128,1,fp8,fp8,0,0.654852819442749
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,256,48,8,128,1,float16,float16,0,0.6680255889892578
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,256,48,8,128,1,float16,fp8,0,0.6563039779663086
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,256,48,8,128,1,fp8,fp8,0,0.6543903827667237
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,256,48,48,128,1,float16,float16,0,0.5427040100097656
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,256,48,48,128,1,float16,fp8,0,0.510529613494873
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,256,48,48,128,1,fp8,fp8,0,0.5119423866271973
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,256,48,1,128,1,float16,float16,0,0.3176975965499878
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,256,48,1,128,1,float16,fp8,0,0.33810560703277587
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,256,48,1,128,1,fp8,fp8,0,0.33777120113372805
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,256,48,2,128,1,float16,float16,0,0.3158224105834961
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,256,48,2,128,1,float16,fp8,0,0.3365488052368164
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,256,48,2,128,1,fp8,fp8,0,0.3369343996047974
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,256,48,4,128,1,float16,float16,0,0.32425599098205565
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,256,48,4,128,1,float16,fp8,0,0.33648478984832764
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,256,48,4,128,1,fp8,fp8,0,0.33650400638580324
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,256,48,8,128,1,float16,float16,0,0.34352641105651854
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,256,48,8,128,1,float16,fp8,0,0.33570399284362795
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,256,48,8,128,1,fp8,fp8,0,0.3352479934692383
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,256,48,48,128,1,float16,float16,0,0.28016960620880127
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,256,48,48,128,1,float16,fp8,0,0.26471519470214844
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,256,48,1,128,1,float16,float16,0,0.16998399496078492
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,256,48,48,128,1,fp8,fp8,0,0.26580960750579835
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,256,48,1,128,1,float16,fp8,0,0.17657920122146606
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,256,48,1,128,1,fp8,fp8,0,0.17779359817504883
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,256,48,2,128,1,float16,float16,0,0.16893919706344604
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,256,48,2,128,1,float16,fp8,0,0.17718559503555298
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,256,48,2,128,1,fp8,fp8,0,0.17692320346832274
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,256,48,4,128,1,float16,float16,0,0.17311520576477052
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,256,48,4,128,1,float16,fp8,0,0.17742079496383667
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,256,48,4,128,1,fp8,fp8,0,0.17645759582519532
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,256,48,8,128,1,float16,float16,0,0.18046239614486695
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,256,48,8,128,1,float16,fp8,0,0.17702720165252686
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,256,48,48,128,1,float16,float16,0,0.14828480482101442
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,256,48,8,128,1,fp8,fp8,0,0.17744319438934325
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,256,48,48,128,1,float16,fp8,0,0.1404703974723816
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,256,48,1,128,1,float16,float16,0,0.09263520240783692
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,256,48,1,128,1,float16,fp8,0,0.09680160284042358
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,256,48,48,128,1,fp8,fp8,0,0.14162399768829345
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,256,48,2,128,1,float16,float16,0,0.09232640266418457
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,256,48,2,128,1,float16,fp8,0,0.09682719707489014
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,256,48,2,128,1,fp8,fp8,0,0.09793919920921326
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,256,48,1,128,1,fp8,fp8,0,0.09632319808006287
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,256,48,4,128,1,float16,fp8,0,0.09684159755706787
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,256,48,4,128,1,float16,float16,0,0.09499359726905823
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,256,48,4,128,1,fp8,fp8,0,0.09702079892158508
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,256,48,8,128,1,float16,float16,0,0.09940159916877747
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,256,48,48,128,1,float16,float16,0,0.08386399745941162
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,256,48,8,128,1,float16,fp8,0,0.09659839868545532
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,256,48,8,128,1,fp8,fp8,0,0.09641439914703369
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,256,48,48,128,1,float16,fp8,0,0.07834240198135375
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,256,48,1,128,1,float16,float16,0,0.053294402360916135
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,256,48,1,128,1,float16,fp8,0,0.054756802320480344
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,256,48,48,128,1,fp8,fp8,0,0.07829920053482056
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,256,48,1,128,1,fp8,fp8,0,0.05416479706764221
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,256,48,2,128,1,float16,float16,0,0.053620797395706174
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,256,48,2,128,1,float16,fp8,0,0.05501440167427063
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,256,48,2,128,1,fp8,fp8,0,0.054104000329971313
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,256,48,4,128,1,float16,float16,0,0.05547999739646912
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,256,48,4,128,1,float16,fp8,0,0.05485600233078003
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,256,48,4,128,1,fp8,fp8,0,0.05441120266914368
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,256,48,8,128,1,float16,float16,0,0.057651197910308837
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,256,48,8,128,1,float16,fp8,0,0.05435839891433716
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,256,48,48,128,1,float16,float16,0,0.04710719883441925
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,256,48,8,128,1,fp8,fp8,0,0.05545439720153809
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,256,48,48,128,1,float16,fp8,0,0.043201598525047305
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,256,48,1,128,1,float16,float16,0,0.030888000130653383
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,256,48,1,128,1,float16,fp8,0,0.03113119900226593
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,256,48,48,128,1,fp8,fp8,0,0.04317440092563629
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,256,48,1,128,1,fp8,fp8,0,0.03094559907913208
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,256,48,2,128,1,float16,float16,0,0.030817601084709167
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,256,48,2,128,1,float16,fp8,0,0.031001600623130798
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,256,48,4,128,1,float16,float16,0,0.030935999751091004
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,256,48,2,128,1,fp8,fp8,0,0.030928000807762146
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,256,48,4,128,1,float16,fp8,0,0.030985599756240843
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,256,48,4,128,1,fp8,fp8,0,0.031241598725318908
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,256,48,8,128,1,float16,float16,0,0.03293280005455017
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,256,48,8,128,1,float16,fp8,0,0.030888000130653383
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,256,48,8,128,1,fp8,fp8,0,0.03108159899711609
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,256,48,48,128,1,float16,float16,0,0.026900801062583923
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,256,48,48,128,1,float16,fp8,0,0.028923198580741882
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,256,48,48,128,1,fp8,fp8,0,0.028814399242401124
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,256,48,1,128,1,float16,float16,0,0.02282239943742752
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,256,48,1,128,1,float16,fp8,0,0.022921599447727203
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,256,48,1,128,1,fp8,fp8,0,0.022801600396633148
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,256,48,2,128,1,float16,float16,0,0.022750400006771088
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,256,48,2,128,1,float16,fp8,0,0.02292959988117218
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,256,48,2,128,1,fp8,fp8,0,0.022753599286079406
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,256,48,4,128,1,float16,float16,0,0.02276639938354492
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,256,48,4,128,1,float16,fp8,0,0.023095999658107758
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,256,48,4,128,1,fp8,fp8,0,0.02276960015296936
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,256,48,8,128,1,float16,float16,0,0.024697600305080412
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,256,48,8,128,1,float16,fp8,0,0.02287199944257736
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,256,48,48,128,1,float16,float16,0,0.01860000044107437
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,256,48,8,128,1,fp8,fp8,0,0.02271520048379898
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,256,48,48,128,1,float16,fp8,0,0.01874080002307892
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,256,48,48,128,1,fp8,fp8,0,0.01866080015897751
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,256,48,1,128,1,float16,float16,0,0.016524800658226015
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,256,48,1,128,1,float16,fp8,0,0.016510400176048278
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,256,48,2,128,1,float16,float16,0,0.015719999372959138
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,256,48,1,128,1,fp8,fp8,0,0.016473600268363954
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,256,48,2,128,1,float16,fp8,0,0.01655520051717758
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,256,48,2,128,1,fp8,fp8,0,0.016569599509239197
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,256,48,4,128,1,float16,float16,0,0.016551999747753142
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,256,48,4,128,1,float16,fp8,0,0.016540800034999848
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,256,48,4,128,1,fp8,fp8,0,0.016543999314308167
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,256,48,8,128,1,float16,float16,0,0.016499200463294984
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,256,48,48,128,1,float16,float16,0,0.01669439971446991
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,256,48,8,128,1,float16,fp8,0,0.01656640022993088
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,256,48,8,128,1,fp8,fp8,0,0.01656640022993088
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,256,48,48,128,1,float16,fp8,0,0.016686399281024934
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,256,48,1,128,1,float16,float16,0,0.014593599736690522
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,256,48,48,128,1,fp8,fp8,0,0.016752000153064727
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,256,48,1,128,1,float16,fp8,0,0.015479999780654907
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,256,48,2,128,1,float16,float16,0,0.01639840006828308
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,256,48,1,128,1,fp8,fp8,0,0.01459999978542328
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,256,48,2,128,1,float16,fp8,0,0.016200000047683717
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,256,48,4,128,1,float16,float16,0,0.01456640064716339
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,256,48,2,128,1,fp8,fp8,0,0.014660799503326416
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,256,48,4,128,1,float16,fp8,0,0.015963199734687804
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,256,48,8,128,1,float16,float16,0,0.01459999978542328
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,256,48,4,128,1,fp8,fp8,0,0.014553600549697876
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,256,48,8,128,1,float16,fp8,0,0.016198399662971496
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,256,48,8,128,1,fp8,fp8,0,0.014678399264812469
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,128,48,1,128,1,float16,float16,0,0.375380802154541
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,128,48,1,128,1,float16,fp8,0,0.3972687959671021
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,128,48,1,128,1,fp8,fp8,0,0.4000671863555908
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,128,48,2,128,1,float16,float16,0,0.3750992059707642
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,128,48,2,128,1,float16,fp8,0,0.39611680507659913
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,128,48,2,128,1,fp8,fp8,0,0.3994191884994507
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,128,48,4,128,1,float16,float16,0,0.38382079601287844
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,128,48,4,128,1,float16,fp8,0,0.3954751968383789
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,128,48,4,128,1,fp8,fp8,0,0.3980207920074463
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,128,48,8,128,1,float16,float16,0,0.40124001502990725
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,128,48,8,128,1,float16,fp8,0,0.39486079216003417
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,128,48,8,128,1,fp8,fp8,0,0.3975215911865234
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,128,48,48,128,1,float16,float16,0,0.30478079319000245
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,128,48,48,128,1,float16,fp8,0,0.29226720333099365
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,128,48,48,128,1,fp8,fp8,0,0.2936320066452026
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,128,48,1,128,1,float16,float16,0,0.19521600008010864
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,128,48,1,128,1,float16,fp8,0,0.2049936056137085
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,128,48,1,128,1,fp8,fp8,0,0.20592958927154542
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,128,48,2,128,1,float16,float16,0,0.19427520036697388
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,128,48,2,128,1,float16,fp8,0,0.20465919971466065
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,128,48,2,128,1,fp8,fp8,0,0.20533280372619628
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,128,48,4,128,1,float16,float16,0,0.19924960136413575
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,128,48,4,128,1,float16,fp8,0,0.20486080646514893
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,128,48,4,128,1,fp8,fp8,0,0.20520639419555664
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,128,48,8,128,1,float16,float16,0,0.2081968069076538
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,128,48,8,128,1,float16,fp8,0,0.20425279140472413
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,128,48,8,128,1,fp8,fp8,0,0.2050175905227661
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,128,48,48,128,1,float16,float16,0,0.1586384057998657
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,128,48,48,128,1,float16,fp8,0,0.1539039969444275
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,128,48,48,128,1,fp8,fp8,0,0.15418720245361328
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,128,48,1,128,1,float16,fp8,0,0.10908479690551758
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,128,48,1,128,1,float16,float16,0,0.10486400127410889
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,128,48,2,128,1,float16,float16,0,0.10627199411392212
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,128,48,1,128,1,fp8,fp8,0,0.10892479419708252
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,128,48,2,128,1,float16,fp8,0,0.10903040170669556
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,128,48,2,128,1,fp8,fp8,0,0.10900319814682007
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,128,48,4,128,1,float16,float16,0,0.10773439407348633
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,128,48,4,128,1,float16,fp8,0,0.10886399745941162
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,128,48,4,128,1,fp8,fp8,0,0.10893440246582031
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,128,48,8,128,1,float16,float16,0,0.11176160573959351
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,128,48,8,128,1,float16,fp8,0,0.1093440055847168
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,128,48,8,128,1,fp8,fp8,0,0.10886240005493164
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,128,48,48,128,1,float16,float16,0,0.08471199870109558
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,128,48,1,128,1,float16,float16,0,0.05806559920310974
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,128,48,48,128,1,fp8,fp8,0,0.08403840065002441
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,128,48,1,128,1,float16,fp8,0,0.059569597244262695
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,128,48,48,128,1,float16,fp8,0,0.08269919753074646
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,128,48,1,128,1,fp8,fp8,0,0.05960639715194702
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,128,48,2,128,1,float16,float16,0,0.05896639823913574
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,128,48,2,128,1,float16,fp8,0,0.05962399840354919
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,128,48,2,128,1,fp8,fp8,0,0.05971519947052002
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,128,48,4,128,1,float16,float16,0,0.05973280072212219
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,128,48,4,128,1,float16,fp8,0,0.059811198711395265
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,128,48,4,128,1,fp8,fp8,0,0.059569597244262695
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,128,48,8,128,1,float16,fp8,0,0.05956320166587829
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,128,48,8,128,1,float16,float16,0,0.062063997983932494
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,128,48,8,128,1,fp8,fp8,0,0.05961599946022034
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,128,48,48,128,1,float16,float16,0,0.04930399954319
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,128,48,48,128,1,float16,fp8,0,0.0464464008808136
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,128,48,1,128,1,float16,float16,0,0.03514559864997864
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,128,48,48,128,1,fp8,fp8,0,0.046254399418830874
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,128,48,1,128,1,float16,fp8,0,0.035094401240348815
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,128,48,1,128,1,fp8,fp8,0,0.035062399506568906
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,128,48,2,128,1,float16,float16,0,0.03480800092220306
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,128,48,2,128,1,float16,fp8,0,0.03511520028114319
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,128,48,2,128,1,fp8,fp8,0,0.03514719903469086
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,128,48,4,128,1,float16,fp8,0,0.03511840105056763
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,128,48,4,128,1,fp8,fp8,0,0.03509120047092438
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,128,48,4,128,1,float16,float16,0,0.035139200091362
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,128,48,8,128,1,float16,float16,0,0.035062399506568906
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,128,48,8,128,1,float16,fp8,0,0.03532640039920807
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,128,48,8,128,1,fp8,fp8,0,0.035206401348114015
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,128,48,48,128,1,float16,float16,0,0.024910399317741395
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,128,48,48,128,1,float16,fp8,0,0.02691200077533722
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,128,48,1,128,1,float16,float16,0,0.020761600136756896
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,128,48,48,128,1,fp8,fp8,0,0.026969599723815917
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,128,48,1,128,1,float16,fp8,0,0.02268480062484741
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,128,48,1,128,1,fp8,fp8,0,0.022566400468349457
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,128,48,2,128,1,float16,fp8,0,0.022598400712013245
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,128,48,2,128,1,float16,float16,0,0.020846399664878845
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,128,48,2,128,1,fp8,fp8,0,0.02263360023498535
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,128,48,4,128,1,float16,float16,0,0.020844799280166627
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,128,48,4,128,1,fp8,fp8,0,0.022655999660491942
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,128,48,8,128,1,float16,float16,0,0.02266400009393692
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,128,48,4,128,1,float16,fp8,0,0.022864000499248506
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,128,48,8,128,1,float16,fp8,0,0.02260800004005432
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,128,48,8,128,1,fp8,fp8,0,0.02266400009393692
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,128,48,48,128,1,float16,float16,0,0.018807999789714813
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,128,48,48,128,1,float16,fp8,0,0.018812799453735353
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,128,48,48,128,1,fp8,fp8,0,0.019670400023460387
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,128,48,1,128,1,float16,float16,0,0.01672160029411316
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,128,48,1,128,1,float16,fp8,0,0.01671839952468872
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,128,48,1,128,1,fp8,fp8,0,0.01672479957342148
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,128,48,2,128,1,float16,float16,0,0.016708800196647645
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,128,48,2,128,1,float16,fp8,0,0.016708800196647645
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,128,48,2,128,1,fp8,fp8,0,0.016752000153064727
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,128,48,4,128,1,float16,fp8,0,0.01674560010433197
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,128,48,4,128,1,fp8,fp8,0,0.016705599427223206
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,128,48,4,128,1,float16,float16,0,0.01666879951953888
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,128,48,8,128,1,float16,float16,0,0.016704000532627106
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,128,48,8,128,1,float16,fp8,0,0.016816000640392303
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,128,48,8,128,1,fp8,fp8,0,0.016715200245380403
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,128,48,48,128,1,float16,float16,0,0.014452800154685974
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,128,48,48,128,1,float16,fp8,0,0.01446560025215149
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,128,48,48,128,1,fp8,fp8,0,0.014569599926471711
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,128,48,1,128,1,float16,float16,0,0.012590399384498597
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,128,48,1,128,1,float16,fp8,0,0.012580800056457519
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,128,48,2,128,1,float16,float16,0,0.012544000148773193
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,128,48,1,128,1,fp8,fp8,0,0.012540799379348756
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,128,48,2,128,1,float16,fp8,0,0.012563200294971466
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,128,48,4,128,1,float16,fp8,0,0.012571200728416443
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,128,48,4,128,1,float16,float16,0,0.012520000338554382
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,128,48,2,128,1,fp8,fp8,0,0.012548799812793731
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,128,48,4,128,1,fp8,fp8,0,0.012511999905109405
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,128,48,8,128,1,float16,fp8,0,0.012729600071907043
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,128,48,8,128,1,float16,float16,0,0.012479999661445617
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,128,48,8,128,1,fp8,fp8,0,0.01249760016798973
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,128,48,48,128,1,float16,float16,0,0.012676799297332763
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,128,48,48,128,1,float16,fp8,0,0.012579199671745301
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,128,48,1,128,1,float16,float16,0,0.012503999471664428
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,128,48,48,128,1,fp8,fp8,0,0.012547199428081513
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,128,48,1,128,1,float16,fp8,0,0.011711999773979187
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,128,48,1,128,1,fp8,fp8,0,0.012432000041007996
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,128,48,2,128,1,float16,float16,0,0.012361600250005721
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,128,48,2,128,1,float16,fp8,0,0.012444800138473511
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,128,48,2,128,1,fp8,fp8,0,0.012387199699878693
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,128,48,4,128,1,float16,float16,0,0.012444800138473511
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,128,48,4,128,1,fp8,fp8,0,0.012518399953842163
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,128,48,8,128,1,float16,float16,0,0.012409599870443344
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,128,48,4,128,1,float16,fp8,0,0.01249919980764389
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,128,48,8,128,1,float16,fp8,0,0.01255040019750595
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,128,48,8,128,1,fp8,fp8,0,0.012444800138473511
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,64,48,1,128,1,float16,float16,0,0.2939455986022949
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,64,48,1,128,1,float16,fp8,0,0.3027168035507202
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,64,48,1,128,1,fp8,fp8,0,0.30497279167175295
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,64,48,2,128,1,fp8,fp8,0,0.30443999767303465
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,64,48,2,128,1,float16,float16,0,0.2937743902206421
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,64,48,2,128,1,float16,fp8,0,0.30177760124206543
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,64,48,4,128,1,float16,float16,0,0.30095999240875243
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,64,48,4,128,1,fp8,fp8,0,0.3017136096954346
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,64,48,4,128,1,float16,fp8,0,0.30351839065551756
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,64,48,8,128,1,float16,float16,0,0.30940320491790774
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,64,48,8,128,1,float16,fp8,0,0.30139200687408446
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,64,48,8,128,1,fp8,fp8,0,0.30351040363311765
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,64,48,48,128,1,float16,float16,0,0.2088912010192871
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,64,48,48,128,1,float16,fp8,0,0.2012336015701294
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,64,48,48,128,1,fp8,fp8,0,0.2009984016418457
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,64,48,1,128,1,float16,float16,0,0.15502239465713502
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,64,48,1,128,1,float16,fp8,0,0.15667999982833863
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,64,48,1,128,1,fp8,fp8,0,0.15732640027999878
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,64,48,2,128,1,float16,float16,0,0.15575679540634155
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,64,48,2,128,1,float16,fp8,0,0.15733760595321655
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,64,48,2,128,1,fp8,fp8,0,0.15777920484542846
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,64,48,4,128,1,float16,float16,0,0.1578271985054016
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,64,48,4,128,1,float16,fp8,0,0.15728960037231446
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,64,48,4,128,1,fp8,fp8,0,0.156331205368042
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,64,48,8,128,1,float16,float16,0,0.1624608039855957
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,64,48,8,128,1,float16,fp8,0,0.1567247986793518
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,64,48,8,128,1,fp8,fp8,0,0.1568560004234314
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,64,48,48,128,1,float16,float16,0,0.1095695972442627
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,64,48,48,128,1,float16,fp8,0,0.10684959888458252
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,64,48,48,128,1,fp8,fp8,0,0.10677440166473388
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,64,48,1,128,1,float16,fp8,0,0.08421279788017273
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,64,48,1,128,1,float16,float16,0,0.08439679741859436
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,64,48,1,128,1,fp8,fp8,0,0.08239679932594299
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,64,48,2,128,1,float16,float16,0,0.08365920186042786
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,64,48,2,128,1,float16,fp8,0,0.08294559717178344
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,64,48,2,128,1,fp8,fp8,0,0.0835536003112793
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,64,48,4,128,1,float16,float16,0,0.08474720120429993
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,64,48,4,128,1,float16,fp8,0,0.08299199938774109
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,64,48,4,128,1,fp8,fp8,0,0.08425279855728149
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,64,48,8,128,1,float16,float16,0,0.08712800145149231
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,64,48,48,128,1,float16,float16,0,0.06171839833259583
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,64,48,8,128,1,fp8,fp8,0,0.08323519825935363
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,64,48,8,128,1,float16,fp8,0,0.08350080251693726
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,64,48,48,128,1,float16,fp8,0,0.05771200060844421
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,64,48,48,128,1,fp8,fp8,0,0.0577023983001709
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,64,48,1,128,1,float16,float16,0,0.04580959975719452
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,64,48,1,128,1,float16,fp8,0,0.04738560020923614
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,64,48,1,128,1,fp8,fp8,0,0.04730400145053863
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,64,48,2,128,1,float16,float16,0,0.04584800004959107
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,64,48,2,128,1,float16,fp8,0,0.047328001260757445
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,64,48,2,128,1,fp8,fp8,0,0.04732320010662079
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,64,48,4,128,1,float16,float16,0,0.047547200322151185
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,64,48,4,128,1,float16,fp8,0,0.047270399332046506
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,64,48,4,128,1,fp8,fp8,0,0.0472927987575531
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,64,48,8,128,1,float16,fp8,0,0.047305598855018616
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,64,48,8,128,1,float16,float16,0,0.047742399573326114
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,64,48,8,128,1,fp8,fp8,0,0.04739679992198944
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,64,48,48,128,1,float16,float16,0,0.031916800141334536
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,64,48,48,128,1,float16,fp8,0,0.03503200113773346
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,64,48,1,128,1,float16,float16,0,0.028814399242401124
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,64,48,48,128,1,fp8,fp8,0,0.033025598526000975
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,64,48,1,128,1,fp8,fp8,0,0.028863999247550964
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,64,48,1,128,1,float16,fp8,0,0.029016000032424927
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,64,48,2,128,1,float16,float16,0,0.02876160144805908
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,64,48,2,128,1,float16,fp8,0,0.028809601068496705
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,64,48,4,128,1,float16,float16,0,0.0288783997297287
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,64,48,2,128,1,fp8,fp8,0,0.028880000114440918
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,64,48,4,128,1,float16,fp8,0,0.02884959876537323
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,64,48,4,128,1,fp8,fp8,0,0.028884801268577575
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,64,48,8,128,1,float16,float16,0,0.028838399052619933
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,64,48,8,128,1,float16,fp8,0,0.02893120050430298
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,64,48,8,128,1,fp8,fp8,0,0.02885119915008545
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,64,48,48,128,1,float16,float16,0,0.020692799985408784
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,64,48,48,128,1,float16,fp8,0,0.020785599946975708
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,64,48,48,128,1,fp8,fp8,0,0.02067999988794327
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,64,48,1,128,1,float16,float16,0,0.01855199933052063
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,64,48,1,128,1,float16,fp8,0,0.018727999925613404
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,64,48,1,128,1,fp8,fp8,0,0.0186256006360054
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,64,48,2,128,1,float16,float16,0,0.018636800348758698
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,64,48,2,128,1,float16,fp8,0,0.018795199692249298
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,64,48,2,128,1,fp8,fp8,0,0.018614399433135986
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,64,48,4,128,1,float16,fp8,0,0.018606400489807128
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,64,48,4,128,1,float16,float16,0,0.018559999763965607
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,64,48,4,128,1,fp8,fp8,0,0.018639999628067016
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,64,48,8,128,1,float16,float16,0,0.01874080002307892
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,64,48,8,128,1,float16,fp8,0,0.01870719939470291
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,64,48,8,128,1,fp8,fp8,0,0.018644799292087556
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,64,48,48,128,1,float16,float16,0,0.01648640036582947
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,64,48,48,128,1,float16,fp8,0,0.01659359931945801
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,64,48,1,128,1,float16,float16,0,0.01462559998035431
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,64,48,48,128,1,fp8,fp8,0,0.016612799465656282
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,64,48,1,128,1,float16,fp8,0,0.014665600657463074
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,64,48,2,128,1,float16,float16,0,0.014555199444293976
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,64,48,1,128,1,fp8,fp8,0,0.014627200365066529
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,64,48,2,128,1,float16,fp8,0,0.014511999487876893
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,64,48,4,128,1,float16,float16,0,0.014683200418949128
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,64,48,2,128,1,fp8,fp8,0,0.014504000544548035
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,64,48,4,128,1,float16,fp8,0,0.014504000544548035
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,64,48,4,128,1,fp8,fp8,0,0.014667199552059173
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,64,48,8,128,1,float16,float16,0,0.01449279934167862
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,64,48,8,128,1,float16,fp8,0,0.014553600549697876
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,64,48,8,128,1,fp8,fp8,0,0.014587199687957764
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,64,48,48,128,1,float16,fp8,0,0.012449599802494049
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,64,48,48,128,1,float16,float16,0,0.012494400143623352
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,64,48,48,128,1,fp8,fp8,0,0.012587200105190276
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,64,48,1,128,1,float16,float16,0,0.010523200035095215
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,64,48,1,128,1,float16,fp8,0,0.01048159971833229
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,64,48,1,128,1,fp8,fp8,0,0.012361600250005721
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,64,48,2,128,1,float16,float16,0,0.010451199859380722
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,64,48,2,128,1,float16,fp8,0,0.010718400031328202
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,64,48,2,128,1,fp8,fp8,0,0.012425599992275238
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,64,48,4,128,1,float16,float16,0,0.010492800176143647
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,64,48,4,128,1,float16,fp8,0,0.010903999954462052
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,64,48,4,128,1,fp8,fp8,0,0.012433599680662155
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,64,48,8,128,1,float16,float16,0,0.010427200049161912
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,64,48,8,128,1,fp8,fp8,0,0.011102399975061416
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,64,48,8,128,1,float16,fp8,0,0.011244799941778183
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,64,48,48,128,1,float16,fp8,0,0.010526400059461594
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,64,48,48,128,1,fp8,fp8,0,0.010520000010728836
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,64,48,48,128,1,float16,float16,0,0.012428800016641617
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,64,48,1,128,1,float16,float16,0,0.01045759990811348
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,64,48,1,128,1,float16,fp8,0,0.010462400317192078
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,64,48,1,128,1,fp8,fp8,0,0.010425599664449692
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,64,48,2,128,1,float16,float16,0,0.010494399815797806
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,64,48,2,128,1,float16,fp8,0,0.010422399640083313
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,64,48,2,128,1,fp8,fp8,0,0.0105103999376297
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,64,48,4,128,1,float16,float16,0,0.010516799986362457
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,64,48,4,128,1,float16,fp8,0,0.010435199737548828
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,64,48,4,128,1,fp8,fp8,0,0.010513599961996078
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,64,48,8,128,1,float16,float16,0,0.010496000200510025
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,64,48,8,128,1,float16,fp8,0,0.010502400249242783
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,64,48,8,128,1,fp8,fp8,0,0.01043199971318245
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,32,48,1,128,1,float16,float16,0,0.25754239559173586
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,32,48,1,128,1,float16,fp8,0,0.25843679904937744
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,32,48,1,128,1,fp8,fp8,0,0.2588191986083984
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,32,48,2,128,1,float16,float16,0,0.2545919895172119
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,32,48,2,128,1,float16,fp8,0,0.2584752082824707
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,32,48,2,128,1,fp8,fp8,0,0.25849599838256837
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,32,48,4,128,1,float16,float16,0,0.2581183910369873
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,32,48,4,128,1,float16,fp8,0,0.2580352067947388
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,32,48,8,128,1,float16,float16,0,0.26282079219818116
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,32,48,4,128,1,fp8,fp8,0,0.2576303958892822
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,32,48,8,128,1,float16,fp8,0,0.2581952095031738
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,32,48,8,128,1,fp8,fp8,0,0.25867838859558107
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,32,48,48,128,1,float16,float16,0,0.1589840054512024
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,32,48,48,128,1,float16,fp8,0,0.15596959590911866
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,32,48,48,128,1,fp8,fp8,0,0.15683360099792482
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,32,48,1,128,1,float16,fp8,0,0.13339519500732422
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,32,48,1,128,1,float16,float16,0,0.13487520217895507
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,32,48,1,128,1,fp8,fp8,0,0.1334239959716797
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,32,48,2,128,1,float16,float16,0,0.13360960483551027
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,32,48,2,128,1,float16,fp8,0,0.13328800201416016
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,32,48,2,128,1,fp8,fp8,0,0.13349119424819947
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,32,48,4,128,1,float16,float16,0,0.13540159463882445
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,32,48,4,128,1,float16,fp8,0,0.13352160453796386
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,32,48,4,128,1,fp8,fp8,0,0.134171199798584
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,32,48,8,128,1,float16,float16,0,0.13701119422912597
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,32,48,8,128,1,float16,fp8,0,0.1334272027015686
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,32,48,8,128,1,fp8,fp8,0,0.13411680459976197
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,32,48,48,128,1,float16,float16,0,0.08571680188179016
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,32,48,1,128,1,float16,float16,0,0.07015680074691773
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,32,48,48,128,1,fp8,fp8,0,0.08411679863929748
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,32,48,48,128,1,float16,fp8,0,0.08260480165481568
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,32,48,1,128,1,fp8,fp8,0,0.07197920083999634
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,32,48,1,128,1,float16,fp8,0,0.0707264006137848
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,32,48,2,128,1,float16,float16,0,0.07109599709510803
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,32,48,2,128,1,float16,fp8,0,0.07203360199928284
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,32,48,2,128,1,fp8,fp8,0,0.07103679776191711
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,32,48,4,128,1,float16,float16,0,0.0722495973110199
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,32,48,4,128,1,float16,fp8,0,0.07194560170173644
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,32,48,4,128,1,fp8,fp8,0,0.07114560008049012
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,32,48,8,128,1,float16,float16,0,0.07283520102500915
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,32,48,8,128,1,float16,fp8,0,0.07193920016288757
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,32,48,8,128,1,fp8,fp8,0,0.07190399765968322
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,32,48,48,128,1,float16,float16,0,0.045419201254844666
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,32,48,48,128,1,float16,fp8,0,0.04725599884986877
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,32,48,48,128,1,fp8,fp8,0,0.045311999320983884
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,32,48,1,128,1,float16,fp8,0,0.04116320013999939
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,32,48,1,128,1,float16,float16,0,0.04107039868831634
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,32,48,1,128,1,fp8,fp8,0,0.04118880033493042
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,32,48,2,128,1,float16,float16,0,0.041257598996162416
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,32,48,2,128,1,float16,fp8,0,0.04121919870376587
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,32,48,2,128,1,fp8,fp8,0,0.04116159975528717
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,32,48,4,128,1,float16,float16,0,0.041340801119804385
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,32,48,4,128,1,fp8,fp8,0,0.04113920032978058
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,32,48,4,128,1,float16,fp8,0,0.041233599185943604
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,32,48,8,128,1,float16,float16,0,0.04135839939117432
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,32,48,48,128,1,float16,float16,0,0.026984000205993654
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,32,48,8,128,1,float16,fp8,0,0.041206398606300355
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,32,48,8,128,1,fp8,fp8,0,0.041249600052833554
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,32,48,48,128,1,float16,fp8,0,0.02877599895000458
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,32,48,48,128,1,fp8,fp8,0,0.028884801268577575
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,32,48,1,128,1,float16,float16,0,0.02502079904079437
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,32,48,1,128,1,float16,fp8,0,0.02494560033082962
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,32,48,1,128,1,fp8,fp8,0,0.024798400700092316
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,32,48,2,128,1,float16,float16,0,0.02507199943065643
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,32,48,2,128,1,float16,fp8,0,0.02503040134906769
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,32,48,2,128,1,fp8,fp8,0,0.024801599979400634
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,32,48,4,128,1,float16,fp8,0,0.025604799389839172
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,32,48,4,128,1,float16,float16,0,0.02489439994096756
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,32,48,4,128,1,fp8,fp8,0,0.02481119930744171
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,32,48,8,128,1,float16,float16,0,0.026774400472640993
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,32,48,8,128,1,float16,fp8,0,0.025028800964355467
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,32,48,8,128,1,fp8,fp8,0,0.02484000027179718
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,32,48,48,128,1,float16,float16,0,0.0187376007437706
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,32,48,48,128,1,float16,fp8,0,0.01870879977941513
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,32,48,48,128,1,fp8,fp8,0,0.01860480010509491
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,32,48,1,128,1,float16,fp8,0,0.016788800060749055
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,32,48,1,128,1,float16,float16,0,0.016841599345207216
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,32,48,1,128,1,fp8,fp8,0,0.016836799681186676
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,32,48,2,128,1,float16,float16,0,0.016539199650287627
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,32,48,2,128,1,float16,fp8,0,0.016731199622154237
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,32,48,2,128,1,fp8,fp8,0,0.016828800737857818
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,32,48,4,128,1,float16,float16,0,0.016758400201797485
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,32,48,4,128,1,float16,fp8,0,0.016598400473594666
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,32,48,4,128,1,fp8,fp8,0,0.01671680063009262
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,32,48,48,128,1,float16,float16,0,0.014630399644374847
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,32,48,8,128,1,fp8,fp8,0,0.016595199704170227
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,32,48,8,128,1,float16,float16,0,0.016652800142765045
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,32,48,8,128,1,float16,fp8,0,0.01661760061979294
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,32,48,48,128,1,fp8,fp8,0,0.014547200500965118
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,32,48,1,128,1,float16,float16,0,0.014404800534248353
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,32,48,48,128,1,float16,fp8,0,0.014584000408649444
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,32,48,1,128,1,float16,fp8,0,0.014468799531459808
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,32,48,1,128,1,fp8,fp8,0,0.014455999433994293
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,32,48,2,128,1,float16,float16,0,0.01446239948272705
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,32,48,2,128,1,fp8,fp8,0,0.014454400539398194
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,32,48,2,128,1,float16,fp8,0,0.014502400159835815
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,32,48,4,128,1,float16,float16,0,0.014505599439144135
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,32,48,4,128,1,fp8,fp8,0,0.014448000490665436
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,32,48,4,128,1,float16,fp8,0,0.014574399590492249
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,32,48,8,128,1,float16,float16,0,0.014424000680446625
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,32,48,8,128,1,float16,fp8,0,0.014455999433994293
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,32,48,8,128,1,fp8,fp8,0,0.014449599385261535
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,32,48,48,128,1,float16,fp8,0,0.011360000073909759
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,32,48,48,128,1,float16,float16,0,0.012547199428081513
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,32,48,1,128,1,float16,float16,0,0.01053759977221489
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,32,48,48,128,1,fp8,fp8,0,0.010497599840164185
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,32,48,1,128,1,float16,fp8,0,0.010441599786281586
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,32,48,1,128,1,fp8,fp8,0,0.010428799688816071
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,32,48,2,128,1,float16,float16,0,0.01053600013256073
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,32,48,2,128,1,float16,fp8,0,0.010420800000429154
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,32,48,2,128,1,fp8,fp8,0,0.010468800365924836
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,32,48,4,128,1,float16,float16,0,0.010491199791431427
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,32,48,4,128,1,float16,fp8,0,0.010412800312042236
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,32,48,4,128,1,fp8,fp8,0,0.010462400317192078
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,32,48,8,128,1,float16,float16,0,0.01045759990811348
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,32,48,8,128,1,float16,fp8,0,0.010452800244092942
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,32,48,8,128,1,fp8,fp8,0,0.010494399815797806
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,32,48,48,128,1,float16,float16,0,0.011872000247240066
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,32,48,48,128,1,float16,fp8,0,0.010480000078678131
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,32,48,48,128,1,fp8,fp8,0,0.010566399991512298
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,32,48,1,128,1,float16,float16,0,0.010500799864530563
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,32,48,1,128,1,float16,fp8,0,0.010424000024795533
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,32,48,1,128,1,fp8,fp8,0,0.010467199981212616
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,32,48,2,128,1,float16,float16,0,0.010393600165843963
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,32,48,2,128,1,float16,fp8,0,0.010473600029945374
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,32,48,2,128,1,fp8,fp8,0,0.010526400059461594
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,32,48,4,128,1,float16,float16,0,0.01043040007352829
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,32,48,4,128,1,float16,fp8,0,0.010480000078678131
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,32,48,4,128,1,fp8,fp8,0,0.010518400371074677
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,32,48,8,128,1,float16,float16,0,0.010480000078678131
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,32,48,8,128,1,float16,fp8,0,0.0104592002928257
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,32,48,8,128,1,fp8,fp8,0,0.010406400263309478
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,16,48,1,128,1,float16,float16,0,0.2455440044403076
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,16,48,1,128,1,float16,fp8,0,0.23550078868865967
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,16,48,1,128,1,fp8,fp8,0,0.2358191967010498
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,16,48,2,128,1,float16,float16,0,0.2450160026550293
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,16,48,2,128,1,fp8,fp8,0,0.23610079288482666
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,16,48,2,128,1,float16,fp8,0,0.23580479621887207
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,16,48,4,128,1,float16,float16,0,0.24660000801086426
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,16,48,4,128,1,float16,fp8,0,0.23616321086883546
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,16,48,4,128,1,fp8,fp8,0,0.23597118854522706
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,16,48,8,128,1,float16,fp8,0,0.2361504077911377
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,16,48,8,128,1,float16,float16,0,0.24827840328216552
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,16,48,8,128,1,fp8,fp8,0,0.2358112096786499
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,16,48,48,128,1,float16,float16,0,0.14331040382385254
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,16,48,48,128,1,float16,fp8,0,0.13334879875183106
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,16,48,48,128,1,fp8,fp8,0,0.13335039615631103
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,16,48,1,128,1,float16,float16,0,0.12804960012435912
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,16,48,1,128,1,float16,fp8,0,0.12109119892120361
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,16,48,1,128,1,fp8,fp8,0,0.12111680507659912
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,16,48,2,128,1,float16,float16,0,0.12790559530258178
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,16,48,2,128,1,float16,fp8,0,0.1212399959564209
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,16,48,2,128,1,fp8,fp8,0,0.12106399536132813
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,16,48,4,128,1,float16,float16,0,0.12916799783706664
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,16,48,4,128,1,float16,fp8,0,0.12116800546646118
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,16,48,4,128,1,fp8,fp8,0,0.12112480401992798
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,16,48,8,128,1,float16,float16,0,0.1299631953239441
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,16,48,8,128,1,float16,fp8,0,0.12147680521011353
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,16,48,8,128,1,fp8,fp8,0,0.12116960287094117
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,16,48,48,128,1,float16,float16,0,0.07416480183601379
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,16,48,48,128,1,float16,fp8,0,0.07163519859313965
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,16,48,48,128,1,fp8,fp8,0,0.06987680196762085
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,16,48,1,128,1,float16,float16,0,0.06954560279846192
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,16,48,1,128,1,float16,fp8,0,0.0657920002937317
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,16,48,1,128,1,fp8,fp8,0,0.06568800210952759
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,16,48,2,128,1,float16,float16,0,0.07004960179328919
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,16,48,2,128,1,float16,fp8,0,0.06578239798545837
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,16,48,2,128,1,fp8,fp8,0,0.06576319932937622
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,16,48,4,128,1,float16,float16,0,0.07011680006980896
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,16,48,4,128,1,float16,fp8,0,0.06578239798545837
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,16,48,8,128,1,float16,float16,0,0.06996960043907166
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,16,48,4,128,1,fp8,fp8,0,0.06570879817008972
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,16,48,8,128,1,float16,fp8,0,0.0658079981803894
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,16,48,48,128,1,float16,float16,0,0.04125120043754578
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,16,48,8,128,1,fp8,fp8,0,0.06584960222244263
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,16,48,1,128,1,float16,float16,0,0.04008640050888061
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,16,48,48,128,1,float16,fp8,0,0.0411215990781784
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,16,48,48,128,1,fp8,fp8,0,0.041198399662971494
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,16,48,1,128,1,float16,fp8,0,0.03858399987220764
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,16,48,1,128,1,fp8,fp8,0,0.03711360096931458
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,16,48,2,128,1,float16,float16,0,0.03918400108814239
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,16,48,2,128,1,float16,fp8,0,0.038796800374984744
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,16,48,2,128,1,fp8,fp8,0,0.03718560039997101
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,16,48,4,128,1,float16,float16,0,0.039164799451828006
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,16,48,4,128,1,float16,fp8,0,0.037441599369049075
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,16,48,4,128,1,fp8,fp8,0,0.037161600589752194
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,16,48,8,128,1,float16,float16,0,0.039238399267196654
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,16,48,8,128,1,float16,fp8,0,0.03738240003585815
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,16,48,48,128,1,float16,float16,0,0.02738400101661682
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,16,48,48,128,1,float16,fp8,0,0.02483839988708496
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,16,48,8,128,1,fp8,fp8,0,0.037459200620651244
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,16,48,48,128,1,fp8,fp8,0,0.0248879998922348
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,16,48,1,128,1,float16,float16,0,0.024784000217914583
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,16,48,1,128,1,float16,fp8,0,0.023035199940204622
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,16,48,2,128,1,float16,float16,0,0.024859200417995452
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,16,48,1,128,1,fp8,fp8,0,0.024911999702453613
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,16,48,2,128,1,fp8,fp8,0,0.02462559938430786
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,16,48,2,128,1,float16,fp8,0,0.023907199501991272
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,16,48,4,128,1,float16,float16,0,0.02508159875869751
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,16,48,4,128,1,float16,fp8,0,0.024728000164031982
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,16,48,4,128,1,fp8,fp8,0,0.023556800186634065
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,16,48,8,128,1,float16,fp8,0,0.024689599871635437
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,16,48,8,128,1,float16,float16,0,0.025148800015449523
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,16,48,8,128,1,fp8,fp8,0,0.023528000712394713
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,16,48,48,128,1,float16,float16,0,0.01828960031270981
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,16,48,48,128,1,float16,fp8,0,0.016752000153064727
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,16,48,48,128,1,fp8,fp8,0,0.016756799817085267
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,16,48,1,128,1,float16,float16,0,0.016604800522327424
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,16,48,1,128,1,fp8,fp8,0,0.016571199893951415
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,16,48,2,128,1,float16,float16,0,0.01661919951438904
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,16,48,1,128,1,float16,fp8,0,0.016689600050449373
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,16,48,2,128,1,fp8,fp8,0,0.016531200706958772
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,16,48,2,128,1,float16,fp8,0,0.01659200042486191
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,16,48,4,128,1,float16,float16,0,0.016791999340057373
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,16,48,4,128,1,float16,fp8,0,0.016603200137615202
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,16,48,4,128,1,fp8,fp8,0,0.016521599888801575
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,16,48,8,128,1,float16,float16,0,0.016777600347995757
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,16,48,8,128,1,float16,fp8,0,0.01660960018634796
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,16,48,8,128,1,fp8,fp8,0,0.016572800278663636
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16,48,48,128,1,float16,fp8,0,0.014420799911022186
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16,48,48,128,1,float16,float16,0,0.014766399562358857
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16,48,48,128,1,fp8,fp8,0,0.014425599575042724
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16,48,1,128,1,float16,float16,0,0.014382399618625641
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16,48,1,128,1,fp8,fp8,0,0.01316000074148178
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16,48,1,128,1,float16,fp8,0,0.012723200023174286
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16,48,2,128,1,float16,float16,0,0.014395199716091156
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16,48,2,128,1,float16,fp8,0,0.013478399813175201
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16,48,2,128,1,fp8,fp8,0,0.01255040019750595
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16,48,4,128,1,float16,float16,0,0.012697599828243256
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16,48,4,128,1,float16,fp8,0,0.012558400630950928
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16,48,8,128,1,float16,float16,0,0.013201600313186646
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16,48,4,128,1,fp8,fp8,0,0.012491200119256973
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16,48,8,128,1,float16,fp8,0,0.012534399330615998
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16,48,8,128,1,fp8,fp8,0,0.012572799623012543
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16,48,48,128,1,float16,float16,0,0.012323199957609176
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16,48,48,128,1,float16,fp8,0,0.010516799986362457
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16,48,48,128,1,fp8,fp8,0,0.010473600029945374
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16,48,1,128,1,float16,float16,0,0.010582400113344192
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16,48,1,128,1,fp8,fp8,0,0.010492800176143647
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16,48,1,128,1,float16,fp8,0,0.01050880029797554
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16,48,2,128,1,float16,float16,0,0.010396800190210342
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16,48,2,128,1,float16,fp8,0,0.010516799986362457
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16,48,2,128,1,fp8,fp8,0,0.010489600151777268
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16,48,4,128,1,float16,float16,0,0.010441599786281586
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16,48,4,128,1,float16,fp8,0,0.010531199723482132
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16,48,4,128,1,fp8,fp8,0,0.010463999956846238
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16,48,8,128,1,float16,fp8,0,0.010518400371074677
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16,48,8,128,1,float16,float16,0,0.010422399640083313
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16,48,48,128,1,float16,float16,0,0.012299200147390365
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16,48,8,128,1,fp8,fp8,0,0.01045759990811348
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16,48,48,128,1,float16,fp8,0,0.010427200049161912
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16,48,1,128,1,float16,float16,0,0.010491199791431427
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16,48,48,128,1,fp8,fp8,0,0.010438399761915207
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16,48,1,128,1,float16,fp8,0,0.010515200346708298
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16,48,2,128,1,float16,float16,0,0.010507199913263321
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16,48,1,128,1,fp8,fp8,0,0.010385599732398988
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16,48,2,128,1,float16,fp8,0,0.010411199927330018
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16,48,2,128,1,fp8,fp8,0,0.010345599800348281
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16,48,4,128,1,float16,float16,0,0.010345599800348281
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16,48,4,128,1,float16,fp8,0,0.010425599664449692
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16,48,4,128,1,fp8,fp8,0,0.010372799634933472
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16,48,8,128,1,float16,float16,0,0.010438399761915207
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16,48,8,128,1,float16,fp8,0,0.010406400263309478
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16,48,8,128,1,fp8,fp8,0,0.010435199737548828
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,1,48,1,128,1,float16,float16,0,0.24041600227355958
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,1,48,1,128,1,float16,fp8,0,0.22611680030822753
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,1,48,1,128,1,fp8,fp8,0,0.22565760612487792
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,1,48,2,128,1,float16,float16,0,0.24061279296875
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,1,48,2,128,1,float16,fp8,0,0.22623040676116943
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,1,48,2,128,1,fp8,fp8,0,0.22554559707641603
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,1,48,4,128,1,float16,float16,0,0.24041759967803955
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,1,48,4,128,1,float16,fp8,0,0.22605440616607667
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,1,48,4,128,1,fp8,fp8,0,0.22576799392700195
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,1,48,8,128,1,float16,float16,0,0.24050240516662597
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,1,48,8,128,1,fp8,fp8,0,0.2263792037963867
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,1,48,8,128,1,float16,fp8,0,0.22564001083374025
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,1,48,48,128,1,float16,float16,0,0.1268447995185852
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,1,48,48,128,1,float16,fp8,0,0.11906720399856567
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,1,48,48,128,1,fp8,fp8,0,0.11852320432662963
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,1,48,1,128,1,float16,float16,0,0.12547199726104735
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,1,48,1,128,1,float16,fp8,0,0.11830559968948365
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,1,48,1,128,1,fp8,fp8,0,0.11709760427474976
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,1,48,2,128,1,float16,fp8,0,0.11843680143356324
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,1,48,2,128,1,float16,float16,0,0.125273597240448
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,1,48,2,128,1,fp8,fp8,0,0.1169935941696167
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,1,48,4,128,1,float16,float16,0,0.12703679800033568
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,1,48,4,128,1,float16,fp8,0,0.1171231985092163
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,1,48,4,128,1,fp8,fp8,0,0.11699839830398559
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,1,48,8,128,1,float16,float16,0,0.12669600248336793
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,1,48,8,128,1,float16,fp8,0,0.11702560186386109
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,1,48,8,128,1,fp8,fp8,0,0.11708159446716308
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1,48,48,128,1,float16,float16,0,0.06996960043907166
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1,48,48,128,1,float16,fp8,0,0.06375359892845153
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1,48,48,128,1,fp8,fp8,0,0.06375839710235595
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1,48,1,128,1,float16,float16,0,0.06843839883804322
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1,48,1,128,1,float16,fp8,0,0.0637279987335205
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1,48,1,128,1,fp8,fp8,0,0.06370880007743836
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1,48,2,128,1,float16,float16,0,0.06805440187454223
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1,48,2,128,1,float16,fp8,0,0.06386079788208007
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1,48,2,128,1,fp8,fp8,0,0.06378080248832703
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1,48,4,128,1,float16,float16,0,0.0679040014743805
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1,48,4,128,1,float16,fp8,0,0.06377120018005371
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1,48,4,128,1,fp8,fp8,0,0.06382880210876465
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1,48,8,128,1,float16,float16,0,0.06791200041770935
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1,48,8,128,1,fp8,fp8,0,0.06376799941062927
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1,48,8,128,1,float16,fp8,0,0.0639743983745575
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1,48,48,128,1,float16,float16,0,0.03932639956474304
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1,48,48,128,1,float16,fp8,0,0.037134400010108946
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1,48,1,128,1,float16,float16,0,0.039190399646759036
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1,48,48,128,1,fp8,fp8,0,0.037115201354026794
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1,48,1,128,1,float16,fp8,0,0.03707199990749359
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1,48,1,128,1,fp8,fp8,0,0.03704800009727478
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1,48,2,128,1,float16,fp8,0,0.03712640106678009
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1,48,2,128,1,float16,float16,0,0.039315199851989745
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1,48,2,128,1,fp8,fp8,0,0.03709760010242462
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1,48,4,128,1,float16,float16,0,0.03918719887733459
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1,48,4,128,1,float16,fp8,0,0.03731679916381836
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1,48,4,128,1,fp8,fp8,0,0.037062400579452516
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1,48,8,128,1,float16,float16,0,0.03909280002117157
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1,48,8,128,1,float16,fp8,0,0.03722079992294312
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1,48,48,128,1,float16,float16,0,0.02664639949798584
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1,48,8,128,1,fp8,fp8,0,0.03713119924068451
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1,48,48,128,1,fp8,fp8,0,0.024676799774169922
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1,48,1,128,1,float16,float16,0,0.024798400700092316
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1,48,48,128,1,float16,fp8,0,0.02290560007095337
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1,48,1,128,1,float16,fp8,0,0.023984000086784363
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1,48,1,128,1,fp8,fp8,0,0.022832000255584718
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1,48,2,128,1,float16,float16,0,0.025033599138259886
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1,48,2,128,1,float16,fp8,0,0.024481600522994994
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1,48,2,128,1,fp8,fp8,0,0.022830399870872497
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1,48,4,128,1,float16,float16,0,0.024963200092315674
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1,48,4,128,1,float16,fp8,0,0.023839999735355378
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1,48,4,128,1,fp8,fp8,0,0.022868800163269042
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1,48,8,128,1,float16,float16,0,0.02496480047702789
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1,48,8,128,1,float16,fp8,0,0.022819200158119203
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1,48,8,128,1,fp8,fp8,0,0.02287680059671402
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1,48,48,128,1,float16,fp8,0,0.01648319959640503
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1,48,48,128,1,float16,float16,0,0.018718400597572328
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1,48,48,128,1,fp8,fp8,0,0.01650400012731552
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1,48,1,128,1,float16,float16,0,0.016707199811935424
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1,48,1,128,1,float16,fp8,0,0.016620799899101257
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1,48,2,128,1,float16,float16,0,0.016625599563121797
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1,48,1,128,1,fp8,fp8,0,0.01655520051717758
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1,48,2,128,1,fp8,fp8,0,0.01655520051717758
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1,48,4,128,1,float16,float16,0,0.016633599996566772
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1,48,2,128,1,float16,fp8,0,0.016022400557994844
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1,48,4,128,1,float16,fp8,0,0.0164560005068779
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1,48,4,128,1,fp8,fp8,0,0.016444799304008485
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1,48,8,128,1,float16,float16,0,0.016577599942684172
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1,48,8,128,1,float16,fp8,0,0.01655520051717758
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1,48,8,128,1,fp8,fp8,0,0.01656319946050644
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1,48,48,128,1,float16,float16,0,0.014478400349617004
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1,48,48,128,1,float16,fp8,0,0.012534399330615998
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1,48,48,128,1,fp8,fp8,0,0.012569600343704223
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1,48,1,128,1,float16,float16,0,0.012521600723266602
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1,48,1,128,1,float16,fp8,0,0.012579199671745301
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1,48,1,128,1,fp8,fp8,0,0.012579199671745301
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1,48,2,128,1,float16,float16,0,0.012729600071907043
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1,48,2,128,1,float16,fp8,0,0.012587200105190276
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1,48,2,128,1,fp8,fp8,0,0.012683199346065521
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1,48,4,128,1,float16,float16,0,0.01255359947681427
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1,48,4,128,1,float16,fp8,0,0.012468799948692322
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1,48,4,128,1,fp8,fp8,0,0.01249919980764389
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1,48,8,128,1,float16,float16,0,0.013787199556827546
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1,48,8,128,1,float16,fp8,0,0.0125231996178627
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1,48,48,128,1,float16,float16,0,0.012348800152540206
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1,48,8,128,1,fp8,fp8,0,0.012595200538635254
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1,48,48,128,1,float16,fp8,0,0.010515200346708298
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1,48,48,128,1,fp8,fp8,0,0.010406400263309478
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1,48,1,128,1,float16,float16,0,0.01053439974784851
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1,48,1,128,1,float16,fp8,0,0.010427200049161912
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1,48,2,128,1,float16,float16,0,0.010556799918413162
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1,48,1,128,1,fp8,fp8,0,0.0104592002928257
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1,48,2,128,1,float16,fp8,0,0.01040000021457672
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1,48,2,128,1,fp8,fp8,0,0.010496000200510025
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1,48,4,128,1,float16,float16,0,0.010427200049161912
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1,48,4,128,1,float16,fp8,0,0.010444799810647965
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1,48,8,128,1,float16,float16,0,0.010487999767065048
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1,48,4,128,1,fp8,fp8,0,0.0105103999376297
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1,48,8,128,1,float16,fp8,0,0.010487999767065048
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1,48,8,128,1,fp8,fp8,0,0.010436800122261048
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1,48,48,128,1,float16,float16,0,0.012409599870443344
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1,48,48,128,1,float16,fp8,0,0.010390400141477584
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1,48,48,128,1,fp8,fp8,0,0.010409600287675857
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1,48,1,128,1,float16,fp8,0,0.010422399640083313
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1,48,1,128,1,float16,float16,0,0.010606399923563003
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1,48,2,128,1,float16,float16,0,0.010356800258159637
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1,48,1,128,1,fp8,fp8,0,0.01053439974784851
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1,48,2,128,1,float16,fp8,0,0.010396800190210342
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1,48,2,128,1,fp8,fp8,0,0.010372799634933472
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1,48,4,128,1,float16,float16,0,0.010414399951696397
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1,48,4,128,1,float16,fp8,0,0.010345599800348281
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1,48,4,128,1,fp8,fp8,0,0.01045600026845932
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1,48,8,128,1,float16,float16,0,0.010425599664449692
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1,48,8,128,1,float16,fp8,0,0.010369600355625152
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1,48,8,128,1,fp8,fp8,0,0.010375999659299851
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,16384,40,1,128,1,float16,fp8,0,13.780076599121093
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,16384,40,1,128,1,fp8,fp8,0,13.533978271484376
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,16384,40,1,128,1,float16,float16,0,23.867352294921876
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,16384,40,2,128,1,float16,float16,0,22.020046997070313
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,16384,40,2,128,1,float16,fp8,0,14.873226928710938
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,16384,40,2,128,1,fp8,fp8,0,14.66943359375
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,16384,40,4,128,1,float16,fp8,0,14.731190490722657
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,16384,40,4,128,1,fp8,fp8,0,15.0474365234375
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,16384,40,4,128,1,float16,float16,0,25.355242919921874
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,16384,40,8,128,1,float16,float16,0,24.049502563476562
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,16384,40,8,128,1,float16,fp8,0,14.543959045410157
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,16384,40,8,128,1,fp8,fp8,0,15.15267333984375
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16384,40,40,128,1,float16,fp8,0,7.10833740234375
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16384,40,40,128,1,fp8,fp8,0,7.482465362548828
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16384,40,1,128,1,float16,float16,0,12.446710205078125
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16384,40,1,128,1,float16,fp8,0,6.945995330810547
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16384,40,1,128,1,fp8,fp8,0,7.162638092041016
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16384,40,2,128,1,float16,fp8,0,6.925198364257812
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16384,40,2,128,1,fp8,fp8,0,7.068233489990234
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16384,40,2,128,1,float16,float16,0,12.097571563720702
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16384,40,4,128,1,float16,fp8,0,7.117518615722656
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16384,40,4,128,1,float16,float16,0,12.67177734375
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16384,40,4,128,1,fp8,fp8,0,7.3276222229003904
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16384,40,8,128,1,fp8,fp8,0,7.313448333740235
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16384,40,8,128,1,float16,fp8,0,7.215574645996094
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16384,40,8,128,1,float16,float16,0,12.399723052978516
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16384,40,40,128,1,float16,fp8,0,3.708687973022461
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16384,40,40,128,1,fp8,fp8,0,3.6545089721679687
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16384,40,1,128,1,float16,fp8,0,3.4340030670166017
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16384,40,1,128,1,float16,float16,0,5.567281723022461
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16384,40,1,128,1,fp8,fp8,0,3.5475391387939452
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16384,40,2,128,1,float16,float16,0,5.888812637329101
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16384,40,2,128,1,float16,fp8,0,3.3895694732666017
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16384,40,2,128,1,fp8,fp8,0,3.6833614349365233
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16384,40,4,128,1,float16,float16,0,5.38946418762207
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16384,40,4,128,1,float16,fp8,0,3.5063953399658203
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16384,40,4,128,1,fp8,fp8,0,3.4854015350341796
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16384,40,8,128,1,float16,fp8,0,3.7279617309570314
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16384,40,8,128,1,float16,float16,0,5.820259094238281
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16384,40,8,128,1,fp8,fp8,0,3.602056121826172
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16384,40,40,128,1,float16,fp8,0,1.9080448150634766
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16384,40,40,128,1,fp8,fp8,0,1.8208192825317382
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16384,40,1,128,1,float16,float16,0,1.970684814453125
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16384,40,1,128,1,float16,fp8,0,2.553838348388672
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16384,40,1,128,1,fp8,fp8,0,1.7515439987182617
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16384,40,2,128,1,float16,float16,0,2.266619110107422
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16384,40,2,128,1,float16,fp8,0,2.390227127075195
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16384,40,2,128,1,fp8,fp8,0,1.749897575378418
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16384,40,4,128,1,float16,float16,0,1.937436866760254
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16384,40,4,128,1,float16,fp8,0,1.9339775085449218
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16384,40,4,128,1,fp8,fp8,0,1.7571504592895508
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16384,40,8,128,1,float16,float16,0,2.013540840148926
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16384,40,8,128,1,float16,fp8,0,1.922235107421875
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16384,40,8,128,1,fp8,fp8,0,1.7594287872314454
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,12288,40,1,128,1,float16,float16,0,14.258111572265625
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,12288,40,1,128,1,float16,fp8,0,8.180538940429688
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,12288,40,1,128,1,fp8,fp8,0,8.231047821044921
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,12288,40,2,128,1,float16,float16,0,14.324198913574218
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,12288,40,2,128,1,float16,fp8,0,8.389868927001952
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,12288,40,2,128,1,fp8,fp8,0,8.119609832763672
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,12288,40,4,128,1,float16,float16,0,14.47638397216797
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16384,40,40,128,1,float16,float16,0,2.1153280258178713
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16384,40,40,128,1,float16,float16,0,11.228132629394532
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16384,40,40,128,1,float16,float16,0,5.262723159790039
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,12288,40,4,128,1,fp8,fp8,0,7.758837127685547
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,12288,40,4,128,1,float16,fp8,0,8.306136322021484
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,12288,40,8,128,1,fp8,fp8,0,7.871453094482422
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,12288,40,40,128,1,float16,float16,0,6.319347381591797
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,12288,40,8,128,1,float16,fp8,0,7.926683044433593
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,12288,40,8,128,1,float16,float16,0,13.583163452148437
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,12288,40,40,128,1,float16,fp8,0,4.043681716918945
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,12288,40,40,128,1,fp8,fp8,0,4.110355377197266
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,12288,40,1,128,1,float16,float16,0,6.23243522644043
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,12288,40,1,128,1,fp8,fp8,0,4.15136947631836
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,12288,40,1,128,1,float16,fp8,0,4.4687553405761715
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,12288,40,2,128,1,float16,float16,0,6.1754608154296875
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,12288,40,2,128,1,float16,fp8,0,4.262732696533203
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,12288,40,2,128,1,fp8,fp8,0,4.213641738891601
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,12288,40,4,128,1,float16,fp8,0,4.16595344543457
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,12288,40,4,128,1,fp8,fp8,0,4.091678237915039
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,12288,40,4,128,1,float16,float16,0,6.975393676757813
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,12288,40,8,128,1,float16,float16,0,6.420375823974609
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,12288,40,40,128,1,float16,float16,0,3.5265296936035155
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,12288,40,8,128,1,float16,fp8,0,4.418283081054687
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,12288,40,8,128,1,fp8,fp8,0,4.3608753204345705
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,12288,40,40,128,1,float16,fp8,0,2.326763153076172
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,12288,40,1,128,1,float16,fp8,0,1.9742992401123047
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,12288,40,40,128,1,fp8,fp8,0,2.473908805847168
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,12288,40,1,128,1,float16,float16,0,2.30164794921875
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,12288,40,1,128,1,fp8,fp8,0,1.9827903747558593
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,12288,40,2,128,1,float16,fp8,0,2.0368751525878905
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,12288,40,2,128,1,float16,float16,0,2.9890560150146483
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,12288,40,2,128,1,fp8,fp8,0,2.699488067626953
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,12288,40,4,128,1,float16,fp8,0,2.0051984786987305
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,12288,40,4,128,1,float16,float16,0,2.908216094970703
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,12288,40,4,128,1,fp8,fp8,0,2.3239072799682616
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,12288,40,8,128,1,float16,float16,0,2.4635408401489256
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,12288,40,8,128,1,float16,fp8,0,1.9854991912841797
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,12288,40,40,128,1,float16,float16,0,1.2409855842590332
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,12288,40,8,128,1,fp8,fp8,0,2.5797887802124024
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,12288,40,40,128,1,float16,fp8,0,1.924679946899414
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,12288,40,40,128,1,fp8,fp8,0,1.1354687690734864
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,12288,40,1,128,1,float16,float16,0,1.1683919906616211
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,12288,40,1,128,1,float16,fp8,0,1.0320544242858887
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,12288,40,1,128,1,fp8,fp8,0,1.0416383743286133
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,12288,40,2,128,1,float16,fp8,0,1.04618558883667
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,12288,40,2,128,1,float16,float16,0,1.369966411590576
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,12288,40,2,128,1,fp8,fp8,0,1.0486543655395508
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,12288,40,4,128,1,float16,float16,0,1.1634639739990233
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,12288,40,4,128,1,float16,fp8,0,1.1349967956542968
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,12288,40,4,128,1,fp8,fp8,0,1.048256015777588
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,12288,40,8,128,1,float16,float16,0,1.2568448066711426
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,12288,40,8,128,1,float16,fp8,0,1.0519328117370605
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,12288,40,8,128,1,fp8,fp8,0,1.0493359565734863
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,10240,40,1,128,1,float16,fp8,0,5.575344085693359
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,10240,40,1,128,1,fp8,fp8,0,5.583955383300781
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,10240,40,2,128,1,float16,float16,0,8.996603393554688
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,10240,40,1,128,1,float16,float16,0,9.442291259765625
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,10240,40,2,128,1,float16,fp8,0,5.647411346435547
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,10240,40,2,128,1,fp8,fp8,0,5.676023864746094
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,10240,40,4,128,1,float16,fp8,0,5.744105529785156
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,10240,40,4,128,1,fp8,fp8,0,5.7816001892089846
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,10240,40,4,128,1,float16,float16,0,9.461615753173827
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,10240,40,8,128,1,float16,float16,0,9.812580871582032
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,10240,40,8,128,1,float16,fp8,0,5.996470260620117
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,10240,40,40,128,1,float16,float16,0,4.969140625
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,10240,40,8,128,1,fp8,fp8,0,5.956379318237305
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,10240,40,40,128,1,float16,fp8,0,3.256857681274414
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,10240,40,40,128,1,fp8,fp8,0,3.0026432037353517
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,10240,40,1,128,1,float16,fp8,0,2.944131278991699
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,10240,40,1,128,1,fp8,fp8,0,2.8833215713500975
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,10240,40,1,128,1,float16,float16,0,5.060667037963867
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,10240,40,2,128,1,float16,float16,0,3.876006317138672
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,10240,40,2,128,1,float16,fp8,0,2.914508819580078
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,10240,40,2,128,1,fp8,fp8,0,2.946358489990234
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,10240,40,4,128,1,float16,fp8,0,2.7702688217163085
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,10240,40,4,128,1,fp8,fp8,0,2.8300928115844726
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,10240,40,4,128,1,float16,float16,0,5.095017623901367
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,10240,40,8,128,1,float16,fp8,0,2.894196891784668
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,10240,40,8,128,1,float16,float16,0,4.48330078125
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,10240,40,8,128,1,fp8,fp8,0,2.851083183288574
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,10240,40,40,128,1,float16,float16,0,2.7634960174560548
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,10240,40,40,128,1,float16,fp8,0,1.6700143814086914
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,10240,40,40,128,1,fp8,fp8,0,1.6917871475219726
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,10240,40,1,128,1,float16,float16,0,1.9701904296875
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,10240,40,1,128,1,fp8,fp8,0,1.4157584190368653
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,10240,40,1,128,1,float16,fp8,0,2.038006401062012
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,10240,40,2,128,1,float16,float16,0,1.6480287551879882
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,10240,40,2,128,1,fp8,fp8,0,1.424612808227539
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,10240,40,2,128,1,float16,fp8,0,2.1391263961791993
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,10240,40,4,128,1,float16,float16,0,1.773806381225586
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,10240,40,4,128,1,float16,fp8,0,1.767153549194336
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,10240,40,4,128,1,fp8,fp8,0,1.4260031700134277
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,10240,40,8,128,1,float16,float16,0,1.5880288124084472
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,10240,40,40,128,1,float16,float16,0,0.8961855888366699
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,10240,40,8,128,1,fp8,fp8,0,1.429851245880127
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,10240,40,40,128,1,float16,fp8,0,1.0276080131530763
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,10240,40,8,128,1,float16,fp8,0,2.2395200729370117
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,10240,40,40,128,1,fp8,fp8,0,0.9633551597595215
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,10240,40,1,128,1,float16,float16,0,0.8603887557983398
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,10240,40,1,128,1,float16,fp8,0,0.7533247947692872
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,10240,40,1,128,1,fp8,fp8,0,0.8057295799255371
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,10240,40,2,128,1,float16,float16,0,0.8616703987121582
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,10240,40,2,128,1,fp8,fp8,0,0.7542655944824219
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,10240,40,2,128,1,float16,fp8,0,1.0780624389648437
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,10240,40,4,128,1,float16,float16,0,0.8564736366271972
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,10240,40,4,128,1,float16,fp8,0,0.7861968040466308
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,10240,40,4,128,1,fp8,fp8,0,0.7532080173492431
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,10240,40,8,128,1,float16,float16,0,0.939902400970459
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,10240,40,8,128,1,float16,fp8,0,0.7894832134246826
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,10240,40,8,128,1,fp8,fp8,0,0.7907375812530517
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,8192,40,1,128,1,float16,fp8,0,7.6404579162597654
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,8192,40,1,128,1,fp8,fp8,0,7.6523582458496096
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,8192,40,1,128,1,float16,float16,0,12.306928253173828
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,8192,40,2,128,1,float16,float16,0,12.181755065917969
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,8192,40,2,128,1,float16,fp8,0,7.630209350585938
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,8192,40,2,128,1,fp8,fp8,0,7.8043983459472654
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,8192,40,4,128,1,float16,fp8,0,7.797262573242188
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,8192,40,4,128,1,float16,float16,0,12.844012451171874
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,8192,40,4,128,1,fp8,fp8,0,7.848798370361328
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,8192,40,8,128,1,float16,fp8,0,7.892671966552735
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,8192,40,8,128,1,float16,float16,0,14.369964599609375
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,8192,40,40,128,1,float16,float16,0,6.687161254882812
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,8192,40,8,128,1,fp8,fp8,0,7.580745697021484
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,8192,40,40,128,1,float16,fp8,0,3.968628692626953
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,8192,40,40,128,1,fp8,fp8,0,4.298870468139649
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,8192,40,1,128,1,float16,fp8,0,3.6775184631347657
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,8192,40,1,128,1,fp8,fp8,0,3.640580749511719
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,8192,40,1,128,1,float16,float16,0,6.219615936279297
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,8192,40,2,128,1,float16,fp8,0,3.6852752685546877
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,8192,40,2,128,1,fp8,fp8,0,3.724699020385742
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,8192,40,2,128,1,float16,float16,0,6.3031566619873045
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,8192,40,4,128,1,float16,float16,0,6.2168224334716795
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,8192,40,4,128,1,float16,fp8,0,3.729547119140625
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,8192,40,4,128,1,fp8,fp8,0,3.743239974975586
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,8192,40,40,128,1,float16,float16,0,3.0716352462768555
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,8192,40,8,128,1,float16,float16,0,6.423455810546875
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,8192,40,8,128,1,float16,fp8,0,3.880201721191406
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,8192,40,8,128,1,fp8,fp8,0,3.6485198974609374
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,8192,40,1,128,1,float16,float16,0,2.300065612792969
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,8192,40,40,128,1,float16,fp8,0,2.4831472396850587
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,8192,40,40,128,1,fp8,fp8,0,2.495636749267578
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,8192,40,1,128,1,fp8,fp8,0,1.829707145690918
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,8192,40,1,128,1,float16,fp8,0,2.236631965637207
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,8192,40,2,128,1,float16,float16,0,2.332147216796875
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,8192,40,2,128,1,float16,fp8,0,1.852027130126953
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,8192,40,2,128,1,fp8,fp8,0,1.8253904342651368
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,8192,40,4,128,1,float16,fp8,0,1.8430751800537108
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,8192,40,4,128,1,fp8,fp8,0,1.8130239486694335
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,8192,40,4,128,1,float16,float16,0,3.4021633148193358
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,8192,40,8,128,1,float16,float16,0,2.6610895156860352
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,8192,40,40,128,1,float16,float16,0,1.7131183624267579
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,8192,40,8,128,1,float16,fp8,0,1.8613216400146484
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,8192,40,8,128,1,fp8,fp8,0,1.8216928482055663
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,8192,40,40,128,1,float16,fp8,0,1.4614447593688964
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,8192,40,1,128,1,float16,fp8,0,0.9575008392333985
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,8192,40,1,128,1,float16,float16,0,1.0568016052246094
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,8192,40,40,128,1,fp8,fp8,0,1.5437312126159668
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,8192,40,1,128,1,fp8,fp8,0,0.9790240287780761
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,8192,40,2,128,1,float16,float16,0,1.0925328254699707
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,8192,40,2,128,1,float16,fp8,0,1.1840144157409669
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,8192,40,2,128,1,fp8,fp8,0,0.9629728317260742
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,8192,40,4,128,1,float16,float16,0,1.0700688362121582
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,8192,40,4,128,1,fp8,fp8,0,1.0307231903076173
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,8192,40,4,128,1,float16,fp8,0,1.1190640449523925
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,8192,40,8,128,1,float16,float16,0,1.0740511894226075
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,8192,40,8,128,1,float16,fp8,0,1.1285632133483887
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,8192,40,40,128,1,float16,float16,0,0.6290031909942627
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,8192,40,40,128,1,float16,fp8,0,0.5556575775146484
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,8192,40,8,128,1,fp8,fp8,0,0.9738256454467773
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,8192,40,40,128,1,fp8,fp8,0,0.5493184089660644
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,8192,40,1,128,1,float16,float16,0,0.587343978881836
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,8192,40,1,128,1,float16,fp8,0,0.5381824016571045
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,8192,40,1,128,1,fp8,fp8,0,0.5215167999267578
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,8192,40,2,128,1,float16,float16,0,0.577623987197876
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,8192,40,2,128,1,float16,fp8,0,0.6082111835479737
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,8192,40,2,128,1,fp8,fp8,0,0.5358863830566406
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,8192,40,4,128,1,float16,float16,0,0.5705872058868409
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,8192,40,4,128,1,float16,fp8,0,0.5162928104400635
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,8192,40,4,128,1,fp8,fp8,0,0.6088287830352783
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,8192,40,8,128,1,float16,float16,0,0.5844528198242187
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,8192,40,8,128,1,float16,fp8,0,0.5144959926605225
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,8192,40,8,128,1,fp8,fp8,0,0.5147024154663086
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,6144,40,1,128,1,fp8,fp8,0,4.251903915405274
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,6144,40,1,128,1,float16,fp8,0,4.508524703979492
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,6144,40,1,128,1,float16,float16,0,6.723478698730469
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,6144,40,2,128,1,float16,float16,0,6.681806182861328
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,6144,40,2,128,1,float16,fp8,0,4.278116989135742
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,6144,40,2,128,1,fp8,fp8,0,4.366843032836914
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,6144,40,4,128,1,float16,fp8,0,4.314580917358398
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,6144,40,4,128,1,float16,float16,0,6.918672180175781
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,6144,40,4,128,1,fp8,fp8,0,4.420716857910156
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,6144,40,8,128,1,float16,fp8,0,4.4015663146972654
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,6144,40,8,128,1,float16,float16,0,7.563056182861328
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,6144,40,40,128,1,float16,float16,0,3.4084495544433593
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,6144,40,8,128,1,fp8,fp8,0,4.542671966552734
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,6144,40,40,128,1,float16,fp8,0,2.357926368713379
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,6144,40,40,128,1,fp8,fp8,0,2.806550407409668
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,6144,40,1,128,1,float16,float16,0,3.4971233367919923
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,6144,40,1,128,1,float16,fp8,0,2.236720085144043
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,6144,40,1,128,1,fp8,fp8,0,2.1958431243896483
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,6144,40,2,128,1,float16,float16,0,3.0850400924682617
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,6144,40,2,128,1,fp8,fp8,0,2.238355255126953
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,6144,40,2,128,1,float16,fp8,0,2.459275245666504
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,6144,40,4,128,1,float16,float16,0,3.248961639404297
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,6144,40,4,128,1,fp8,fp8,0,2.185976028442383
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,6144,40,4,128,1,float16,fp8,0,2.800177574157715
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,6144,40,8,128,1,float16,float16,0,3.2162704467773438
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,6144,40,8,128,1,float16,fp8,0,2.367750358581543
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,6144,40,40,128,1,float16,float16,0,1.3364463806152345
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,6144,40,8,128,1,fp8,fp8,0,2.2664255142211913
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,6144,40,40,128,1,float16,fp8,0,1.4265263557434082
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,6144,40,40,128,1,fp8,fp8,0,1.2340543746948243
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,6144,40,1,128,1,float16,float16,0,1.7638704299926757
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,6144,40,1,128,1,float16,fp8,0,1.1125823974609375
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,6144,40,1,128,1,fp8,fp8,0,1.1061264038085938
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,6144,40,2,128,1,float16,float16,0,1.2652912139892578
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,6144,40,2,128,1,fp8,fp8,0,1.1083776473999023
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,6144,40,2,128,1,float16,fp8,0,1.5486000061035157
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,6144,40,4,128,1,float16,float16,0,1.197702407836914
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,6144,40,4,128,1,float16,fp8,0,1.402785587310791
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,6144,40,4,128,1,fp8,fp8,0,1.1080320358276368
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,6144,40,8,128,1,float16,fp8,0,1.265891170501709
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,6144,40,8,128,1,float16,float16,0,1.3827887535095216
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,6144,40,40,128,1,float16,float16,0,0.7201104164123535
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,6144,40,8,128,1,fp8,fp8,0,1.1680543899536133
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,6144,40,40,128,1,float16,fp8,0,0.6709072113037109
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,6144,40,1,128,1,float16,float16,0,0.6570256233215332
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,6144,40,40,128,1,fp8,fp8,0,0.7772992134094239
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,6144,40,1,128,1,float16,fp8,0,0.5852240085601806
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,6144,40,1,128,1,fp8,fp8,0,0.5830656051635742
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,6144,40,2,128,1,float16,float16,0,0.7296063899993896
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,6144,40,2,128,1,fp8,fp8,0,0.5820176124572753
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,6144,40,2,128,1,float16,fp8,0,0.6789423942565918
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,6144,40,4,128,1,float16,float16,0,0.6423711776733398
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,6144,40,4,128,1,fp8,fp8,0,0.5817071914672851
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,6144,40,4,128,1,float16,fp8,0,0.6441440105438232
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,6144,40,8,128,1,float16,float16,0,0.698531198501587
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,6144,40,8,128,1,float16,fp8,0,0.5849647998809815
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,6144,40,40,128,1,float16,float16,0,0.3965951919555664
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,6144,40,8,128,1,fp8,fp8,0,0.5807568073272705
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,6144,40,40,128,1,float16,fp8,0,0.351361608505249
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,6144,40,40,128,1,fp8,fp8,0,0.3577600002288818
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,6144,40,1,128,1,float16,float16,0,0.35577759742736814
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,6144,40,1,128,1,float16,fp8,0,0.3223520040512085
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,6144,40,1,128,1,fp8,fp8,0,0.32318720817565916
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,6144,40,2,128,1,float16,float16,0,0.35817599296569824
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,6144,40,2,128,1,float16,fp8,0,0.32122719287872314
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,6144,40,2,128,1,fp8,fp8,0,0.323140811920166
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,6144,40,4,128,1,float16,float16,0,0.3596672058105469
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,6144,40,4,128,1,float16,fp8,0,0.32204959392547605
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,6144,40,4,128,1,fp8,fp8,0,0.3218208074569702
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,6144,40,8,128,1,float16,float16,0,0.3671247959136963
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,6144,40,8,128,1,float16,fp8,0,0.3201647996902466
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,6144,40,8,128,1,fp8,fp8,0,0.32250399589538575
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,4096,40,1,128,1,float16,fp8,0,4.087750244140625
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,4096,40,1,128,1,fp8,fp8,0,4.09664306640625
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,4096,40,1,128,1,float16,float16,0,6.10191535949707
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,4096,40,2,128,1,float16,fp8,0,4.073801422119141
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,4096,40,2,128,1,fp8,fp8,0,4.092118453979492
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,4096,40,2,128,1,float16,float16,0,5.7647968292236325
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,4096,40,4,128,1,float16,float16,0,6.576725006103516
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,4096,40,4,128,1,float16,fp8,0,4.100955200195313
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,4096,40,4,128,1,fp8,fp8,0,4.071680068969727
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,4096,40,40,128,1,float16,float16,0,3.1959455490112303
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,4096,40,8,128,1,float16,float16,0,6.684297943115235
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,4096,40,8,128,1,float16,fp8,0,4.219473648071289
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,4096,40,8,128,1,fp8,fp8,0,4.116313552856445
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,4096,40,40,128,1,float16,fp8,0,2.4439119338989257
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,4096,40,1,128,1,float16,fp8,0,2.0552831649780274
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,4096,40,40,128,1,fp8,fp8,0,2.6059776306152345
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,4096,40,1,128,1,float16,float16,0,2.9155792236328124
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,4096,40,1,128,1,fp8,fp8,0,2.0729856491088867
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,4096,40,2,128,1,float16,fp8,0,2.032145690917969
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,4096,40,2,128,1,float16,float16,0,2.2564191818237305
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,4096,40,2,128,1,fp8,fp8,0,2.273303985595703
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,4096,40,4,128,1,float16,fp8,0,2.114279937744141
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,4096,40,4,128,1,float16,float16,0,2.6675567626953125
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,4096,40,4,128,1,fp8,fp8,0,2.367571258544922
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,4096,40,8,128,1,float16,float16,0,2.601273536682129
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,4096,40,40,128,1,float16,float16,0,1.3030927658081055
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,4096,40,8,128,1,float16,fp8,0,2.033697509765625
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,4096,40,8,128,1,fp8,fp8,0,2.3173168182373045
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,4096,40,40,128,1,float16,fp8,0,1.4657808303833009
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,4096,40,40,128,1,fp8,fp8,0,1.245680046081543
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,4096,40,1,128,1,float16,float16,0,1.161177635192871
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,4096,40,1,128,1,float16,fp8,0,1.119212818145752
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,4096,40,1,128,1,fp8,fp8,0,1.1041904449462892
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,4096,40,2,128,1,float16,float16,0,1.1762319564819337
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,4096,40,2,128,1,float16,fp8,0,1.4291919708251952
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,4096,40,2,128,1,fp8,fp8,0,1.0893648147583008
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,4096,40,4,128,1,float16,float16,0,1.1374815940856933
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,4096,40,4,128,1,float16,fp8,0,1.2725680351257325
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,4096,40,4,128,1,fp8,fp8,0,1.149961566925049
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,4096,40,8,128,1,float16,float16,0,1.1596912384033202
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,4096,40,8,128,1,float16,fp8,0,1.2350784301757813
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,4096,40,40,128,1,float16,float16,0,0.6853807926177978
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,4096,40,40,128,1,float16,fp8,0,0.6254000186920166
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,4096,40,8,128,1,fp8,fp8,0,1.0719823837280273
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,4096,40,1,128,1,float16,float16,0,0.6011775970458985
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,4096,40,40,128,1,fp8,fp8,0,0.6674911975860596
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,4096,40,1,128,1,float16,fp8,0,0.5818416118621826
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,4096,40,1,128,1,fp8,fp8,0,0.5514848232269287
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,4096,40,2,128,1,float16,fp8,0,0.5923535823822021
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,4096,40,2,128,1,float16,float16,0,0.6039535999298096
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,4096,40,2,128,1,fp8,fp8,0,0.5728975772857666
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,4096,40,4,128,1,float16,float16,0,0.5966288089752197
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,4096,40,4,128,1,float16,fp8,0,0.5484096050262451
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,4096,40,4,128,1,fp8,fp8,0,0.5881887912750244
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,4096,40,8,128,1,float16,float16,0,0.6141712188720703
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,4096,40,8,128,1,float16,fp8,0,0.5531824111938477
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,4096,40,40,128,1,float16,float16,0,0.37012639045715334
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,4096,40,8,128,1,fp8,fp8,0,0.5483168125152588
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,4096,40,40,128,1,float16,fp8,0,0.33760321140289307
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,4096,40,40,128,1,fp8,fp8,0,0.3359760046005249
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,4096,40,1,128,1,float16,float16,0,0.3240288019180298
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,4096,40,1,128,1,float16,fp8,0,0.30046079158782957
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,4096,40,1,128,1,fp8,fp8,0,0.3293888092041016
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,4096,40,2,128,1,float16,float16,0,0.3214927911758423
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,4096,40,2,128,1,float16,fp8,0,0.29915680885314944
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,4096,40,2,128,1,fp8,fp8,0,0.2994335889816284
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,4096,40,4,128,1,float16,float16,0,0.337553596496582
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,4096,40,4,128,1,float16,fp8,0,0.2995215892791748
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,4096,40,4,128,1,fp8,fp8,0,0.3000368118286133
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,4096,40,8,128,1,float16,float16,0,0.33277440071105957
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,4096,40,8,128,1,float16,fp8,0,0.3007040023803711
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,4096,40,8,128,1,fp8,fp8,0,0.29681921005249023
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,4096,40,40,128,1,float16,float16,0,0.20735681056976318
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,4096,40,40,128,1,float16,fp8,0,0.19049439430236817
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,4096,40,40,128,1,fp8,fp8,0,0.1909327983856201
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,4096,40,1,128,1,float16,float16,0,0.1799183964729309
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,4096,40,1,128,1,float16,fp8,0,0.16904319524765016
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,4096,40,1,128,1,fp8,fp8,0,0.16999679803848267
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,4096,40,2,128,1,float16,float16,0,0.1852735996246338
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,4096,40,2,128,1,float16,fp8,0,0.1683791995048523
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,4096,40,2,128,1,fp8,fp8,0,0.16996959447860718
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,4096,40,4,128,1,float16,float16,0,0.18496960401535034
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,4096,40,4,128,1,float16,fp8,0,0.1709887981414795
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,4096,40,4,128,1,fp8,fp8,0,0.16950880289077758
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,4096,40,8,128,1,float16,float16,0,0.18849920034408568
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,4096,40,8,128,1,float16,fp8,0,0.1694208025932312
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,4096,40,8,128,1,fp8,fp8,0,0.16843039989471437
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,3072,40,1,128,1,float16,fp8,0,2.4707712173461913
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,3072,40,1,128,1,fp8,fp8,0,2.4798303604125977
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,3072,40,1,128,1,float16,float16,0,3.1870143890380858
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,3072,40,2,128,1,float16,fp8,0,2.485683250427246
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,3072,40,2,128,1,float16,float16,0,2.711199951171875
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,3072,40,2,128,1,fp8,fp8,0,2.6035440444946287
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,3072,40,4,128,1,float16,float16,0,2.9885168075561523
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,3072,40,4,128,1,float16,fp8,0,2.67520809173584
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,3072,40,4,128,1,fp8,fp8,0,2.4664655685424806
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,3072,40,8,128,1,float16,float16,0,3.594073486328125
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,3072,40,40,128,1,float16,float16,0,1.6190080642700195
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,3072,40,8,128,1,fp8,fp8,0,2.464985656738281
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,3072,40,8,128,1,float16,fp8,0,2.6067712783813475
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,3072,40,40,128,1,float16,fp8,0,2.219086456298828
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,3072,40,40,128,1,fp8,fp8,0,1.5167776107788087
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,3072,40,1,128,1,float16,float16,0,1.495747184753418
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,3072,40,1,128,1,float16,fp8,0,1.257918357849121
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,3072,40,1,128,1,fp8,fp8,0,1.2615519523620606
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,3072,40,2,128,1,float16,float16,0,1.329430389404297
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,3072,40,2,128,1,fp8,fp8,0,1.2636704444885254
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,3072,40,2,128,1,float16,fp8,0,1.8237712860107422
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,3072,40,4,128,1,float16,float16,0,1.335209560394287
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,3072,40,4,128,1,float16,fp8,0,1.6050928115844727
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,3072,40,4,128,1,fp8,fp8,0,1.2567919731140136
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,3072,40,8,128,1,float16,float16,0,1.3663951873779296
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,3072,40,8,128,1,float16,fp8,0,1.3419568061828613
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,3072,40,40,128,1,float16,float16,0,0.8225919723510742
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,3072,40,8,128,1,fp8,fp8,0,1.2515071868896483
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,3072,40,40,128,1,float16,fp8,0,0.8101648330688477
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,3072,40,40,128,1,fp8,fp8,0,0.8102800369262695
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,3072,40,1,128,1,float16,float16,0,0.6929999828338623
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,3072,40,1,128,1,float16,fp8,0,0.6527935981750488
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,3072,40,1,128,1,fp8,fp8,0,0.6606128215789795
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,3072,40,2,128,1,float16,float16,0,0.6899007797241211
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,3072,40,2,128,1,float16,fp8,0,0.8452095985412598
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,3072,40,2,128,1,fp8,fp8,0,0.6501984119415283
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,3072,40,4,128,1,float16,float16,0,0.6994880199432373
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,3072,40,4,128,1,float16,fp8,0,0.6475647926330567
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,3072,40,4,128,1,fp8,fp8,0,0.798470401763916
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,3072,40,8,128,1,float16,float16,0,0.715392017364502
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,3072,40,8,128,1,float16,fp8,0,0.6466063976287841
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,3072,40,8,128,1,fp8,fp8,0,0.6466224193572998
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,3072,40,40,128,1,float16,float16,0,0.4322336196899414
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,3072,40,40,128,1,float16,fp8,0,0.40091838836669924
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,3072,40,40,128,1,fp8,fp8,0,0.3983504056930542
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,3072,40,1,128,1,float16,float16,0,0.37044639587402345
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,3072,40,1,128,1,float16,fp8,0,0.3449120044708252
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,3072,40,1,128,1,fp8,fp8,0,0.3483311891555786
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,3072,40,2,128,1,float16,float16,0,0.3717695951461792
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,3072,40,2,128,1,float16,fp8,0,0.3478672027587891
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,3072,40,2,128,1,fp8,fp8,0,0.3429088115692139
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,3072,40,4,128,1,float16,float16,0,0.37519838809967043
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,3072,40,4,128,1,float16,fp8,0,0.34615681171417234
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,3072,40,4,128,1,fp8,fp8,0,0.34678239822387696
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,3072,40,8,128,1,float16,float16,0,0.3773296117782593
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,3072,40,8,128,1,float16,fp8,0,0.34633760452270507
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,3072,40,8,128,1,fp8,fp8,0,0.3447007894515991
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,3072,40,40,128,1,float16,float16,0,0.2397167921066284
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,3072,40,40,128,1,float16,fp8,0,0.21749439239501953
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,3072,40,1,128,1,float16,float16,0,0.20710399150848388
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,3072,40,1,128,1,float16,fp8,0,0.19004640579223633
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,3072,40,40,128,1,fp8,fp8,0,0.2197887897491455
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,3072,40,1,128,1,fp8,fp8,0,0.18914400339126586
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,3072,40,2,128,1,float16,float16,0,0.20650238990783693
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,3072,40,2,128,1,float16,fp8,0,0.1905359983444214
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,3072,40,2,128,1,fp8,fp8,0,0.1898751974105835
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,3072,40,4,128,1,float16,float16,0,0.20615038871765137
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,3072,40,4,128,1,float16,fp8,0,0.19065439701080322
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,3072,40,4,128,1,fp8,fp8,0,0.1901311993598938
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,3072,40,8,128,1,float16,float16,0,0.2111743927001953
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,3072,40,8,128,1,float16,fp8,0,0.18972159624099733
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,3072,40,40,128,1,float16,float16,0,0.13647359609603882
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,3072,40,40,128,1,float16,fp8,0,0.1277583956718445
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,3072,40,8,128,1,fp8,fp8,0,0.1915295958518982
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,3072,40,40,128,1,fp8,fp8,0,0.12478079795837402
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,3072,40,1,128,1,float16,float16,0,0.11571359634399414
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,3072,40,1,128,1,float16,fp8,0,0.11486879587173462
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,3072,40,1,128,1,fp8,fp8,0,0.11302399635314941
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,3072,40,2,128,1,float16,float16,0,0.11616319417953491
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,3072,40,2,128,1,float16,fp8,0,0.11245759725570678
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,3072,40,2,128,1,fp8,fp8,0,0.11108959913253784
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,3072,40,4,128,1,float16,float16,0,0.11837919950485229
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,3072,40,4,128,1,float16,fp8,0,0.1127344012260437
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,3072,40,4,128,1,fp8,fp8,0,0.11280640363693237
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,3072,40,8,128,1,float16,fp8,0,0.11062400341033936
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,3072,40,8,128,1,float16,float16,0,0.12136800289154052
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,3072,40,8,128,1,fp8,fp8,0,0.11075359582901001
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,2048,40,1,128,1,float16,fp8,0,2.5186767578125
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,2048,40,1,128,1,fp8,fp8,0,2.5207664489746096
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,2048,40,1,128,1,float16,float16,0,2.941980743408203
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,2048,40,2,128,1,float16,float16,0,2.9993167877197267
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,2048,40,2,128,1,float16,fp8,0,2.515572738647461
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,2048,40,2,128,1,fp8,fp8,0,2.516017532348633
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,2048,40,4,128,1,float16,fp8,0,2.8821887969970703
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,2048,40,4,128,1,float16,float16,0,3.782392120361328
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,2048,40,4,128,1,fp8,fp8,0,2.5150495529174806
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,2048,40,8,128,1,float16,float16,0,3.3355262756347654
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,2048,40,8,128,1,float16,fp8,0,2.5107568740844726
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,2048,40,8,128,1,fp8,fp8,0,2.79968318939209
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,2048,40,40,128,1,float16,fp8,0,1.5575008392333984
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,2048,40,40,128,1,float16,float16,0,2.2927120208740233
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,2048,40,40,128,1,fp8,fp8,0,1.5840736389160157
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,2048,40,1,128,1,float16,float16,0,1.2964703559875488
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,2048,40,1,128,1,float16,fp8,0,1.4351712226867677
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,2048,40,1,128,1,fp8,fp8,0,1.2742287635803222
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,2048,40,2,128,1,float16,float16,0,1.5014528274536132
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,2048,40,2,128,1,float16,fp8,0,1.2774111747741699
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,2048,40,2,128,1,fp8,fp8,0,1.2717375755310059
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,2048,40,4,128,1,float16,float16,0,1.7965120315551757
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,2048,40,4,128,1,float16,fp8,0,1.2751664161682128
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,2048,40,4,128,1,fp8,fp8,0,1.2729392051696777
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,2048,40,8,128,1,float16,float16,0,1.3575535774230958
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,2048,40,40,128,1,float16,float16,0,0.8667519569396973
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,2048,40,8,128,1,fp8,fp8,0,1.2718576431274413
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,2048,40,8,128,1,float16,fp8,0,1.5320768356323242
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,2048,40,40,128,1,float16,fp8,0,1.0730688095092773
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,2048,40,40,128,1,fp8,fp8,0,0.7951231956481933
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,2048,40,1,128,1,float16,float16,0,0.6818655967712403
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,2048,40,1,128,1,float16,fp8,0,0.6547984123229981
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,2048,40,1,128,1,fp8,fp8,0,0.6781439781188965
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,2048,40,2,128,1,float16,fp8,0,0.6605199813842774
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,2048,40,2,128,1,float16,float16,0,0.6900815963745117
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,2048,40,2,128,1,fp8,fp8,0,0.6549615859985352
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,2048,40,4,128,1,float16,float16,0,0.6880655765533448
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,2048,40,4,128,1,float16,fp8,0,0.6623760223388672
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,2048,40,4,128,1,fp8,fp8,0,0.6549600124359131
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,2048,40,8,128,1,float16,float16,0,0.6989327907562256
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,2048,40,8,128,1,float16,fp8,0,0.6501791954040528
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,2048,40,40,128,1,float16,fp8,0,0.42566561698913574
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,2048,40,40,128,1,float16,float16,0,0.4500576019287109
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,2048,40,8,128,1,fp8,fp8,0,0.6520768165588379
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,2048,40,40,128,1,fp8,fp8,0,0.42119998931884767
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,2048,40,1,128,1,float16,float16,0,0.35593600273132325
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,2048,40,1,128,1,float16,fp8,0,0.3445280075073242
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,2048,40,1,128,1,fp8,fp8,0,0.3442431926727295
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,2048,40,2,128,1,float16,float16,0,0.35940001010894773
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,2048,40,2,128,1,float16,fp8,0,0.34170401096343994
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,2048,40,2,128,1,fp8,fp8,0,0.3449359893798828
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,2048,40,4,128,1,float16,float16,0,0.36520159244537354
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,2048,40,4,128,1,float16,fp8,0,0.3439215898513794
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,2048,40,4,128,1,fp8,fp8,0,0.34202880859375
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,2048,40,8,128,1,float16,fp8,0,0.3416703939437866
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,2048,40,8,128,1,float16,float16,0,0.3720495939254761
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,2048,40,40,128,1,float16,float16,0,0.23920159339904784
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,2048,40,8,128,1,fp8,fp8,0,0.34047200679779055
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,2048,40,40,128,1,float16,fp8,0,0.2234544038772583
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,2048,40,40,128,1,fp8,fp8,0,0.22239999771118163
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,2048,40,1,128,1,float16,fp8,0,0.18785120248794557
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,2048,40,1,128,1,float16,float16,0,0.19566719532012938
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,2048,40,1,128,1,fp8,fp8,0,0.18635040521621704
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,2048,40,2,128,1,float16,float16,0,0.19663519859313966
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,2048,40,2,128,1,fp8,fp8,0,0.18534560203552247
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,2048,40,2,128,1,float16,fp8,0,0.18668639659881592
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,2048,40,4,128,1,float16,float16,0,0.19923839569091797
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,2048,40,4,128,1,float16,fp8,0,0.18713760375976562
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,2048,40,4,128,1,fp8,fp8,0,0.18466880321502685
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,2048,40,8,128,1,float16,float16,0,0.20256640911102294
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,2048,40,8,128,1,float16,fp8,0,0.1869711995124817
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,2048,40,8,128,1,fp8,fp8,0,0.18693280220031738
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,2048,40,40,128,1,float16,float16,0,0.13392000198364257
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,2048,40,40,128,1,float16,fp8,0,0.12523679733276366
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,2048,40,40,128,1,fp8,fp8,0,0.1244928002357483
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,2048,40,1,128,1,float16,float16,0,0.11011359691619874
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,2048,40,1,128,1,float16,fp8,0,0.10539360046386718
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,2048,40,1,128,1,fp8,fp8,0,0.10518720149993896
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,2048,40,2,128,1,float16,float16,0,0.10967999696731567
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,2048,40,2,128,1,fp8,fp8,0,0.1055232048034668
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,2048,40,2,128,1,float16,fp8,0,0.10456639528274536
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,2048,40,4,128,1,float16,float16,0,0.11059839725494384
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,2048,40,4,128,1,float16,fp8,0,0.10510720014572143
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,2048,40,4,128,1,fp8,fp8,0,0.10443359613418579
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,2048,40,8,128,1,float16,float16,0,0.11524159908294677
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,2048,40,8,128,1,float16,fp8,0,0.10387359857559204
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,2048,40,8,128,1,fp8,fp8,0,0.10530400276184082
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,2048,40,40,128,1,float16,float16,0,0.07871040105819702
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,2048,40,40,128,1,fp8,fp8,0,0.0731823980808258
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,2048,40,40,128,1,float16,fp8,0,0.0739184021949768
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,2048,40,1,128,1,float16,float16,0,0.06724640130996704
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,2048,40,1,128,1,float16,fp8,0,0.06376640200614929
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,2048,40,1,128,1,fp8,fp8,0,0.06425600051879883
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,2048,40,2,128,1,float16,float16,0,0.06765120029449463
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,2048,40,2,128,1,float16,fp8,0,0.06399520039558411
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,2048,40,2,128,1,fp8,fp8,0,0.0638704001903534
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,2048,40,4,128,1,float16,float16,0,0.06761919856071472
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,2048,40,4,128,1,float16,fp8,0,0.06405119895935059
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,2048,40,4,128,1,fp8,fp8,0,0.06405280232429504
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,2048,40,8,128,1,float16,float16,0,0.06804800033569336
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,2048,40,8,128,1,float16,fp8,0,0.06407840251922607
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,2048,40,8,128,1,fp8,fp8,0,0.06373599767684937
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1536,40,1,128,1,float16,float16,0,1.621790313720703
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1536,40,1,128,1,float16,fp8,0,1.6106128692626953
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1536,40,1,128,1,fp8,fp8,0,1.6056127548217773
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1536,40,2,128,1,float16,float16,0,1.6031328201293946
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1536,40,2,128,1,fp8,fp8,0,1.6076080322265625
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1536,40,2,128,1,float16,fp8,0,1.6372671127319336
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1536,40,4,128,1,float16,float16,0,1.6290895462036132
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1536,40,4,128,1,float16,fp8,0,1.7343616485595703
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1536,40,4,128,1,fp8,fp8,0,1.606585693359375
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1536,40,8,128,1,float16,fp8,0,1.6043231964111329
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1536,40,8,128,1,float16,float16,0,1.8094160079956054
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1536,40,40,128,1,float16,float16,0,1.1335599899291993
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1536,40,40,128,1,float16,fp8,0,1.199062442779541
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1536,40,8,128,1,fp8,fp8,0,1.6054288864135742
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1536,40,1,128,1,float16,float16,0,0.8306143760681153
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1536,40,40,128,1,fp8,fp8,0,1.0638799667358398
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1536,40,1,128,1,float16,fp8,0,0.8196895599365235
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1536,40,1,128,1,fp8,fp8,0,0.8221599578857421
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1536,40,2,128,1,float16,float16,0,0.8273200035095215
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1536,40,2,128,1,float16,fp8,0,0.8187088012695313
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1536,40,2,128,1,fp8,fp8,0,0.8195455551147461
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1536,40,4,128,1,float16,fp8,0,0.8183135986328125
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1536,40,4,128,1,float16,float16,0,0.8573151588439941
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1536,40,4,128,1,fp8,fp8,0,0.8198863983154296
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1536,40,8,128,1,float16,fp8,0,0.8152815818786621
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1536,40,8,128,1,float16,float16,0,0.866859245300293
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1536,40,8,128,1,fp8,fp8,0,1.022379207611084
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1536,40,40,128,1,float16,float16,0,0.5748335838317871
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1536,40,40,128,1,float16,fp8,0,0.52915358543396
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1536,40,40,128,1,fp8,fp8,0,0.5290080070495605
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1536,40,1,128,1,float16,float16,0,0.5423312187194824
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1536,40,1,128,1,float16,fp8,0,0.42593917846679685
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1536,40,1,128,1,fp8,fp8,0,0.4245296001434326
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1536,40,2,128,1,float16,float16,0,0.43087358474731446
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1536,40,2,128,1,float16,fp8,0,0.42404799461364745
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1536,40,2,128,1,fp8,fp8,0,0.4243152141571045
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1536,40,4,128,1,float16,float16,0,0.43633441925048827
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1536,40,4,128,1,float16,fp8,0,0.4229599952697754
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1536,40,4,128,1,fp8,fp8,0,0.42403521537780764
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1536,40,8,128,1,float16,float16,0,0.455401611328125
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1536,40,8,128,1,float16,fp8,0,0.42241921424865725
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1536,40,40,128,1,float16,float16,0,0.29984800815582274
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1536,40,8,128,1,fp8,fp8,0,0.4207183837890625
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1536,40,40,128,1,float16,fp8,0,0.2796288013458252
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1536,40,1,128,1,float16,float16,0,0.23161919116973878
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1536,40,40,128,1,fp8,fp8,0,0.27935519218444826
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1536,40,1,128,1,float16,fp8,0,0.2257535934448242
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1536,40,1,128,1,fp8,fp8,0,0.22653279304504395
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1536,40,2,128,1,float16,float16,0,0.22993919849395753
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1536,40,2,128,1,float16,fp8,0,0.22678399085998535
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1536,40,2,128,1,fp8,fp8,0,0.2248176097869873
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1536,40,4,128,1,float16,float16,0,0.2364896059036255
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1536,40,4,128,1,float16,fp8,0,0.2254415988922119
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1536,40,4,128,1,fp8,fp8,0,0.22441439628601073
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1536,40,8,128,1,float16,float16,0,0.24093279838562012
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1536,40,8,128,1,float16,fp8,0,0.22439999580383302
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1536,40,40,128,1,float16,float16,0,0.16216640472412108
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1536,40,8,128,1,fp8,fp8,0,0.2253200054168701
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1536,40,40,128,1,float16,fp8,0,0.15154880285263062
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1536,40,1,128,1,float16,float16,0,0.1267375946044922
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1536,40,1,128,1,float16,fp8,0,0.12354880571365356
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1536,40,40,128,1,fp8,fp8,0,0.15172480344772338
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1536,40,1,128,1,fp8,fp8,0,0.1218783974647522
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1536,40,2,128,1,float16,float16,0,0.1260751962661743
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1536,40,2,128,1,float16,fp8,0,0.12414560317993165
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1536,40,2,128,1,fp8,fp8,0,0.124236798286438
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1536,40,4,128,1,float16,float16,0,0.1296064019203186
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1536,40,4,128,1,float16,fp8,0,0.12146879434585571
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1536,40,4,128,1,fp8,fp8,0,0.12352639436721802
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1536,40,8,128,1,float16,float16,0,0.13415520191192626
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1536,40,8,128,1,float16,fp8,0,0.12238240242004395
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1536,40,40,128,1,float16,float16,0,0.09300479888916016
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1536,40,8,128,1,fp8,fp8,0,0.12388639450073242
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1536,40,40,128,1,float16,fp8,0,0.08688960075378419
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1536,40,40,128,1,fp8,fp8,0,0.08658239841461182
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1536,40,1,128,1,float16,float16,0,0.07413920164108276
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1536,40,1,128,1,float16,fp8,0,0.07166079878807068
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1536,40,1,128,1,fp8,fp8,0,0.07154399752616883
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1536,40,2,128,1,float16,float16,0,0.0730672001838684
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1536,40,2,128,1,float16,fp8,0,0.07164160013198853
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1536,40,2,128,1,fp8,fp8,0,0.07182559967041016
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1536,40,4,128,1,float16,float16,0,0.0746944010257721
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1536,40,4,128,1,float16,fp8,0,0.07122240066528321
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1536,40,4,128,1,fp8,fp8,0,0.07288960218429566
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1536,40,8,128,1,float16,float16,0,0.0746511995792389
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1536,40,8,128,1,float16,fp8,0,0.0713375985622406
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1536,40,8,128,1,fp8,fp8,0,0.0708944022655487
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1536,40,40,128,1,float16,float16,0,0.05502079725265503
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1536,40,40,128,1,float16,fp8,0,0.053553599119186404
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1536,40,40,128,1,fp8,fp8,0,0.053673601150512694
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1536,40,1,128,1,float16,float16,0,0.04889279901981354
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1536,40,1,128,1,float16,fp8,0,0.04782400131225586
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1536,40,1,128,1,fp8,fp8,0,0.047600001096725464
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1536,40,2,128,1,float16,float16,0,0.04940159916877747
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1536,40,2,128,1,float16,fp8,0,0.047723200917243955
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1536,40,2,128,1,fp8,fp8,0,0.048876801133155824
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1536,40,4,128,1,float16,float16,0,0.049327999353408813
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1536,40,4,128,1,float16,fp8,0,0.04737119972705841
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1536,40,8,128,1,float16,float16,0,0.05076959729194641
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1536,40,4,128,1,fp8,fp8,0,0.04735040068626404
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1536,40,8,128,1,float16,fp8,0,0.04728319942951202
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1536,40,8,128,1,fp8,fp8,0,0.04786239862442017
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,1024,40,1,128,1,float16,float16,0,1.718191909790039
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,1024,40,2,128,1,float16,float16,0,1.7097103118896484
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,1024,40,1,128,1,float16,fp8,0,1.748918342590332
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,1024,40,1,128,1,fp8,fp8,0,1.755227279663086
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,1024,40,4,128,1,float16,float16,0,1.7325807571411134
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,1024,40,2,128,1,float16,fp8,0,1.9522592544555664
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,1024,40,4,128,1,float16,fp8,0,1.7500848770141602
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,1024,40,2,128,1,fp8,fp8,0,1.9300048828125
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,1024,40,4,128,1,fp8,fp8,0,1.7516544342041016
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,1024,40,8,128,1,float16,float16,0,1.9192495346069336
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,1024,40,8,128,1,float16,fp8,0,1.7490144729614259
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1024,40,40,128,1,float16,fp8,0,1.173964786529541
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1024,40,40,128,1,float16,float16,0,1.3765711784362793
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,1024,40,8,128,1,fp8,fp8,0,1.7482288360595704
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1024,40,1,128,1,float16,float16,0,0.9498080253601074
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1024,40,40,128,1,fp8,fp8,0,1.173579216003418
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1024,40,1,128,1,float16,fp8,0,0.8915632247924805
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1024,40,1,128,1,fp8,fp8,0,0.9733136177062989
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1024,40,2,128,1,float16,float16,0,0.8701104164123535
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1024,40,2,128,1,float16,fp8,0,0.8895343780517578
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1024,40,2,128,1,fp8,fp8,0,0.8859951972961426
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1024,40,4,128,1,float16,float16,0,0.8818608283996582
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1024,40,4,128,1,float16,fp8,0,0.8885536193847656
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1024,40,4,128,1,fp8,fp8,0,0.8883135795593262
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1024,40,8,128,1,float16,float16,0,0.9226528167724609
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1024,40,40,128,1,float16,float16,0,0.6438255786895752
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1024,40,8,128,1,float16,fp8,0,0.9044159889221192
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1024,40,8,128,1,fp8,fp8,0,0.8862192153930664
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1024,40,40,128,1,float16,fp8,0,0.5973775863647461
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1024,40,1,128,1,float16,float16,0,0.4680208206176758
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1024,40,40,128,1,fp8,fp8,0,0.59792160987854
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1024,40,1,128,1,float16,fp8,0,0.45814080238342286
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1024,40,1,128,1,fp8,fp8,0,0.4562528133392334
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1024,40,2,128,1,float16,float16,0,0.44701762199401857
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1024,40,2,128,1,float16,fp8,0,0.4574592113494873
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1024,40,2,128,1,fp8,fp8,0,0.45806241035461426
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1024,40,4,128,1,float16,float16,0,0.4607151985168457
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1024,40,4,128,1,float16,fp8,0,0.4554895877838135
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1024,40,4,128,1,fp8,fp8,0,0.4558095932006836
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1024,40,8,128,1,float16,float16,0,0.48253440856933594
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1024,40,8,128,1,float16,fp8,0,0.45316481590270996
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1024,40,8,128,1,fp8,fp8,0,0.4547279834747314
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1024,40,40,128,1,float16,float16,0,0.3328432083129883
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1024,40,40,128,1,float16,fp8,0,0.3111135959625244
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1024,40,1,128,1,float16,float16,0,0.2398591995239258
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1024,40,1,128,1,fp8,fp8,0,0.23967039585113525
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1024,40,1,128,1,float16,fp8,0,0.23998079299926758
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1024,40,40,128,1,fp8,fp8,0,0.3116688013076782
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1024,40,2,128,1,float16,float16,0,0.23654398918151856
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1024,40,2,128,1,float16,fp8,0,0.23985600471496582
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1024,40,4,128,1,float16,float16,0,0.24224319458007812
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1024,40,2,128,1,fp8,fp8,0,0.23992960453033446
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1024,40,4,128,1,float16,fp8,0,0.23840320110321045
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1024,40,4,128,1,fp8,fp8,0,0.23924000263214112
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1024,40,8,128,1,float16,float16,0,0.2520927906036377
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1024,40,8,128,1,float16,fp8,0,0.23787519931793213
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1024,40,8,128,1,fp8,fp8,0,0.23742079734802246
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1024,40,40,128,1,float16,float16,0,0.17742079496383667
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1024,40,40,128,1,fp8,fp8,0,0.16626559495925902
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1024,40,40,128,1,float16,fp8,0,0.1672127962112427
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1024,40,1,128,1,float16,float16,0,0.12898880243301392
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1024,40,1,128,1,float16,fp8,0,0.1299631953239441
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1024,40,1,128,1,fp8,fp8,0,0.1301360011100769
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1024,40,2,128,1,float16,float16,0,0.1297327995300293
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1024,40,2,128,1,float16,fp8,0,0.12872159481048584
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1024,40,2,128,1,fp8,fp8,0,0.12934240102767944
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1024,40,4,128,1,float16,float16,0,0.13369280099868774
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1024,40,4,128,1,float16,fp8,0,0.12896959781646727
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1024,40,4,128,1,fp8,fp8,0,0.12831679582595826
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1024,40,8,128,1,float16,fp8,0,0.12950559854507446
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1024,40,8,128,1,float16,float16,0,0.13804160356521605
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1024,40,8,128,1,fp8,fp8,0,0.12975200414657592
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1024,40,40,128,1,float16,float16,0,0.09743040204048156
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1024,40,40,128,1,float16,fp8,0,0.09215999841690063
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1024,40,1,128,1,float16,float16,0,0.07244480252265931
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1024,40,40,128,1,fp8,fp8,0,0.0933135986328125
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1024,40,1,128,1,float16,fp8,0,0.07148000001907348
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1024,40,1,128,1,fp8,fp8,0,0.07204800248146057
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1024,40,2,128,1,float16,float16,0,0.07222880125045776
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1024,40,2,128,1,float16,fp8,0,0.07197120189666747
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1024,40,2,128,1,fp8,fp8,0,0.07185279726982116
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1024,40,4,128,1,float16,float16,0,0.07462720274925232
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1024,40,4,128,1,float16,fp8,0,0.07244319915771484
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1024,40,8,128,1,float16,float16,0,0.07731999754905701
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1024,40,4,128,1,fp8,fp8,0,0.0724560022354126
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1024,40,8,128,1,float16,fp8,0,0.07191200256347656
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1024,40,8,128,1,fp8,fp8,0,0.07244160175323486
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1024,40,40,128,1,float16,float16,0,0.056775999069213864
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1024,40,40,128,1,float16,fp8,0,0.05327519774436951
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1024,40,40,128,1,fp8,fp8,0,0.05300639867782593
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1024,40,1,128,1,float16,float16,0,0.044512000679969785
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1024,40,1,128,1,float16,fp8,0,0.04321439862251282
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1024,40,1,128,1,fp8,fp8,0,0.04357759952545166
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1024,40,2,128,1,float16,float16,0,0.04496000111103058
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1024,40,2,128,1,float16,fp8,0,0.04395039975643158
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1024,40,4,128,1,float16,float16,0,0.04493120014667511
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1024,40,2,128,1,fp8,fp8,0,0.043412798643112184
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1024,40,4,128,1,float16,fp8,0,0.04380159974098206
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1024,40,4,128,1,fp8,fp8,0,0.04421600103378296
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1024,40,8,128,1,float16,float16,0,0.04651840031147003
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1024,40,8,128,1,float16,fp8,0,0.04338720142841339
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1024,40,8,128,1,fp8,fp8,0,0.0438511997461319
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1024,40,40,128,1,float16,fp8,0,0.03708159923553467
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1024,40,40,128,1,fp8,fp8,0,0.03707840144634247
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1024,40,40,128,1,float16,float16,0,0.03631680011749268
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1024,40,1,128,1,float16,float16,0,0.032948800921440126
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1024,40,1,128,1,float16,fp8,0,0.03294239938259125
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1024,40,1,128,1,fp8,fp8,0,0.03215999901294708
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1024,40,2,128,1,float16,float16,0,0.033000001311302186
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1024,40,2,128,1,float16,fp8,0,0.032996800541877744
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1024,40,2,128,1,fp8,fp8,0,0.032734400033950804
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1024,40,4,128,1,float16,float16,0,0.032974401116371156
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1024,40,4,128,1,float16,fp8,0,0.03295199871063233
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1024,40,4,128,1,fp8,fp8,0,0.03223040103912354
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1024,40,8,128,1,float16,float16,0,0.03338559865951538
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1024,40,8,128,1,float16,fp8,0,0.03303360044956207
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1024,40,8,128,1,fp8,fp8,0,0.031948798894882204
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,512,40,1,128,1,float16,float16,0,1.2841952323913575
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,512,40,1,128,1,fp8,fp8,0,1.3700223922729493
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,512,40,1,128,1,float16,fp8,0,1.3722368240356446
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,512,40,2,128,1,float16,float16,0,1.408299160003662
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,512,40,2,128,1,float16,fp8,0,1.3656304359436036
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,512,40,2,128,1,fp8,fp8,0,1.3673407554626464
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,512,40,4,128,1,float16,float16,0,1.367204761505127
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,512,40,4,128,1,float16,fp8,0,1.3645808219909668
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,512,40,4,128,1,fp8,fp8,0,1.3646512031555176
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,512,40,8,128,1,float16,float16,0,1.4121184349060059
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,512,40,8,128,1,float16,fp8,0,1.3616352081298828
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,512,40,8,128,1,fp8,fp8,0,1.3621343612670898
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,512,40,40,128,1,float16,float16,0,1.0298784255981446
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,512,40,1,128,1,float16,float16,0,0.6565872192382812
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,512,40,1,128,1,float16,fp8,0,0.695251178741455
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,512,40,40,128,1,float16,fp8,0,1.0515487670898438
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,512,40,40,128,1,fp8,fp8,0,0.9714431762695312
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,512,40,1,128,1,fp8,fp8,0,0.6972015857696533
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,512,40,2,128,1,float16,float16,0,0.6508031845092773
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,512,40,2,128,1,float16,fp8,0,0.6934959888458252
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,512,40,2,128,1,fp8,fp8,0,0.6913936138153076
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,512,40,4,128,1,float16,float16,0,0.6711904048919678
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,512,40,4,128,1,fp8,fp8,0,0.6919167995452881
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,512,40,4,128,1,float16,fp8,0,0.6895487785339356
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,512,40,8,128,1,float16,float16,0,0.7063920021057128
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,512,40,8,128,1,float16,fp8,0,0.6905344009399415
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,512,40,40,128,1,float16,float16,0,0.5277376174926758
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,512,40,40,128,1,float16,fp8,0,0.49431681632995605
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,512,40,8,128,1,fp8,fp8,0,0.6893343925476074
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,512,40,1,128,1,float16,float16,0,0.33847200870513916
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,512,40,1,128,1,float16,fp8,0,0.3564192056655884
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,512,40,40,128,1,fp8,fp8,0,0.49704318046569823
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,512,40,1,128,1,fp8,fp8,0,0.35797278881072997
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,512,40,2,128,1,float16,float16,0,0.33822081089019773
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,512,40,2,128,1,float16,fp8,0,0.35487520694732666
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,512,40,2,128,1,fp8,fp8,0,0.3564703941345215
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,512,40,4,128,1,float16,float16,0,0.349015998840332
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,512,40,4,128,1,float16,fp8,0,0.3552623987197876
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,512,40,4,128,1,fp8,fp8,0,0.35457921028137207
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,512,40,8,128,1,float16,fp8,0,0.3540463924407959
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,512,40,8,128,1,float16,float16,0,0.3669600009918213
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,512,40,40,128,1,float16,float16,0,0.27161920070648193
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,512,40,40,128,1,float16,fp8,0,0.25782721042633056
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,512,40,40,128,1,fp8,fp8,0,0.25768160820007324
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,512,40,8,128,1,fp8,fp8,0,0.35344638824462893
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,512,40,1,128,1,float16,float16,0,0.17852319478988649
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,512,40,1,128,1,fp8,fp8,0,0.18744959831237792
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,512,40,1,128,1,float16,fp8,0,0.18786720037460328
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,512,40,2,128,1,float16,float16,0,0.1798751950263977
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,512,40,2,128,1,float16,fp8,0,0.1866943955421448
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,512,40,2,128,1,fp8,fp8,0,0.18660639524459838
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,512,40,4,128,1,float16,float16,0,0.1848144054412842
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,512,40,4,128,1,float16,fp8,0,0.18550560474395753
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,512,40,4,128,1,fp8,fp8,0,0.1855695962905884
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,512,40,8,128,1,float16,float16,0,0.1942639946937561
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,512,40,8,128,1,float16,fp8,0,0.18517760038375855
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,512,40,8,128,1,fp8,fp8,0,0.1855936050415039
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,512,40,40,128,1,float16,float16,0,0.14388320446014405
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,512,40,40,128,1,float16,fp8,0,0.13740160465240478
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,512,40,40,128,1,fp8,fp8,0,0.1374608039855957
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,512,40,1,128,1,float16,float16,0,0.09911999702453614
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,512,40,1,128,1,float16,fp8,0,0.10065439939498902
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,512,40,1,128,1,fp8,fp8,0,0.10073599815368653
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,512,40,2,128,1,float16,float16,0,0.09928640127182006
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,512,40,2,128,1,fp8,fp8,0,0.10045919418334961
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,512,40,2,128,1,float16,fp8,0,0.10142560005187988
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,512,40,4,128,1,float16,float16,0,0.10105600357055664
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,512,40,4,128,1,float16,fp8,0,0.10075520277023316
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,512,40,4,128,1,fp8,fp8,0,0.10067199468612671
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,512,40,8,128,1,float16,float16,0,0.10643520355224609
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,512,40,8,128,1,float16,fp8,0,0.10132160186767578
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,512,40,8,128,1,fp8,fp8,0,0.10130560398101807
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,512,40,40,128,1,float16,float16,0,0.07875199913978577
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,512,40,40,128,1,float16,fp8,0,0.07610560059547425
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,512,40,40,128,1,fp8,fp8,0,0.07682560086250305
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,512,40,1,128,1,float16,float16,0,0.05477759838104248
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,512,40,1,128,1,float16,fp8,0,0.05566560029983521
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,512,40,2,128,1,float16,float16,0,0.055193597078323366
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,512,40,2,128,1,float16,fp8,0,0.05610079765319824
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,512,40,1,128,1,fp8,fp8,0,0.05561439990997315
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,512,40,2,128,1,fp8,fp8,0,0.05567520260810852
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,512,40,4,128,1,float16,float16,0,0.05599679946899414
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,512,40,4,128,1,float16,fp8,0,0.056032001972198486
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,512,40,4,128,1,fp8,fp8,0,0.055587202310562134
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,512,40,8,128,1,float16,float16,0,0.059617602825164796
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,512,40,8,128,1,float16,fp8,0,0.05596640110015869
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,512,40,8,128,1,fp8,fp8,0,0.05628640055656433
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,512,40,40,128,1,float16,fp8,0,0.04496960043907165
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,512,40,40,128,1,float16,float16,0,0.047358399629592894
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,512,40,40,128,1,fp8,fp8,0,0.043750399351119997
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,512,40,1,128,1,float16,float16,0,0.03497759997844696
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,512,40,1,128,1,float16,fp8,0,0.03516480028629303
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,512,40,1,128,1,fp8,fp8,0,0.0350735992193222
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,512,40,2,128,1,float16,float16,0,0.0350383996963501
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,512,40,2,128,1,float16,fp8,0,0.0350847989320755
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,512,40,2,128,1,fp8,fp8,0,0.035076799988746646
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,512,40,4,128,1,float16,float16,0,0.035043200850486754
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,512,40,4,128,1,fp8,fp8,0,0.03574079871177673
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,512,40,4,128,1,float16,fp8,0,0.03528960049152374
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,512,40,8,128,1,float16,float16,0,0.03694399893283844
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,512,40,8,128,1,float16,fp8,0,0.035427200794219973
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,512,40,8,128,1,fp8,fp8,0,0.035087999701499936
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,512,40,40,128,1,float16,float16,0,0.028911998867988585
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,512,40,40,128,1,float16,fp8,0,0.031035199761390686
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,512,40,40,128,1,fp8,fp8,0,0.031043198704719544
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,512,40,1,128,1,float16,float16,0,0.02677280008792877
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,512,40,1,128,1,float16,fp8,0,0.026819199323654175
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,512,40,1,128,1,fp8,fp8,0,0.026788800954818726
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,512,40,2,128,1,float16,float16,0,0.026844799518585205
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,512,40,2,128,1,float16,fp8,0,0.026932799816131593
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,512,40,4,128,1,float16,float16,0,0.026788800954818726
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,512,40,2,128,1,fp8,fp8,0,0.02683840095996857
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,512,40,4,128,1,float16,fp8,0,0.026919999718666078
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,512,40,8,128,1,float16,float16,0,0.026918399333953857
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,512,40,8,128,1,float16,fp8,0,0.026899200677871705
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,512,40,4,128,1,fp8,fp8,0,0.026791998744010927
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,512,40,40,128,1,float16,float16,0,0.020791999995708466
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,512,40,40,128,1,float16,fp8,0,0.02066880017518997
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,512,40,8,128,1,fp8,fp8,0,0.026915198564529418
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,512,40,1,128,1,float16,float16,0,0.018617600202560425
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,512,40,40,128,1,fp8,fp8,0,0.02077919989824295
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,512,40,1,128,1,float16,fp8,0,0.018772800266742707
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,512,40,1,128,1,fp8,fp8,0,0.01860000044107437
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,512,40,2,128,1,float16,float16,0,0.018718400597572328
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,512,40,2,128,1,float16,fp8,0,0.01873600035905838
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,512,40,2,128,1,fp8,fp8,0,0.01860480010509491
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,512,40,4,128,1,float16,float16,0,0.018747200071811677
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,512,40,4,128,1,float16,fp8,0,0.018726399540901183
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,512,40,4,128,1,fp8,fp8,0,0.018624000251293182
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,512,40,8,128,1,float16,float16,0,0.018812799453735353
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,512,40,8,128,1,float16,fp8,0,0.018806399405002595
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,512,40,8,128,1,fp8,fp8,0,0.01860959976911545
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,256,40,1,128,1,float16,float16,0,0.5385759830474853
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,256,40,1,128,1,float16,fp8,0,0.5885488033294678
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,256,40,1,128,1,fp8,fp8,0,0.588915205001831
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,256,40,2,128,1,float16,float16,0,0.5369311809539795
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,256,40,2,128,1,float16,fp8,0,0.5861216068267823
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,256,40,2,128,1,fp8,fp8,0,0.5874303817749024
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,256,40,4,128,1,float16,float16,0,0.5549791812896728
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,256,40,4,128,1,float16,fp8,0,0.5848063945770263
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,256,40,4,128,1,fp8,fp8,0,0.5860527992248535
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,256,40,8,128,1,float16,float16,0,0.5899231910705567
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,256,40,8,128,1,float16,fp8,0,0.5838960170745849
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,256,40,8,128,1,fp8,fp8,0,0.5846176147460938
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,256,40,40,128,1,float16,float16,0,0.4649792194366455
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,256,40,40,128,1,float16,fp8,0,0.4400608062744141
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,256,40,40,128,1,fp8,fp8,0,0.43890881538391113
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,256,40,1,128,1,float16,float16,0,0.2768239974975586
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,256,40,1,128,1,float16,fp8,0,0.2999295949935913
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,256,40,1,128,1,fp8,fp8,0,0.30117919445037844
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,256,40,2,128,1,float16,float16,0,0.276360011100769
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,256,40,2,128,1,float16,fp8,0,0.29957919120788573
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,256,40,4,128,1,float16,float16,0,0.28567359447479246
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,256,40,2,128,1,fp8,fp8,0,0.2997503995895386
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,256,40,4,128,1,float16,fp8,0,0.29873759746551515
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,256,40,4,128,1,fp8,fp8,0,0.3004096031188965
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,256,40,8,128,1,float16,float16,0,0.3036479949951172
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,256,40,8,128,1,float16,fp8,0,0.3000416040420532
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,256,40,40,128,1,float16,float16,0,0.23858239650726318
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,256,40,8,128,1,fp8,fp8,0,0.29943840503692626
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,256,40,40,128,1,float16,fp8,0,0.22646560668945312
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,256,40,40,128,1,fp8,fp8,0,0.22619199752807617
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,256,40,1,128,1,float16,float16,0,0.1457535982131958
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,256,40,1,128,1,float16,fp8,0,0.15525120496749878
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,256,40,1,128,1,fp8,fp8,0,0.1559424042701721
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,256,40,2,128,1,float16,float16,0,0.14428479671478273
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,256,40,2,128,1,float16,fp8,0,0.15580960512161254
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,256,40,2,128,1,fp8,fp8,0,0.1556656002998352
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,256,40,4,128,1,float16,float16,0,0.15036799907684326
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,256,40,4,128,1,float16,fp8,0,0.15500320196151735
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,256,40,4,128,1,fp8,fp8,0,0.1559440016746521
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,256,40,8,128,1,float16,fp8,0,0.1550816059112549
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,256,40,8,128,1,float16,float16,0,0.15834720134735109
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,256,40,8,128,1,fp8,fp8,0,0.1544559955596924
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,256,40,40,128,1,float16,float16,0,0.1275215983390808
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,256,40,40,128,1,float16,fp8,0,0.12123039960861207
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,256,40,40,128,1,fp8,fp8,0,0.12198079824447632
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,256,40,1,128,1,float16,float16,0,0.08047040104866028
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,256,40,1,128,1,float16,fp8,0,0.0864736020565033
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,256,40,1,128,1,fp8,fp8,0,0.08607040047645569
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,256,40,2,128,1,float16,float16,0,0.08107200264930725
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,256,40,2,128,1,float16,fp8,0,0.08489919900894165
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,256,40,2,128,1,fp8,fp8,0,0.08630560040473938
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,256,40,4,128,1,float16,float16,0,0.08427839875221252
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,256,40,4,128,1,float16,fp8,0,0.08636800050735474
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,256,40,4,128,1,fp8,fp8,0,0.08517280220985413
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,256,40,8,128,1,float16,float16,0,0.08919039964675904
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,256,40,8,128,1,float16,fp8,0,0.08618879914283753
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,256,40,8,128,1,fp8,fp8,0,0.08567519783973694
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,256,40,40,128,1,float16,float16,0,0.07195039987564086
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,256,40,40,128,1,float16,fp8,0,0.06988160014152527
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,256,40,40,128,1,fp8,fp8,0,0.06958079934120179
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,256,40,1,128,1,float16,float16,0,0.047302401065826415
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,256,40,1,128,1,float16,fp8,0,0.04991999864578247
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,256,40,1,128,1,fp8,fp8,0,0.04939199984073639
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,256,40,2,128,1,float16,float16,0,0.04728319942951202
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,256,40,2,128,1,float16,fp8,0,0.049420800805091855
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,256,40,2,128,1,fp8,fp8,0,0.049446401000022885
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,256,40,4,128,1,float16,float16,0,0.04826880097389221
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,256,40,4,128,1,float16,fp8,0,0.04941920042037964
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,256,40,4,128,1,fp8,fp8,0,0.04942240118980408
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,256,40,8,128,1,float16,float16,0,0.051641601324081424
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,256,40,8,128,1,float16,fp8,0,0.049558401107788086
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,256,40,8,128,1,fp8,fp8,0,0.04960319995880127
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,256,40,40,128,1,float16,float16,0,0.04333919882774353
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,256,40,40,128,1,float16,fp8,0,0.04115679860115051
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,256,40,40,128,1,fp8,fp8,0,0.04116480052471161
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,256,40,1,128,1,float16,float16,0,0.030873599648475646
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,256,40,1,128,1,float16,fp8,0,0.03287839889526367
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,256,40,1,128,1,fp8,fp8,0,0.03291040062904358
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,256,40,2,128,1,float16,float16,0,0.03091999888420105
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,256,40,2,128,1,float16,fp8,0,0.03287999927997589
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,256,40,2,128,1,fp8,fp8,0,0.03293280005455017
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,256,40,4,128,1,float16,float16,0,0.03094559907913208
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,256,40,4,128,1,float16,fp8,0,0.03288480043411255
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,256,40,4,128,1,fp8,fp8,0,0.033004799485206605
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,256,40,8,128,1,float16,float16,0,0.03297759890556336
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,256,40,8,128,1,float16,fp8,0,0.032888001203536986
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,256,40,8,128,1,fp8,fp8,0,0.032950401306152344
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,256,40,40,128,1,float16,float16,0,0.02659519910812378
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,256,40,40,128,1,float16,fp8,0,0.028697600960731505
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,256,40,1,128,1,float16,float16,0,0.022808000445365906
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,256,40,40,128,1,fp8,fp8,0,0.027385601401329042
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,256,40,1,128,1,float16,fp8,0,0.02523840069770813
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,256,40,1,128,1,fp8,fp8,0,0.022843199968338012
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,256,40,2,128,1,float16,float16,0,0.02284960001707077
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,256,40,2,128,1,float16,fp8,0,0.02465759962797165
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,256,40,2,128,1,fp8,fp8,0,0.02280000001192093
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,256,40,4,128,1,float16,float16,0,0.022767999768257143
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,256,40,4,128,1,float16,fp8,0,0.02446720004081726
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,256,40,4,128,1,fp8,fp8,0,0.022793599963188173
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,256,40,8,128,1,float16,float16,0,0.02462719976902008
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,256,40,8,128,1,float16,fp8,0,0.024699200689792634
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,256,40,40,128,1,float16,float16,0,0.01860159933567047
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,256,40,8,128,1,fp8,fp8,0,0.023737600445747374
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,256,40,40,128,1,float16,fp8,0,0.01860480010509491
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,256,40,40,128,1,fp8,fp8,0,0.018750399351119995
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,256,40,1,128,1,float16,float16,0,0.016551999747753142
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,256,40,1,128,1,float16,fp8,0,0.01652639955282211
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,256,40,1,128,1,fp8,fp8,0,0.016553600132465363
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,256,40,2,128,1,float16,float16,0,0.01661120057106018
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,256,40,2,128,1,float16,fp8,0,0.01650079935789108
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,256,40,2,128,1,fp8,fp8,0,0.016572800278663636
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,256,40,4,128,1,float16,float16,0,0.016547200083732606
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,256,40,4,128,1,float16,fp8,0,0.016527999937534333
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,256,40,4,128,1,fp8,fp8,0,0.016761599481105803
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,256,40,8,128,1,float16,float16,0,0.016598400473594666
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,256,40,8,128,1,float16,fp8,0,0.016672000288963318
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,256,40,8,128,1,fp8,fp8,0,0.01669919937849045
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,256,40,40,128,1,float16,fp8,0,0.01671359986066818
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,256,40,40,128,1,float16,float16,0,0.01656319946050644
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,256,40,40,128,1,fp8,fp8,0,0.016689600050449373
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,256,40,1,128,1,float16,float16,0,0.016487999260425566
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,256,40,1,128,1,float16,fp8,0,0.014628799259662628
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,256,40,1,128,1,fp8,fp8,0,0.016551999747753142
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,256,40,2,128,1,float16,float16,0,0.016492800414562227
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,256,40,2,128,1,float16,fp8,0,0.014641599357128143
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,256,40,2,128,1,fp8,fp8,0,0.01642719954252243
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,256,40,4,128,1,float16,float16,0,0.016622400283813475
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,256,40,4,128,1,float16,fp8,0,0.015172800421714783
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,256,40,4,128,1,fp8,fp8,0,0.016465599834918975
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,256,40,8,128,1,float16,float16,0,0.016515199840068818
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,256,40,8,128,1,float16,fp8,0,0.014929600059986115
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,256,40,8,128,1,fp8,fp8,0,0.016574400663375854
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,128,40,1,128,1,float16,float16,0,0.3282639980316162
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,128,40,1,128,1,float16,fp8,0,0.3514224052429199
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,128,40,1,128,1,fp8,fp8,0,0.3518847942352295
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,128,40,2,128,1,float16,float16,0,0.32845919132232665
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,128,40,2,128,1,float16,fp8,0,0.3506464004516602
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,128,40,2,128,1,fp8,fp8,0,0.35077600479125975
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,128,40,4,128,1,float16,float16,0,0.3365391969680786
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,128,40,4,128,1,float16,fp8,0,0.35035679340362547
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,128,40,4,128,1,fp8,fp8,0,0.35046560764312745
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,128,40,8,128,1,float16,float16,0,0.3548703908920288
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,128,40,8,128,1,float16,fp8,0,0.350547194480896
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,128,40,8,128,1,fp8,fp8,0,0.3495039939880371
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,128,40,40,128,1,float16,float16,0,0.26036319732666013
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,128,40,40,128,1,float16,fp8,0,0.25032958984375
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,128,40,40,128,1,fp8,fp8,0,0.2516304016113281
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,128,40,1,128,1,float16,float16,0,0.1706223964691162
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,128,40,1,128,1,float16,fp8,0,0.18077600002288818
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,128,40,1,128,1,fp8,fp8,0,0.18099360466003417
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,128,40,2,128,1,float16,float16,0,0.17042880058288573
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,128,40,2,128,1,float16,fp8,0,0.1806175947189331
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,128,40,2,128,1,fp8,fp8,0,0.18082239627838134
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,128,40,4,128,1,float16,float16,0,0.17487200498580932
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,128,40,4,128,1,float16,fp8,0,0.18058079481124878
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,128,40,4,128,1,fp8,fp8,0,0.18060959577560426
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,128,40,8,128,1,float16,float16,0,0.18336000442504882
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,128,40,8,128,1,float16,fp8,0,0.1806048035621643
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,128,40,8,128,1,fp8,fp8,0,0.1809216022491455
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,128,40,40,128,1,float16,float16,0,0.13452320098876952
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,128,40,40,128,1,float16,fp8,0,0.13136160373687744
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,128,40,40,128,1,fp8,fp8,0,0.13020800352096557
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,128,40,1,128,1,float16,float16,0,0.09061920046806335
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,128,40,1,128,1,float16,fp8,0,0.09628159999847412
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,128,40,1,128,1,fp8,fp8,0,0.09451680183410645
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,128,40,2,128,1,float16,float16,0,0.09066560268402099
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,128,40,2,128,1,float16,fp8,0,0.09580479860305786
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,128,40,2,128,1,fp8,fp8,0,0.09522560238838196
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,128,40,4,128,1,float16,fp8,0,0.09634400010108948
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,128,40,4,128,1,float16,float16,0,0.0927071988582611
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,128,40,4,128,1,fp8,fp8,0,0.09478880167007446
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,128,40,8,128,1,float16,float16,0,0.09880319833755494
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,128,40,8,128,1,float16,fp8,0,0.09545120000839233
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,128,40,8,128,1,fp8,fp8,0,0.09553599953651429
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,128,40,40,128,1,float16,float16,0,0.07345119714736939
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,128,40,40,128,1,float16,fp8,0,0.0719215989112854
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,128,40,1,128,1,float16,float16,0,0.05129439830780029
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,128,40,40,128,1,fp8,fp8,0,0.07188799977302551
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,128,40,1,128,1,float16,fp8,0,0.05243359804153443
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,128,40,2,128,1,float16,float16,0,0.049558401107788086
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,128,40,1,128,1,fp8,fp8,0,0.053495997190475465
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,128,40,2,128,1,float16,fp8,0,0.05227360129356384
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,128,40,2,128,1,fp8,fp8,0,0.0533519983291626
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,128,40,4,128,1,float16,float16,0,0.05230720043182373
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,128,40,4,128,1,float16,fp8,0,0.05252000093460083
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,128,40,4,128,1,fp8,fp8,0,0.053179198503494264
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,128,40,8,128,1,float16,float16,0,0.05536800026893616
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,128,40,8,128,1,float16,fp8,0,0.05291360020637512
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,128,40,8,128,1,fp8,fp8,0,0.05335999727249145
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,128,40,40,128,1,float16,float16,0,0.043219199776649474
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,128,40,1,128,1,float16,float16,0,0.030908799171447753
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,128,40,40,128,1,fp8,fp8,0,0.04086880087852478
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,128,40,40,128,1,float16,fp8,0,0.040780800580978396
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,128,40,1,128,1,float16,fp8,0,0.031332799792289735
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,128,40,1,128,1,fp8,fp8,0,0.03099200129508972
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,128,40,2,128,1,float16,float16,0,0.03094879984855652
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,128,40,2,128,1,float16,fp8,0,0.032543998956680295
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,128,40,2,128,1,fp8,fp8,0,0.031134399771690368
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,128,40,4,128,1,float16,float16,0,0.031083199381828307
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,128,40,4,128,1,float16,fp8,0,0.03285120129585266
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,128,40,4,128,1,fp8,fp8,0,0.031071999669075014
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,128,40,8,128,1,float16,fp8,0,0.03278239965438843
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,128,40,8,128,1,float16,float16,0,0.03288480043411255
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,128,40,8,128,1,fp8,fp8,0,0.030964800715446474
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,128,40,40,128,1,float16,float16,0,0.024747200310230255
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,128,40,40,128,1,float16,fp8,0,0.026612800359725953
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,128,40,1,128,1,float16,float16,0,0.02080159932374954
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,128,40,40,128,1,fp8,fp8,0,0.026505601406097413
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,128,40,1,128,1,float16,fp8,0,0.02191520035266876
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,128,40,1,128,1,fp8,fp8,0,0.022065599262714387
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,128,40,2,128,1,float16,float16,0,0.02067520022392273
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,128,40,2,128,1,float16,fp8,0,0.02197439968585968
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,128,40,2,128,1,fp8,fp8,0,0.022124800086021423
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,128,40,4,128,1,float16,float16,0,0.02072480022907257
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,128,40,4,128,1,float16,fp8,0,0.02086720019578934
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,128,40,4,128,1,fp8,fp8,0,0.021908800303936004
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,128,40,8,128,1,float16,float16,0,0.022686399519443512
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,128,40,8,128,1,float16,fp8,0,0.021084800362586975
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,128,40,8,128,1,fp8,fp8,0,0.022078399360179902
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,128,40,40,128,1,float16,fp8,0,0.018590399622917177
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,128,40,40,128,1,float16,float16,0,0.018643200397491455
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,128,40,40,128,1,fp8,fp8,0,0.018695999681949616
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,128,40,1,128,1,float16,float16,0,0.016499200463294984
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,128,40,1,128,1,float16,fp8,0,0.01658560037612915
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,128,40,1,128,1,fp8,fp8,0,0.016707199811935424
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,128,40,2,128,1,float16,float16,0,0.01658719927072525
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,128,40,2,128,1,float16,fp8,0,0.0166143998503685
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,128,40,2,128,1,fp8,fp8,0,0.016684800386428833
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,128,40,4,128,1,float16,float16,0,0.01658080071210861
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,128,40,4,128,1,float16,fp8,0,0.016595199704170227
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,128,40,4,128,1,fp8,fp8,0,0.016702400147914888
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,128,40,8,128,1,float16,float16,0,0.016553600132465363
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,128,40,8,128,1,float16,fp8,0,0.016574400663375854
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,128,40,8,128,1,fp8,fp8,0,0.016735999286174773
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,128,40,40,128,1,float16,float16,0,0.014455999433994293
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,128,40,40,128,1,float16,fp8,0,0.01404159963130951
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,128,40,40,128,1,fp8,fp8,0,0.014473600685596466
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,128,40,1,128,1,float16,float16,0,0.012491200119256973
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,128,40,1,128,1,float16,fp8,0,0.012488000094890594
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,128,40,1,128,1,fp8,fp8,0,0.012577599287033081
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,128,40,2,128,1,float16,float16,0,0.012462399899959564
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,128,40,2,128,1,float16,fp8,0,0.012451200187206269
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,128,40,2,128,1,fp8,fp8,0,0.012583999335765839
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,128,40,4,128,1,float16,fp8,0,0.012481600046157837
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,128,40,4,128,1,fp8,fp8,0,0.012531200051307678
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,128,40,4,128,1,float16,float16,0,0.01252480000257492
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,128,40,8,128,1,float16,float16,0,0.012481600046157837
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,128,40,8,128,1,float16,fp8,0,0.01255040019750595
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,128,40,8,128,1,fp8,fp8,0,0.012484800070524216
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,128,40,40,128,1,float16,float16,0,0.012464000284671784
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,128,40,40,128,1,float16,fp8,0,0.012559999525547028
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,128,40,40,128,1,fp8,fp8,0,0.012467200309038163
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,128,40,1,128,1,float16,float16,0,0.01247360035777092
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,128,40,1,128,1,float16,fp8,0,0.012368000298738479
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,128,40,1,128,1,fp8,fp8,0,0.012392000108957291
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,128,40,2,128,1,float16,float16,0,0.012452799826860428
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,128,40,2,128,1,float16,fp8,0,0.012382400035858155
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,128,40,2,128,1,fp8,fp8,0,0.012462399899959564
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,128,40,4,128,1,float16,float16,0,0.01244639977812767
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,128,40,4,128,1,float16,fp8,0,0.012548799812793731
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,128,40,4,128,1,fp8,fp8,0,0.012374400347471236
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,128,40,8,128,1,float16,float16,0,0.012452799826860428
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,128,40,8,128,1,float16,fp8,0,0.0124719999730587
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,128,40,8,128,1,fp8,fp8,0,0.012436799705028534
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,64,40,1,128,1,float16,float16,0,0.2528752088546753
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,64,40,1,128,1,float16,fp8,0,0.2636336088180542
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,64,40,1,128,1,fp8,fp8,0,0.26328001022338865
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,64,40,2,128,1,float16,float16,0,0.2531039953231812
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,64,40,2,128,1,float16,fp8,0,0.2628720045089722
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,64,40,2,128,1,fp8,fp8,0,0.26263999938964844
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,64,40,4,128,1,float16,float16,0,0.258243203163147
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,64,40,4,128,1,float16,fp8,0,0.2623136043548584
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,64,40,4,128,1,fp8,fp8,0,0.26232481002807617
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,64,40,8,128,1,float16,float16,0,0.26758880615234376
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,64,40,8,128,1,float16,fp8,0,0.262441611289978
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,64,40,8,128,1,fp8,fp8,0,0.26177120208740234
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,64,40,40,128,1,float16,float16,0,0.17562559843063355
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,64,40,40,128,1,float16,fp8,0,0.1705008029937744
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,64,40,1,128,1,float16,float16,0,0.13369920253753662
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,64,40,40,128,1,fp8,fp8,0,0.1721951961517334
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,64,40,1,128,1,float16,fp8,0,0.13589600324630738
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,64,40,1,128,1,fp8,fp8,0,0.13689440488815308
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,64,40,2,128,1,float16,float16,0,0.13224480152130128
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,64,40,2,128,1,float16,fp8,0,0.1358288049697876
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,64,40,2,128,1,fp8,fp8,0,0.1365455985069275
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,64,40,4,128,1,float16,float16,0,0.1344256043434143
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,64,40,4,128,1,float16,fp8,0,0.1356063961982727
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,64,40,4,128,1,fp8,fp8,0,0.13672959804534912
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,64,40,8,128,1,float16,fp8,0,0.13547519445419312
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,64,40,8,128,1,float16,float16,0,0.13912639617919922
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,64,40,8,128,1,fp8,fp8,0,0.1358016014099121
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,64,40,40,128,1,float16,float16,0,0.0918079972267151
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,64,40,40,128,1,float16,fp8,0,0.09057760238647461
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,64,40,40,128,1,fp8,fp8,0,0.09107360243797302
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,64,40,1,128,1,float16,float16,0,0.07001919746398926
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,64,40,1,128,1,float16,fp8,0,0.07231519818305969
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,64,40,1,128,1,fp8,fp8,0,0.07190880179405212
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,64,40,2,128,1,float16,float16,0,0.07011839747428894
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,64,40,2,128,1,float16,fp8,0,0.07183359861373902
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,64,40,2,128,1,fp8,fp8,0,0.07199199795722962
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,64,40,4,128,1,float16,float16,0,0.07205280065536498
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,64,40,4,128,1,float16,fp8,0,0.07194079756736756
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,64,40,4,128,1,fp8,fp8,0,0.07192800045013428
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,64,40,8,128,1,float16,float16,0,0.07430080175399781
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,64,40,8,128,1,float16,fp8,0,0.07203199863433837
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,64,40,8,128,1,fp8,fp8,0,0.07191039919853211
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,64,40,40,128,1,float16,float16,0,0.051704001426696775
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,64,40,1,128,1,float16,float16,0,0.039103999733924866
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,64,40,40,128,1,float16,fp8,0,0.0494592010974884
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,64,40,40,128,1,fp8,fp8,0,0.049958398938179015
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,64,40,1,128,1,float16,fp8,0,0.041145598888397215
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,64,40,1,128,1,fp8,fp8,0,0.04108799993991852
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,64,40,2,128,1,float16,float16,0,0.04046559929847717
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,64,40,2,128,1,float16,fp8,0,0.041119998693466185
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,64,40,2,128,1,fp8,fp8,0,0.04117920100688934
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,64,40,4,128,1,float16,float16,0,0.04106239974498749
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,64,40,4,128,1,float16,fp8,0,0.04114400148391724
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,64,40,4,128,1,fp8,fp8,0,0.04113920032978058
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,64,40,8,128,1,float16,float16,0,0.04121600091457367
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,64,40,40,128,1,float16,float16,0,0.028763198852539064
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,64,40,8,128,1,fp8,fp8,0,0.04113920032978058
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,64,40,8,128,1,float16,fp8,0,0.04114879965782166
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,64,40,40,128,1,float16,fp8,0,0.030847999453544616
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,64,40,1,128,1,float16,float16,0,0.02479359954595566
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,64,40,40,128,1,fp8,fp8,0,0.030807998776435853
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,64,40,1,128,1,float16,fp8,0,0.02553919851779938
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,64,40,1,128,1,fp8,fp8,0,0.024777600169181825
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,64,40,2,128,1,float16,float16,0,0.024905599653720856
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,64,40,2,128,1,float16,fp8,0,0.026769599318504332
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,64,40,2,128,1,fp8,fp8,0,0.02489439994096756
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,64,40,4,128,1,float16,float16,0,0.024875199794769286
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,64,40,4,128,1,fp8,fp8,0,0.024771200120449068
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,64,40,4,128,1,float16,fp8,0,0.02666560113430023
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,64,40,8,128,1,float16,float16,0,0.02566399872303009
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,64,40,8,128,1,float16,fp8,0,0.02478239983320236
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,64,40,8,128,1,fp8,fp8,0,0.026704001426696777
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,64,40,40,128,1,float16,float16,0,0.018718400597572328
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,64,40,40,128,1,float16,fp8,0,0.020665599405765532
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,64,40,40,128,1,fp8,fp8,0,0.020755200088024138
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,64,40,1,128,1,float16,float16,0,0.0185248002409935
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,64,40,1,128,1,float16,fp8,0,0.018644799292087556
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,64,40,1,128,1,fp8,fp8,0,0.01860000044107437
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,64,40,2,128,1,float16,float16,0,0.01854880005121231
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,64,40,2,128,1,fp8,fp8,0,0.018585599958896637
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,64,40,2,128,1,float16,fp8,0,0.018611200153827667
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,64,40,4,128,1,float16,fp8,0,0.018676799535751343
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,64,40,4,128,1,float16,float16,0,0.01865759938955307
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,64,40,4,128,1,fp8,fp8,0,0.018515199422836304
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,64,40,8,128,1,float16,float16,0,0.018612800538539885
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,64,40,8,128,1,float16,fp8,0,0.018568000197410582
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,64,40,8,128,1,fp8,fp8,0,0.018636800348758698
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,64,40,40,128,1,float16,float16,0,0.01600320041179657
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,64,40,40,128,1,float16,fp8,0,0.014601600170135499
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,64,40,40,128,1,fp8,fp8,0,0.014664000272750855
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,64,40,1,128,1,float16,float16,0,0.014483200013637542
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,64,40,1,128,1,float16,fp8,0,0.014595200121402741
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,64,40,1,128,1,fp8,fp8,0,0.014500799775123595
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,64,40,2,128,1,float16,float16,0,0.014497600495815277
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,64,40,2,128,1,float16,fp8,0,0.014555199444293976
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,64,40,2,128,1,fp8,fp8,0,0.014507199823856353
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,64,40,4,128,1,float16,float16,0,0.014526399970054626
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,64,40,4,128,1,float16,fp8,0,0.014553600549697876
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,64,40,4,128,1,fp8,fp8,0,0.014502400159835815
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,64,40,8,128,1,float16,fp8,0,0.014579200744628906
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,64,40,8,128,1,float16,float16,0,0.01456640064716339
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,64,40,8,128,1,fp8,fp8,0,0.014534400403499603
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,64,40,40,128,1,float16,float16,0,0.012454400211572647
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,64,40,40,128,1,float16,fp8,0,0.01250080019235611
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,64,40,40,128,1,fp8,fp8,0,0.012403199821710587
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,64,40,1,128,1,float16,float16,0,0.010468800365924836
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,64,40,1,128,1,float16,fp8,0,0.010577599704265594
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,64,40,1,128,1,fp8,fp8,0,0.010507199913263321
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,64,40,2,128,1,float16,float16,0,0.010407999902963639
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,64,40,2,128,1,float16,fp8,0,0.010979200154542923
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,64,40,2,128,1,fp8,fp8,0,0.010422399640083313
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,64,40,4,128,1,float16,float16,0,0.010425599664449692
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,64,40,4,128,1,float16,fp8,0,0.010708799958229065
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,64,40,4,128,1,fp8,fp8,0,0.010420800000429154
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,64,40,8,128,1,float16,float16,0,0.01040479987859726
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,64,40,8,128,1,float16,fp8,0,0.012462399899959564
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,64,40,8,128,1,fp8,fp8,0,0.0105103999376297
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,64,40,40,128,1,float16,fp8,0,0.012401600182056428
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,64,40,40,128,1,float16,float16,0,0.01247360035777092
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,64,40,40,128,1,fp8,fp8,0,0.010500799864530563
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,64,40,1,128,1,float16,float16,0,0.010460799932479859
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,64,40,1,128,1,fp8,fp8,0,0.010462400317192078
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,64,40,1,128,1,float16,fp8,0,0.010428799688816071
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,64,40,2,128,1,float16,float16,0,0.01053439974784851
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,64,40,2,128,1,float16,fp8,0,0.010463999956846238
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,64,40,2,128,1,fp8,fp8,0,0.010438399761915207
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,64,40,4,128,1,float16,float16,0,0.01053759977221489
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,64,40,4,128,1,float16,fp8,0,0.010419200360774993
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,64,40,4,128,1,fp8,fp8,0,0.010503999888896942
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,64,40,8,128,1,float16,float16,0,0.010622400045394897
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,64,40,8,128,1,float16,fp8,0,0.010417599976062775
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,64,40,8,128,1,fp8,fp8,0,0.01050880029797554
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,32,40,1,128,1,float16,float16,0,0.2157360076904297
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,32,40,1,128,1,float16,fp8,0,0.2211359977722168
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,32,40,1,128,1,fp8,fp8,0,0.22128961086273194
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,32,40,2,128,1,float16,fp8,0,0.22043039798736572
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,32,40,2,128,1,fp8,fp8,0,0.2199712038040161
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,32,40,2,128,1,float16,float16,0,0.21579039096832275
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,32,40,4,128,1,float16,float16,0,0.21992959976196289
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,32,40,4,128,1,float16,fp8,0,0.2208559989929199
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,32,40,4,128,1,fp8,fp8,0,0.21939520835876464
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,32,40,8,128,1,float16,fp8,0,0.21974239349365235
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,32,40,8,128,1,float16,float16,0,0.2229520082473755
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,32,40,8,128,1,fp8,fp8,0,0.219321608543396
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,32,40,40,128,1,float16,float16,0,0.1342192053794861
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,32,40,40,128,1,float16,fp8,0,0.13193919658660888
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,32,40,40,128,1,fp8,fp8,0,0.1314736008644104
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,32,40,1,128,1,float16,float16,0,0.11262719631195069
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,32,40,1,128,1,float16,fp8,0,0.11457439661026
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,32,40,1,128,1,fp8,fp8,0,0.11293760538101197
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,32,40,2,128,1,float16,float16,0,0.11329599618911743
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,32,40,2,128,1,float16,fp8,0,0.11466879844665527
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,32,40,2,128,1,fp8,fp8,0,0.11296160221099853
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,32,40,4,128,1,float16,float16,0,0.11483520269393921
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,32,40,4,128,1,float16,fp8,0,0.11437599658966065
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,32,40,4,128,1,fp8,fp8,0,0.11294080018997192
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,32,40,8,128,1,float16,float16,0,0.11733920574188232
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,32,40,8,128,1,float16,fp8,0,0.11490720510482788
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,32,40,8,128,1,fp8,fp8,0,0.11306560039520264
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,32,40,40,128,1,float16,float16,0,0.07217119932174683
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,32,40,40,128,1,float16,fp8,0,0.06992480158805847
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,32,40,40,128,1,fp8,fp8,0,0.06981920003890991
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,32,40,1,128,1,float16,float16,0,0.06085280179977417
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,32,40,1,128,1,float16,fp8,0,0.0614736020565033
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,32,40,1,128,1,fp8,fp8,0,0.06040480136871338
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,32,40,2,128,1,float16,fp8,0,0.06068959832191467
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,32,40,2,128,1,float16,float16,0,0.060288000106811526
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,32,40,2,128,1,fp8,fp8,0,0.060094398260116574
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,32,40,4,128,1,float16,fp8,0,0.06124640107154846
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,32,40,4,128,1,float16,float16,0,0.061831998825073245
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,32,40,4,128,1,fp8,fp8,0,0.05973119735717773
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,32,40,8,128,1,float16,float16,0,0.06223520040512085
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,32,40,8,128,1,float16,fp8,0,0.060203200578689574
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,32,40,8,128,1,fp8,fp8,0,0.06065760254859924
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,32,40,40,128,1,float16,float16,0,0.03744319975376129
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,32,40,40,128,1,float16,fp8,0,0.039105600118637084
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,32,40,40,128,1,fp8,fp8,0,0.03926720023155213
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,32,40,1,128,1,float16,float16,0,0.035020801424980166
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,32,40,1,128,1,float16,fp8,0,0.035041600465774536
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,32,40,1,128,1,fp8,fp8,0,0.03509120047092438
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,32,40,2,128,1,float16,float16,0,0.035017600655555724
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,32,40,2,128,1,float16,fp8,0,0.03508000075817108
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,32,40,4,128,1,float16,float16,0,0.03496319949626923
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,32,40,2,128,1,fp8,fp8,0,0.03515680134296417
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,32,40,4,128,1,float16,fp8,0,0.035025599598884585
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,32,40,4,128,1,fp8,fp8,0,0.03519040048122406
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,32,40,8,128,1,float16,fp8,0,0.03500480055809021
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,32,40,8,128,1,float16,float16,0,0.0352975994348526
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,32,40,8,128,1,fp8,fp8,0,0.03513120114803314
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,32,40,40,128,1,float16,float16,0,0.02466080039739609
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,32,40,40,128,1,float16,fp8,0,0.024854399263858795
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,32,40,40,128,1,fp8,fp8,0,0.024758400022983552
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,32,40,1,128,1,float16,float16,0,0.02273920029401779
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,32,40,1,128,1,float16,fp8,0,0.022806400060653688
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,32,40,1,128,1,fp8,fp8,0,0.02268799990415573
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,32,40,2,128,1,float16,float16,0,0.022921599447727203
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,32,40,2,128,1,float16,fp8,0,0.022654399275779724
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,32,40,4,128,1,float16,float16,0,0.022699199616909027
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,32,40,4,128,1,float16,fp8,0,0.022672000527381896
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,32,40,2,128,1,fp8,fp8,0,0.023076799511909486
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,32,40,4,128,1,fp8,fp8,0,0.022892799973487855
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,32,40,8,128,1,float16,float16,0,0.022763200104236603
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,32,40,8,128,1,float16,fp8,0,0.022729599475860597
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,32,40,8,128,1,fp8,fp8,0,0.02285120040178299
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,32,40,40,128,1,float16,float16,0,0.01656000018119812
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,32,40,40,128,1,float16,fp8,0,0.01828639954328537
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,32,40,40,128,1,fp8,fp8,0,0.01847040057182312
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,32,40,1,128,1,float16,float16,0,0.016502399742603303
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,32,40,1,128,1,float16,fp8,0,0.01669919937849045
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,32,40,1,128,1,fp8,fp8,0,0.016655999422073364
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,32,40,2,128,1,float16,float16,0,0.016551999747753142
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,32,40,2,128,1,float16,fp8,0,0.016659200191497803
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,32,40,2,128,1,fp8,fp8,0,0.016599999368190767
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,32,40,4,128,1,float16,float16,0,0.016575999557971954
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,32,40,4,128,1,float16,fp8,0,0.01677280068397522
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,32,40,8,128,1,float16,float16,0,0.01663679927587509
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,32,40,4,128,1,fp8,fp8,0,0.0164560005068779
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,32,40,40,128,1,float16,float16,0,0.014484800398349762
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,32,40,8,128,1,fp8,fp8,0,0.016515199840068818
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,32,40,8,128,1,float16,fp8,0,0.016667200624942778
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,32,40,40,128,1,float16,fp8,0,0.01443679928779602
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,32,40,1,128,1,float16,float16,0,0.012577599287033081
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,32,40,40,128,1,fp8,fp8,0,0.01446560025215149
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,32,40,1,128,1,float16,fp8,0,0.014033600687980652
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,32,40,1,128,1,fp8,fp8,0,0.012910400331020356
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,32,40,2,128,1,float16,float16,0,0.012591999769210816
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,32,40,2,128,1,float16,fp8,0,0.014496000111103058
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,32,40,2,128,1,fp8,fp8,0,0.012729600071907043
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,32,40,4,128,1,float16,float16,0,0.012651200592517852
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,32,40,4,128,1,float16,fp8,0,0.014416000247001648
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,32,40,4,128,1,fp8,fp8,0,0.01281919926404953
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,32,40,8,128,1,float16,float16,0,0.01265919953584671
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,32,40,8,128,1,float16,fp8,0,0.014499199390411378
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,32,40,40,128,1,float16,float16,0,0.012443199753761292
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,32,40,8,128,1,fp8,fp8,0,0.014425599575042724
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,32,40,40,128,1,float16,fp8,0,0.010526400059461594
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,32,40,40,128,1,fp8,fp8,0,0.010480000078678131
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,32,40,1,128,1,float16,float16,0,0.010545600205659866
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,32,40,2,128,1,float16,float16,0,0.010478399693965912
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,32,40,1,128,1,fp8,fp8,0,0.01040000021457672
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,32,40,1,128,1,float16,fp8,0,0.010471999645233154
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,32,40,2,128,1,float16,fp8,0,0.010529600083827972
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,32,40,2,128,1,fp8,fp8,0,0.01043360009789467
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,32,40,4,128,1,float16,float16,0,0.010552000254392624
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,32,40,4,128,1,float16,fp8,0,0.010451199859380722
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,32,40,4,128,1,fp8,fp8,0,0.010516799986362457
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,32,40,8,128,1,float16,float16,0,0.010500799864530563
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,32,40,8,128,1,fp8,fp8,0,0.010406400263309478
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,32,40,8,128,1,float16,fp8,0,0.0104592002928257
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,32,40,40,128,1,float16,float16,0,0.010603199899196624
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,32,40,40,128,1,float16,fp8,0,0.010438399761915207
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,32,40,40,128,1,fp8,fp8,0,0.01050880029797554
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,32,40,1,128,1,float16,float16,0,0.010502400249242783
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,32,40,1,128,1,float16,fp8,0,0.010451199859380722
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,32,40,2,128,1,float16,float16,0,0.010468800365924836
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,32,40,1,128,1,fp8,fp8,0,0.01058719977736473
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,32,40,2,128,1,float16,fp8,0,0.010443200170993806
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,32,40,4,128,1,float16,float16,0,0.010380800068378448
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,32,40,2,128,1,fp8,fp8,0,0.010524799674749374
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,32,40,4,128,1,float16,fp8,0,0.01043360009789467
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,32,40,4,128,1,fp8,fp8,0,0.010420800000429154
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,32,40,8,128,1,float16,float16,0,0.010627199709415436
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,32,40,8,128,1,float16,fp8,0,0.010428799688816071
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,32,40,8,128,1,fp8,fp8,0,0.010486400127410889
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,16,40,1,128,1,float16,float16,0,0.2061840057373047
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,16,40,1,128,1,float16,fp8,0,0.19892480373382568
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,16,40,1,128,1,fp8,fp8,0,0.19886879920959472
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,16,40,2,128,1,float16,float16,0,0.20715839862823487
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,16,40,2,128,1,float16,fp8,0,0.19893440008163452
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,16,40,2,128,1,fp8,fp8,0,0.19900480508804322
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,16,40,4,128,1,float16,float16,0,0.20641279220581055
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,16,40,4,128,1,float16,fp8,0,0.19886879920959472
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,16,40,4,128,1,fp8,fp8,0,0.19896479845046997
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,16,40,8,128,1,float16,fp8,0,0.19901599884033203
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,16,40,8,128,1,float16,float16,0,0.20982880592346193
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,16,40,8,128,1,fp8,fp8,0,0.19914720058441163
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,16,40,40,128,1,float16,float16,0,0.12111999988555908
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,16,40,40,128,1,float16,fp8,0,0.11126559972763062
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,16,40,40,128,1,fp8,fp8,0,0.11284960508346557
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,16,40,1,128,1,float16,float16,0,0.10871520042419433
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,16,40,1,128,1,float16,fp8,0,0.10264320373535156
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,16,40,1,128,1,fp8,fp8,0,0.10268160104751586
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,16,40,2,128,1,float16,float16,0,0.1082927942276001
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,16,40,2,128,1,float16,fp8,0,0.10275360345840454
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,16,40,4,128,1,float16,float16,0,0.10905920267105103
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,16,40,2,128,1,fp8,fp8,0,0.10301120281219482
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,16,40,4,128,1,float16,fp8,0,0.10285600423812866
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,16,40,4,128,1,fp8,fp8,0,0.10267200469970703
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,16,40,8,128,1,float16,float16,0,0.11044960021972657
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,16,40,8,128,1,float16,fp8,0,0.1026960015296936
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,16,40,40,128,1,float16,float16,0,0.06414080262184144
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,16,40,8,128,1,fp8,fp8,0,0.10276000499725342
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,16,40,40,128,1,float16,fp8,0,0.05987200140953064
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,16,40,40,128,1,fp8,fp8,0,0.061054402589797975
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,16,40,1,128,1,float16,float16,0,0.05963519811630249
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,16,40,1,128,1,float16,fp8,0,0.055524802207946776
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,16,40,1,128,1,fp8,fp8,0,0.055580800771713255
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,16,40,2,128,1,float16,float16,0,0.05975040197372437
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,16,40,2,128,1,float16,fp8,0,0.05570399761199951
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,16,40,2,128,1,fp8,fp8,0,0.05568640232086182
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,16,40,4,128,1,float16,fp8,0,0.05565119981765747
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,16,40,4,128,1,float16,float16,0,0.05992799997329712
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,16,40,4,128,1,fp8,fp8,0,0.055511999130249026
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,16,40,8,128,1,float16,float16,0,0.060038399696350095
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,16,40,8,128,1,float16,fp8,0,0.05564320087432861
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,16,40,8,128,1,fp8,fp8,0,0.0556335985660553
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,16,40,40,128,1,float16,float16,0,0.036950400471687316
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,16,40,40,128,1,float16,fp8,0,0.03530080020427704
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,16,40,1,128,1,float16,float16,0,0.0350959986448288
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,16,40,40,128,1,fp8,fp8,0,0.03516640067100525
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,16,40,1,128,1,fp8,fp8,0,0.032948800921440126
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,16,40,2,128,1,float16,float16,0,0.03515680134296417
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,16,40,1,128,1,float16,fp8,0,0.03320960104465485
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,16,40,2,128,1,float16,fp8,0,0.03296799957752228
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,16,40,4,128,1,float16,float16,0,0.03519200086593628
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,16,40,2,128,1,fp8,fp8,0,0.0330704003572464
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,16,40,4,128,1,float16,fp8,0,0.03288480043411255
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,16,40,4,128,1,fp8,fp8,0,0.033099201321601865
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,16,40,8,128,1,float16,float16,0,0.03516640067100525
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,16,40,8,128,1,float16,fp8,0,0.032892799377441405
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,16,40,40,128,1,float16,float16,0,0.02345760017633438
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,16,40,8,128,1,fp8,fp8,0,0.033241599798202515
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,16,40,40,128,1,float16,fp8,0,0.02269279956817627
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,16,40,40,128,1,fp8,fp8,0,0.0228752002120018
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,16,40,1,128,1,float16,float16,0,0.02285760045051575
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,16,40,1,128,1,float16,fp8,0,0.020787200331687926
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,16,40,1,128,1,fp8,fp8,0,0.020755200088024138
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,16,40,2,128,1,float16,fp8,0,0.021559999883174898
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,16,40,2,128,1,float16,float16,0,0.022840000689029694
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,16,40,2,128,1,fp8,fp8,0,0.02080480009317398
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,16,40,4,128,1,float16,float16,0,0.02269279956817627
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,16,40,4,128,1,fp8,fp8,0,0.020732800662517547
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,16,40,4,128,1,float16,fp8,0,0.02083680033683777
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,16,40,8,128,1,float16,float16,0,0.022673599421977997
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,16,40,8,128,1,float16,fp8,0,0.02081120014190674
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,16,40,8,128,1,fp8,fp8,0,0.020891200006008147
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,16,40,40,128,1,float16,float16,0,0.016646400094032288
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,16,40,40,128,1,float16,fp8,0,0.01648640036582947
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,16,40,1,128,1,float16,float16,0,0.016502399742603303
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,16,40,1,128,1,float16,fp8,0,0.01656160056591034
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,16,40,40,128,1,fp8,fp8,0,0.016700799763202667
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,16,40,1,128,1,fp8,fp8,0,0.01653439998626709
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,16,40,2,128,1,float16,float16,0,0.016627199947834015
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,16,40,2,128,1,float16,fp8,0,0.016646400094032288
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,16,40,2,128,1,fp8,fp8,0,0.016465599834918975
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,16,40,4,128,1,float16,float16,0,0.01648160070180893
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,16,40,4,128,1,float16,fp8,0,0.015732799470424653
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,16,40,4,128,1,fp8,fp8,0,0.016523200273513793
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,16,40,8,128,1,float16,float16,0,0.01656000018119812
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,16,40,8,128,1,fp8,fp8,0,0.016465599834918975
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,16,40,8,128,1,float16,fp8,0,0.016022400557994844
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16,40,40,128,1,float16,float16,0,0.01451839953660965
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16,40,40,128,1,float16,fp8,0,0.012579199671745301
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16,40,1,128,1,float16,float16,0,0.013516800105571746
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16,40,40,128,1,fp8,fp8,0,0.012495999783277511
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16,40,1,128,1,float16,fp8,0,0.012531200051307678
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16,40,1,128,1,fp8,fp8,0,0.01263359934091568
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16,40,2,128,1,float16,float16,0,0.0126351997256279
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16,40,2,128,1,float16,fp8,0,0.012544000148773193
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16,40,2,128,1,fp8,fp8,0,0.012639999389648438
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16,40,4,128,1,float16,float16,0,0.012593600153923034
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16,40,4,128,1,float16,fp8,0,0.012495999783277511
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16,40,8,128,1,float16,float16,0,0.012591999769210816
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16,40,4,128,1,fp8,fp8,0,0.012603199481964112
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16,40,8,128,1,float16,fp8,0,0.012591999769210816
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16,40,8,128,1,fp8,fp8,0,0.01260959953069687
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16,40,40,128,1,float16,float16,0,0.012412799894809723
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16,40,40,128,1,float16,fp8,0,0.01048320010304451
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16,40,1,128,1,float16,float16,0,0.010475199669599533
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16,40,40,128,1,fp8,fp8,0,0.010503999888896942
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16,40,1,128,1,float16,fp8,0,0.01048479974269867
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16,40,1,128,1,fp8,fp8,0,0.010451199859380722
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16,40,2,128,1,float16,float16,0,0.010473600029945374
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16,40,2,128,1,float16,fp8,0,0.010502400249242783
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16,40,2,128,1,fp8,fp8,0,0.010398399829864503
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16,40,4,128,1,float16,float16,0,0.010599999874830245
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16,40,4,128,1,float16,fp8,0,0.010497599840164185
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16,40,4,128,1,fp8,fp8,0,0.010340800136327743
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16,40,8,128,1,float16,float16,0,0.010518400371074677
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16,40,8,128,1,float16,fp8,0,0.010524799674749374
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16,40,8,128,1,fp8,fp8,0,0.010468800365924836
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16,40,40,128,1,float16,float16,0,0.010527999699115753
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16,40,40,128,1,float16,fp8,0,0.01048159971833229
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16,40,40,128,1,fp8,fp8,0,0.01037919968366623
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16,40,1,128,1,float16,float16,0,0.010499200224876404
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16,40,1,128,1,float16,fp8,0,0.010444799810647965
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16,40,1,128,1,fp8,fp8,0,0.010371199995279311
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16,40,2,128,1,float16,float16,0,0.010427200049161912
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16,40,2,128,1,float16,fp8,0,0.01050880029797554
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16,40,2,128,1,fp8,fp8,0,0.010387200117111205
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16,40,4,128,1,float16,float16,0,0.010422399640083313
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16,40,4,128,1,float16,fp8,0,0.01043360009789467
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16,40,4,128,1,fp8,fp8,0,0.010471999645233154
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16,40,8,128,1,float16,fp8,0,0.0105103999376297
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16,40,8,128,1,float16,float16,0,0.010659199953079224
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16,40,8,128,1,fp8,fp8,0,0.010513599961996078
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,1,40,1,128,1,float16,float16,0,0.20328159332275392
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,1,40,1,128,1,float16,fp8,0,0.19048320055007933
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,1,40,1,128,1,fp8,fp8,0,0.19087040424346924
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,1,40,2,128,1,float16,float16,0,0.2037168025970459
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,1,40,2,128,1,float16,fp8,0,0.1905408024787903
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,1,40,2,128,1,fp8,fp8,0,0.18952959775924683
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,1,40,4,128,1,float16,float16,0,0.20330240726470947
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,1,40,4,128,1,float16,fp8,0,0.189411199092865
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,1,40,4,128,1,fp8,fp8,0,0.19001599550247192
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,1,40,8,128,1,float16,float16,0,0.20339360237121581
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,1,40,8,128,1,float16,fp8,0,0.19088480472564698
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,1,40,8,128,1,fp8,fp8,0,0.18992480039596557
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,1,40,40,128,1,float16,float16,0,0.10773760080337524
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,1,40,40,128,1,float16,fp8,0,0.10098079442977906
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,1,40,40,128,1,fp8,fp8,0,0.10056799650192261
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,1,40,1,128,1,float16,float16,0,0.10698239803314209
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,1,40,1,128,1,fp8,fp8,0,0.10060160160064698
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,1,40,1,128,1,float16,fp8,0,0.1007200002670288
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,1,40,2,128,1,float16,float16,0,0.10695199966430664
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,1,40,2,128,1,float16,fp8,0,0.09975039958953857
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,1,40,4,128,1,float16,float16,0,0.10699199438095093
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,1,40,2,128,1,fp8,fp8,0,0.10082240104675293
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,1,40,4,128,1,float16,fp8,0,0.09933120012283325
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,1,40,4,128,1,fp8,fp8,0,0.10055680274963379
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,1,40,8,128,1,float16,float16,0,0.10728800296783447
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,1,40,8,128,1,float16,fp8,0,0.10056799650192261
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,1,40,8,128,1,fp8,fp8,0,0.10070079565048218
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1,40,40,128,1,float16,float16,0,0.05999199748039245
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1,40,40,128,1,float16,fp8,0,0.05441759824752808
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1,40,40,128,1,fp8,fp8,0,0.05564000010490418
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1,40,1,128,1,float16,float16,0,0.05968800187110901
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1,40,1,128,1,float16,fp8,0,0.05351999998092651
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1,40,1,128,1,fp8,fp8,0,0.05548480153083801
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1,40,2,128,1,float16,float16,0,0.059646397829055786
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1,40,2,128,1,float16,fp8,0,0.053547197580337526
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1,40,2,128,1,fp8,fp8,0,0.05544480085372925
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1,40,4,128,1,float16,float16,0,0.05930719971656799
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1,40,4,128,1,float16,fp8,0,0.05350720286369324
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1,40,4,128,1,fp8,fp8,0,0.05373600125312805
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1,40,8,128,1,float16,float16,0,0.05974239706993103
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1,40,8,128,1,fp8,fp8,0,0.054371201992034913
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1,40,8,128,1,float16,fp8,0,0.05356159806251526
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1,40,40,128,1,float16,float16,0,0.03516319990158081
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1,40,40,128,1,float16,fp8,0,0.03308799862861633
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1,40,40,128,1,fp8,fp8,0,0.03297599852085113
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1,40,1,128,1,fp8,fp8,0,0.0329120010137558
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1,40,1,128,1,float16,fp8,0,0.03293280005455017
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1,40,1,128,1,float16,float16,0,0.034929600358009336
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1,40,2,128,1,float16,float16,0,0.03495360016822815
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1,40,2,128,1,float16,fp8,0,0.03293440043926239
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1,40,2,128,1,fp8,fp8,0,0.032924801111221313
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1,40,4,128,1,float16,float16,0,0.035025599598884585
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1,40,4,128,1,float16,fp8,0,0.03301759958267212
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1,40,4,128,1,fp8,fp8,0,0.031017601490020752
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1,40,8,128,1,float16,float16,0,0.0350271999835968
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1,40,8,128,1,float16,fp8,0,0.03284800052642822
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1,40,8,128,1,fp8,fp8,0,0.03097760081291199
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1,40,40,128,1,float16,float16,0,0.022726400196552275
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1,40,40,128,1,float16,fp8,0,0.020787200331687926
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1,40,40,128,1,fp8,fp8,0,0.02072799950838089
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1,40,1,128,1,float16,float16,0,0.022729599475860597
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1,40,1,128,1,float16,fp8,0,0.020761600136756896
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1,40,1,128,1,fp8,fp8,0,0.020683200657367708
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1,40,2,128,1,float16,float16,0,0.0227743998169899
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1,40,2,128,1,float16,fp8,0,0.02072640061378479
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1,40,2,128,1,fp8,fp8,0,0.02062080055475235
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1,40,4,128,1,float16,float16,0,0.022249600291252135
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1,40,4,128,1,float16,fp8,0,0.020764799416065217
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1,40,4,128,1,fp8,fp8,0,0.02067520022392273
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1,40,8,128,1,float16,float16,0,0.022652800381183624
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1,40,8,128,1,float16,fp8,0,0.020793600380420683
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1,40,8,128,1,fp8,fp8,0,0.020707200467586517
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1,40,40,128,1,float16,float16,0,0.01669600009918213
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1,40,40,128,1,float16,fp8,0,0.016415999829769136
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1,40,40,128,1,fp8,fp8,0,0.01462559998035431
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1,40,1,128,1,float16,float16,0,0.016147199273109435
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1,40,1,128,1,float16,fp8,0,0.016415999829769136
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1,40,1,128,1,fp8,fp8,0,0.015012800693511963
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1,40,2,128,1,float16,float16,0,0.016568000614643096
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1,40,2,128,1,float16,fp8,0,0.016451199352741242
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1,40,2,128,1,fp8,fp8,0,0.015387199819087982
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1,40,4,128,1,float16,float16,0,0.0164560005068779
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1,40,4,128,1,float16,fp8,0,0.016521599888801575
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1,40,4,128,1,fp8,fp8,0,0.015515199303627015
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1,40,8,128,1,float16,float16,0,0.016545599699020384
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1,40,8,128,1,float16,fp8,0,0.015452800691127777
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1,40,8,128,1,fp8,fp8,0,0.015641599893569946
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1,40,40,128,1,float16,float16,0,0.014528000354766845
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1,40,40,128,1,float16,fp8,0,0.012544000148773193
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1,40,40,128,1,fp8,fp8,0,0.012534399330615998
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1,40,1,128,1,float16,fp8,0,0.012436799705028534
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1,40,1,128,1,float16,float16,0,0.012495999783277511
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1,40,1,128,1,fp8,fp8,0,0.012436799705028534
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1,40,2,128,1,float16,fp8,0,0.012484800070524216
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1,40,2,128,1,float16,float16,0,0.012582400441169738
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1,40,4,128,1,float16,float16,0,0.012462399899959564
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1,40,2,128,1,fp8,fp8,0,0.012438400089740754
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1,40,4,128,1,float16,fp8,0,0.012537600100040435
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1,40,4,128,1,fp8,fp8,0,0.012617599964141846
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1,40,8,128,1,float16,float16,0,0.012443199753761292
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1,40,8,128,1,float16,fp8,0,0.012625600397586822
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1,40,8,128,1,fp8,fp8,0,0.012630400061607362
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1,40,40,128,1,float16,float16,0,0.012542399764060973
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1,40,40,128,1,float16,fp8,0,0.010489600151777268
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1,40,40,128,1,fp8,fp8,0,0.010580799728631973
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1,40,1,128,1,float16,float16,0,0.010416000336408614
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1,40,1,128,1,float16,fp8,0,0.010480000078678131
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1,40,1,128,1,fp8,fp8,0,0.010361599922180175
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1,40,2,128,1,float16,float16,0,0.010417599976062775
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1,40,2,128,1,float16,fp8,0,0.010486400127410889
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1,40,2,128,1,fp8,fp8,0,0.010292799770832061
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1,40,4,128,1,float16,float16,0,0.010518400371074677
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1,40,4,128,1,float16,fp8,0,0.010518400371074677
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1,40,4,128,1,fp8,fp8,0,0.010337600111961364
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1,40,8,128,1,float16,float16,0,0.010387200117111205
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1,40,8,128,1,fp8,fp8,0,0.010351999849081039
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1,40,8,128,1,float16,fp8,0,0.010446400195360184
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1,40,40,128,1,float16,float16,0,0.01045600026845932
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1,40,40,128,1,fp8,fp8,0,0.010465600341558457
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1,40,40,128,1,float16,fp8,0,0.010291200131177902
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1,40,1,128,1,float16,float16,0,0.010476800054311753
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1,40,1,128,1,float16,fp8,0,0.010407999902963639
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1,40,2,128,1,float16,float16,0,0.010393600165843963
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1,40,1,128,1,fp8,fp8,0,0.010451199859380722
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1,40,2,128,1,float16,fp8,0,0.010398399829864503
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1,40,2,128,1,fp8,fp8,0,0.01043040007352829
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1,40,4,128,1,float16,float16,0,0.010278400033712387
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1,40,4,128,1,float16,fp8,0,0.010414399951696397
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1,40,4,128,1,fp8,fp8,0,0.010480000078678131
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1,40,8,128,1,float16,float16,0,0.010463999956846238
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1,40,8,128,1,fp8,fp8,0,0.0104032002389431
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1,40,8,128,1,float16,fp8,0,0.010478399693965912
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,16384,32,1,128,1,float16,fp8,0,10.876121520996094
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,16384,32,1,128,1,fp8,fp8,0,11.622227478027344
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,16384,32,1,128,1,float16,float16,0,17.940104675292968
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,16384,32,2,128,1,float16,float16,0,17.549639892578124
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,16384,32,2,128,1,float16,fp8,0,11.03606719970703
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,16384,32,2,128,1,fp8,fp8,0,11.735782623291016
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,16384,32,4,128,1,float16,fp8,0,11.607077026367188
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,16384,32,4,128,1,fp8,fp8,0,12.242118072509765
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,16384,32,4,128,1,float16,float16,0,20.033296203613283
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,16384,32,8,128,1,float16,float16,0,21.477931213378906
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,16384,32,8,128,1,float16,fp8,0,11.626445007324218
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,16384,32,8,128,1,fp8,fp8,0,11.701142120361329
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16384,32,32,128,1,float16,fp8,0,5.823132705688477
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16384,32,32,128,1,fp8,fp8,0,5.676820755004883
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16384,32,1,128,1,float16,float16,0,9.578684997558593
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16384,32,1,128,1,float16,fp8,0,5.498617553710938
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16384,32,1,128,1,fp8,fp8,0,5.398926544189453
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16384,32,2,128,1,float16,fp8,0,5.668075180053711
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16384,32,2,128,1,float16,float16,0,9.757762908935547
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16384,32,2,128,1,fp8,fp8,0,5.432676696777344
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16384,32,4,128,1,fp8,fp8,0,5.489299011230469
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16384,32,4,128,1,float16,fp8,0,5.640726470947266
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16384,32,4,128,1,float16,float16,0,9.434255981445313
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16384,32,8,128,1,float16,fp8,0,5.662572860717773
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16384,32,8,128,1,fp8,fp8,0,6.121473693847657
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16384,32,8,128,1,float16,float16,0,9.860372924804688
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16384,32,32,128,1,float16,fp8,0,3.1028768539428713
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16384,32,32,128,1,fp8,fp8,0,3.130665588378906
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16384,32,1,128,1,float16,fp8,0,2.891592025756836
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16384,32,1,128,1,float16,float16,0,4.365561676025391
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16384,32,1,128,1,fp8,fp8,0,2.8005279541015624
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16384,32,2,128,1,float16,float16,0,4.721652984619141
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16384,32,2,128,1,float16,fp8,0,2.8346960067749025
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16384,32,2,128,1,fp8,fp8,0,3.194243240356445
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16384,32,4,128,1,float16,float16,0,4.021054458618164
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16384,32,4,128,1,float16,fp8,0,2.713240051269531
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16384,32,4,128,1,fp8,fp8,0,2.792521667480469
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16384,32,8,128,1,float16,fp8,0,2.779748725891113
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16384,32,8,128,1,float16,float16,0,4.270134353637696
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16384,32,8,128,1,fp8,fp8,0,2.8678495407104494
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16384,32,32,128,1,float16,fp8,0,1.6382415771484375
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16384,32,32,128,1,fp8,fp8,0,1.477620792388916
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16384,32,1,128,1,float16,float16,0,1.5777215957641602
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16384,32,1,128,1,float16,fp8,0,1.9533855438232421
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16384,32,1,128,1,fp8,fp8,0,1.487718391418457
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16384,32,2,128,1,float16,float16,0,1.6481632232666015
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16384,32,2,128,1,float16,fp8,0,1.7945135116577149
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16384,32,2,128,1,fp8,fp8,0,1.4030176162719727
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16384,32,4,128,1,float16,float16,0,1.7011472702026367
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16384,32,4,128,1,float16,fp8,0,1.671494483947754
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16384,32,4,128,1,fp8,fp8,0,1.4231727600097657
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16384,32,8,128,1,float16,float16,0,1.7283391952514648
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16384,32,8,128,1,float16,fp8,0,2.0544912338256838
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16384,32,8,128,1,fp8,fp8,0,1.4021568298339844
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,12288,32,1,128,1,float16,float16,0,10.52420654296875
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,12288,32,1,128,1,float16,fp8,0,6.361252975463867
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,12288,32,1,128,1,fp8,fp8,0,6.599015808105468
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,12288,32,2,128,1,float16,float16,0,10.703897857666016
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,12288,32,2,128,1,float16,fp8,0,6.83575668334961
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,12288,32,2,128,1,fp8,fp8,0,6.600673675537109
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,12288,32,4,128,1,float16,float16,0,10.873194885253906
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,12288,32,4,128,1,float16,fp8,0,6.803975677490234
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,12288,32,4,128,1,fp8,fp8,0,6.6012626647949215
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,12288,32,8,128,1,float16,float16,0,11.239155578613282
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16384,32,32,128,1,float16,float16,0,1.654360008239746
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16384,32,32,128,1,float16,float16,0,8.846336364746094
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16384,32,32,128,1,float16,float16,0,3.978638458251953
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,12288,32,8,128,1,float16,fp8,0,6.719374084472657
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,12288,32,32,128,1,float16,float16,0,4.809729766845703
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,12288,32,8,128,1,fp8,fp8,0,6.287659072875977
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,12288,32,32,128,1,float16,fp8,0,3.1567935943603516
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,12288,32,32,128,1,fp8,fp8,0,3.3849681854248046
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,12288,32,1,128,1,float16,fp8,0,3.1246288299560545
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,12288,32,1,128,1,fp8,fp8,0,3.157473564147949
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,12288,32,1,128,1,float16,float16,0,5.064616012573242
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,12288,32,2,128,1,float16,float16,0,4.987051010131836
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,12288,32,2,128,1,float16,fp8,0,3.1109264373779295
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,12288,32,2,128,1,fp8,fp8,0,3.046681594848633
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,12288,32,4,128,1,float16,fp8,0,3.2151134490966795
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,12288,32,4,128,1,float16,float16,0,4.945481491088867
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,12288,32,4,128,1,fp8,fp8,0,3.4990913391113283
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,12288,32,8,128,1,float16,float16,0,4.117422485351563
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,12288,32,32,128,1,float16,float16,0,2.088547134399414
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,12288,32,8,128,1,float16,fp8,0,3.361675262451172
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,12288,32,8,128,1,fp8,fp8,0,3.137745666503906
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,12288,32,32,128,1,float16,fp8,0,1.8222320556640625
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,12288,32,32,128,1,fp8,fp8,0,1.8420160293579102
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,12288,32,1,128,1,float16,fp8,0,1.6104927062988281
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,12288,32,1,128,1,fp8,fp8,0,1.561251163482666
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,12288,32,1,128,1,float16,float16,0,2.423788833618164
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,12288,32,2,128,1,float16,float16,0,1.9202495574951173
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,12288,32,2,128,1,float16,fp8,0,1.5859439849853516
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,12288,32,2,128,1,fp8,fp8,0,1.7055023193359375
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,12288,32,4,128,1,float16,float16,0,2.976862335205078
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,12288,32,4,128,1,fp8,fp8,0,1.6300016403198243
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,12288,32,4,128,1,float16,fp8,0,2.0656320571899416
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,12288,32,8,128,1,float16,float16,0,2.052516746520996
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,12288,32,32,128,1,float16,float16,0,1.009160041809082
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,12288,32,8,128,1,fp8,fp8,0,1.667932891845703
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,12288,32,32,128,1,float16,fp8,0,1.462012767791748
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,12288,32,32,128,1,fp8,fp8,0,1.0146368026733399
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,12288,32,8,128,1,float16,fp8,0,2.3115440368652345
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,12288,32,1,128,1,float16,float16,0,0.9492207527160644
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,12288,32,1,128,1,float16,fp8,0,0.8810527801513672
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,12288,32,1,128,1,fp8,fp8,0,0.8900992393493652
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,12288,32,2,128,1,float16,float16,0,0.9999168395996094
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,12288,32,2,128,1,fp8,fp8,0,0.8344304084777832
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,12288,32,2,128,1,float16,fp8,0,1.2386704444885255
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,12288,32,4,128,1,float16,float16,0,0.9606736183166504
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,12288,32,4,128,1,float16,fp8,0,1.1971247673034668
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,12288,32,4,128,1,fp8,fp8,0,0.8354672431945801
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,12288,32,8,128,1,float16,float16,0,0.9464655876159668
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,12288,32,8,128,1,float16,fp8,0,0.9206447601318359
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,12288,32,8,128,1,fp8,fp8,0,0.8944992065429688
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,10240,32,1,128,1,float16,fp8,0,4.333667373657226
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,10240,32,1,128,1,fp8,fp8,0,4.47694091796875
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,10240,32,1,128,1,float16,float16,0,7.45544662475586
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,10240,32,2,128,1,float16,float16,0,7.505169677734375
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,10240,32,2,128,1,float16,fp8,0,4.328467178344726
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,10240,32,2,128,1,fp8,fp8,0,4.503323364257812
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,10240,32,4,128,1,float16,fp8,0,4.847889709472656
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,10240,32,4,128,1,fp8,fp8,0,4.687844848632812
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,10240,32,4,128,1,float16,float16,0,7.409099578857422
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,10240,32,8,128,1,float16,float16,0,7.6218620300292965
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,10240,32,8,128,1,float16,fp8,0,4.685200119018555
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,10240,32,32,128,1,float16,float16,0,3.911804962158203
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,10240,32,8,128,1,fp8,fp8,0,4.612369537353516
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,10240,32,32,128,1,float16,fp8,0,2.6339855194091797
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,10240,32,32,128,1,fp8,fp8,0,2.6151519775390626
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,10240,32,1,128,1,float16,fp8,0,2.3450639724731444
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,10240,32,1,128,1,float16,float16,0,3.5913951873779295
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,10240,32,1,128,1,fp8,fp8,0,2.2295759201049803
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,10240,32,2,128,1,float16,fp8,0,2.2528432846069335
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,10240,32,2,128,1,float16,float16,0,3.358697509765625
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,10240,32,2,128,1,fp8,fp8,0,2.4048799514770507
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,10240,32,4,128,1,float16,float16,0,3.6125102996826173
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,10240,32,4,128,1,float16,fp8,0,2.4130239486694336
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,10240,32,4,128,1,fp8,fp8,0,2.8357791900634766
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,10240,32,8,128,1,float16,float16,0,3.3975025177001954
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,10240,32,8,128,1,float16,fp8,0,2.4820032119750977
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,10240,32,8,128,1,fp8,fp8,0,2.2710208892822266
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,10240,32,32,128,1,float16,float16,0,1.4421808242797851
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,10240,32,32,128,1,fp8,fp8,0,1.2623151779174804
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,10240,32,32,128,1,float16,fp8,0,1.4240336418151855
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,10240,32,1,128,1,float16,float16,0,1.4513440132141113
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,10240,32,1,128,1,float16,fp8,0,1.759480094909668
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,10240,32,1,128,1,fp8,fp8,0,1.1399328231811523
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,10240,32,2,128,1,float16,float16,0,1.340345573425293
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,10240,32,2,128,1,float16,fp8,0,1.5942928314208984
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,10240,32,2,128,1,fp8,fp8,0,1.1275839805603027
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,10240,32,4,128,1,float16,float16,0,1.3001055717468262
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,10240,32,4,128,1,float16,fp8,0,1.3936688423156738
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,10240,32,4,128,1,fp8,fp8,0,1.1818063735961915
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,10240,32,8,128,1,float16,float16,0,1.285416030883789
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,10240,32,8,128,1,float16,fp8,0,1.2917856216430663
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,10240,32,32,128,1,float16,float16,0,0.7289167881011963
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,10240,32,8,128,1,fp8,fp8,0,1.1665040016174317
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,10240,32,32,128,1,float16,fp8,0,0.6591328144073486
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,10240,32,32,128,1,fp8,fp8,0,0.6601168155670166
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,10240,32,1,128,1,float16,float16,0,0.683135986328125
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,10240,32,1,128,1,float16,fp8,0,0.6150608062744141
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,10240,32,1,128,1,fp8,fp8,0,0.6849584102630615
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,10240,32,2,128,1,float16,float16,0,0.703926420211792
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,10240,32,2,128,1,float16,fp8,0,0.6288703918457031
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,10240,32,2,128,1,fp8,fp8,0,0.6115071773529053
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,10240,32,4,128,1,float16,float16,0,0.7221648216247558
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,10240,32,4,128,1,float16,fp8,0,0.6403696060180664
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,10240,32,4,128,1,fp8,fp8,0,0.610640001296997
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,10240,32,8,128,1,float16,float16,0,0.7921872138977051
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,10240,32,8,128,1,float16,fp8,0,0.6165791988372803
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,10240,32,8,128,1,fp8,fp8,0,0.6094528198242187
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,8192,32,1,128,1,fp8,fp8,0,5.823559951782227
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,8192,32,1,128,1,float16,fp8,0,5.909211349487305
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,8192,32,1,128,1,float16,float16,0,10.411450958251953
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,8192,32,2,128,1,float16,fp8,0,5.859695816040039
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,8192,32,2,128,1,fp8,fp8,0,6.019916915893555
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,8192,32,2,128,1,float16,float16,0,8.792407989501953
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,8192,32,4,128,1,float16,fp8,0,6.040403366088867
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,8192,32,4,128,1,fp8,fp8,0,6.298564910888672
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,8192,32,4,128,1,float16,float16,0,10.73401107788086
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,8192,32,8,128,1,float16,float16,0,9.495108795166015
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,8192,32,8,128,1,float16,fp8,0,6.355926513671875
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,8192,32,8,128,1,fp8,fp8,0,6.5012962341308596
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,8192,32,32,128,1,float16,float16,0,4.296870422363281
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,8192,32,32,128,1,float16,fp8,0,3.0369295120239257
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,8192,32,32,128,1,fp8,fp8,0,3.1759872436523438
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,8192,32,1,128,1,float16,fp8,0,3.313292694091797
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,8192,32,1,128,1,fp8,fp8,0,2.906462478637695
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,8192,32,1,128,1,float16,float16,0,4.760908889770508
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,8192,32,2,128,1,float16,fp8,0,3.0315040588378905
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,8192,32,2,128,1,float16,float16,0,5.05432014465332
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,8192,32,2,128,1,fp8,fp8,0,3.097039985656738
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,8192,32,4,128,1,float16,float16,0,4.140635299682617
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,8192,32,4,128,1,float16,fp8,0,2.967030334472656
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,8192,32,4,128,1,fp8,fp8,0,3.0380720138549804
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,8192,32,8,128,1,float16,fp8,0,3.07010555267334
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,8192,32,8,128,1,float16,float16,0,4.898313522338867
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,8192,32,32,128,1,float16,float16,0,2.0259695053100586
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,8192,32,8,128,1,fp8,fp8,0,3.2563838958740234
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,8192,32,32,128,1,float16,fp8,0,1.5553327560424806
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,8192,32,32,128,1,fp8,fp8,0,1.85928955078125
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,8192,32,1,128,1,float16,float16,0,2.2432687759399412
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,8192,32,1,128,1,fp8,fp8,0,1.4604000091552733
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,8192,32,1,128,1,float16,fp8,0,1.6271535873413085
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,8192,32,2,128,1,float16,float16,0,1.6290895462036132
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,8192,32,2,128,1,fp8,fp8,0,1.4438176155090332
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,8192,32,4,128,1,float16,float16,0,1.616739273071289
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,8192,32,2,128,1,float16,fp8,0,2.0910335540771485
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,8192,32,4,128,1,float16,fp8,0,1.6224079132080078
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,8192,32,4,128,1,fp8,fp8,0,1.439475154876709
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,8192,32,8,128,1,float16,fp8,0,1.4713871955871582
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,8192,32,8,128,1,float16,float16,0,1.7807039260864257
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,8192,32,32,128,1,float16,float16,0,0.9130592346191406
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,8192,32,8,128,1,fp8,fp8,0,1.49857759475708
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,8192,32,32,128,1,float16,fp8,0,0.8229887962341309
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,8192,32,1,128,1,float16,float16,0,0.9275456428527832
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,8192,32,32,128,1,fp8,fp8,0,1.307596778869629
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,8192,32,1,128,1,float16,fp8,0,0.7719071865081787
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,8192,32,1,128,1,fp8,fp8,0,0.764079999923706
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,8192,32,2,128,1,fp8,fp8,0,0.7714367866516113
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,8192,32,2,128,1,float16,fp8,0,0.7971263885498047
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,8192,32,2,128,1,float16,float16,0,1.2125328063964844
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,8192,32,4,128,1,float16,float16,0,0.861355209350586
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,8192,32,4,128,1,float16,fp8,0,0.7639423847198487
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,8192,32,4,128,1,fp8,fp8,0,0.7600063800811767
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,8192,32,8,128,1,float16,fp8,0,0.7636047840118408
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,8192,32,8,128,1,float16,float16,0,1.2205663681030274
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,8192,32,32,128,1,float16,float16,0,0.5049744129180909
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,8192,32,32,128,1,float16,fp8,0,0.450270414352417
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,8192,32,8,128,1,fp8,fp8,0,0.7650320053100585
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,8192,32,1,128,1,float16,float16,0,0.47263360023498535
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,8192,32,1,128,1,float16,fp8,0,0.4208847999572754
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,8192,32,32,128,1,fp8,fp8,0,0.609494400024414
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,8192,32,1,128,1,fp8,fp8,0,0.42343840599060056
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,8192,32,2,128,1,float16,fp8,0,0.4246240139007568
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,8192,32,2,128,1,fp8,fp8,0,0.4309535980224609
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,8192,32,2,128,1,float16,float16,0,0.5188432216644288
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,8192,32,4,128,1,float16,float16,0,0.47985119819641114
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,8192,32,4,128,1,float16,fp8,0,0.42013120651245117
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,8192,32,4,128,1,fp8,fp8,0,0.43080639839172363
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,8192,32,8,128,1,float16,fp8,0,0.4238431930541992
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,8192,32,8,128,1,float16,float16,0,0.5265439987182617
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,8192,32,8,128,1,fp8,fp8,0,0.42002239227294924
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,6144,32,1,128,1,float16,fp8,0,3.376134490966797
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,6144,32,1,128,1,fp8,fp8,0,3.414689636230469
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,6144,32,1,128,1,float16,float16,0,5.426424026489258
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,6144,32,2,128,1,float16,float16,0,5.148660659790039
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,6144,32,2,128,1,float16,fp8,0,3.3489151000976562
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,6144,32,2,128,1,fp8,fp8,0,3.5870590209960938
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,6144,32,4,128,1,float16,fp8,0,3.5586273193359377
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,6144,32,4,128,1,float16,float16,0,5.438910293579101
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,6144,32,4,128,1,fp8,fp8,0,3.468017578125
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,6144,32,8,128,1,float16,fp8,0,3.2779056549072267
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,6144,32,32,128,1,float16,float16,0,2.525555229187012
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,6144,32,8,128,1,float16,float16,0,6.0756065368652346
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,6144,32,8,128,1,fp8,fp8,0,3.596516799926758
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,6144,32,32,128,1,float16,fp8,0,1.9027791976928712
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,6144,32,32,128,1,fp8,fp8,0,2.1736032485961916
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,6144,32,1,128,1,float16,fp8,0,1.7160255432128906
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,6144,32,1,128,1,float16,float16,0,2.9812992095947264
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,6144,32,1,128,1,fp8,fp8,0,1.7302896499633789
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,6144,32,2,128,1,float16,float16,0,2.0111631393432616
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,6144,32,2,128,1,float16,fp8,0,1.708336067199707
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,6144,32,4,128,1,float16,float16,0,1.864049530029297
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,6144,32,2,128,1,fp8,fp8,0,2.298883247375488
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,6144,32,4,128,1,fp8,fp8,0,1.7054048538208009
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,6144,32,4,128,1,float16,fp8,0,2.1666351318359376
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,6144,32,8,128,1,float16,float16,0,1.9952543258666993
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,6144,32,8,128,1,float16,fp8,0,1.7467327117919922
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,6144,32,8,128,1,fp8,fp8,0,1.6795408248901367
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,6144,32,32,128,1,float16,float16,0,1.7749967575073242
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,6144,32,32,128,1,float16,fp8,0,0.9962063789367676
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,6144,32,32,128,1,fp8,fp8,0,0.9651455879211426
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,6144,32,1,128,1,float16,fp8,0,0.8756815910339355
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,6144,32,1,128,1,fp8,fp8,0,0.8886544227600097
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,6144,32,1,128,1,float16,float16,0,1.6044624328613282
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,6144,32,2,128,1,float16,float16,0,0.9497648239135742
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,6144,32,2,128,1,float16,fp8,0,0.8690928459167481
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,6144,32,4,128,1,float16,fp8,0,0.8648544311523437
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,6144,32,4,128,1,float16,float16,0,1.129105567932129
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,6144,32,2,128,1,fp8,fp8,0,1.3568623542785645
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,6144,32,4,128,1,fp8,fp8,0,0.8586959838867188
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,6144,32,8,128,1,float16,float16,0,0.9993087768554687
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,6144,32,32,128,1,float16,float16,0,0.5689551830291748
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,6144,32,8,128,1,float16,fp8,0,1.0695679664611817
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,6144,32,8,128,1,fp8,fp8,0,1.1394960403442382
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,6144,32,32,128,1,fp8,fp8,0,0.5029600143432618
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,6144,32,32,128,1,float16,fp8,0,0.714844799041748
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,6144,32,1,128,1,float16,float16,0,0.5183839797973633
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,6144,32,1,128,1,float16,fp8,0,0.47674880027770994
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,6144,32,1,128,1,fp8,fp8,0,0.46584320068359375
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,6144,32,2,128,1,float16,fp8,0,0.466428804397583
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,6144,32,2,128,1,fp8,fp8,0,0.47681598663330077
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,6144,32,2,128,1,float16,float16,0,0.5669119834899903
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,6144,32,4,128,1,float16,float16,0,0.5209663867950439
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,6144,32,4,128,1,float16,fp8,0,0.46778078079223634
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,6144,32,4,128,1,fp8,fp8,0,0.4646031856536865
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,6144,32,8,128,1,float16,float16,0,0.5503615856170654
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,6144,32,32,128,1,float16,float16,0,0.31578080654144286
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,6144,32,8,128,1,fp8,fp8,0,0.46363201141357424
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,6144,32,8,128,1,float16,fp8,0,0.46378240585327146
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,6144,32,32,128,1,float16,fp8,0,0.3077888011932373
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,6144,32,32,128,1,fp8,fp8,0,0.2823551893234253
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,6144,32,1,128,1,float16,float16,0,0.286844801902771
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,6144,32,1,128,1,float16,fp8,0,0.26080319881439207
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,6144,32,1,128,1,fp8,fp8,0,0.28807520866394043
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,6144,32,2,128,1,float16,float16,0,0.28877599239349366
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,6144,32,2,128,1,float16,fp8,0,0.25897600650787356
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,6144,32,2,128,1,fp8,fp8,0,0.2612368106842041
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,6144,32,4,128,1,float16,float16,0,0.2946943998336792
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,6144,32,4,128,1,float16,fp8,0,0.26035521030426023
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,6144,32,4,128,1,fp8,fp8,0,0.25946080684661865
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,6144,32,8,128,1,float16,fp8,0,0.26200640201568604
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,6144,32,8,128,1,float16,float16,0,0.29896318912506104
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,6144,32,8,128,1,fp8,fp8,0,0.26024479866027833
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,4096,32,1,128,1,float16,fp8,0,3.153023910522461
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,4096,32,1,128,1,fp8,fp8,0,3.12618408203125
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,4096,32,1,128,1,float16,float16,0,4.750012969970703
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,4096,32,2,128,1,float16,float16,0,4.448644638061523
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,4096,32,2,128,1,float16,fp8,0,3.202475357055664
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,4096,32,2,128,1,fp8,fp8,0,3.174630355834961
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,4096,32,4,128,1,float16,fp8,0,3.4455745697021483
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,4096,32,4,128,1,float16,float16,0,5.101796722412109
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,4096,32,4,128,1,fp8,fp8,0,3.166641616821289
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,4096,32,8,128,1,float16,float16,0,4.826707077026367
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,4096,32,8,128,1,float16,fp8,0,3.30537109375
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,4096,32,32,128,1,float16,float16,0,2.424937629699707
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,4096,32,8,128,1,fp8,fp8,0,3.3458927154541014
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,4096,32,32,128,1,float16,fp8,0,1.9015024185180665
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,4096,32,32,128,1,fp8,fp8,0,1.9353744506835937
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,4096,32,1,128,1,float16,float16,0,2.509939193725586
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,4096,32,1,128,1,float16,fp8,0,1.6532255172729493
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,4096,32,1,128,1,fp8,fp8,0,1.5791152000427247
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,4096,32,2,128,1,float16,float16,0,2.0390623092651365
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,4096,32,2,128,1,fp8,fp8,0,1.5967040061950684
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,4096,32,2,128,1,float16,fp8,0,1.699545669555664
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,4096,32,4,128,1,float16,float16,0,1.811147117614746
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,4096,32,4,128,1,fp8,fp8,0,1.5813872337341308
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,4096,32,4,128,1,float16,fp8,0,2.0746416091918944
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,4096,32,8,128,1,float16,float16,0,2.0606096267700194
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,4096,32,8,128,1,float16,fp8,0,2.1096303939819334
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,4096,32,32,128,1,float16,float16,0,1.0647279739379882
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,4096,32,8,128,1,fp8,fp8,0,1.605611228942871
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,4096,32,32,128,1,fp8,fp8,0,0.9199888229370117
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,4096,32,32,128,1,float16,fp8,0,1.2835760116577148
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,4096,32,1,128,1,float16,float16,0,1.2809231758117676
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,4096,32,1,128,1,float16,fp8,0,0.8278047561645507
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,4096,32,1,128,1,fp8,fp8,0,0.8149231910705567
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,4096,32,2,128,1,float16,float16,0,0.9438400268554688
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,4096,32,2,128,1,fp8,fp8,0,0.8223183631896973
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,4096,32,2,128,1,float16,fp8,0,1.1862784385681153
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,4096,32,4,128,1,float16,float16,0,0.8955408096313476
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,4096,32,4,128,1,float16,fp8,0,1.2563679695129395
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,4096,32,4,128,1,fp8,fp8,0,0.815283203125
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,4096,32,8,128,1,float16,float16,0,0.9239567756652832
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,4096,32,8,128,1,float16,fp8,0,0.8338303565979004
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,4096,32,32,128,1,float16,float16,0,0.5370783805847168
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,4096,32,8,128,1,fp8,fp8,0,0.8217535972595215
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,4096,32,32,128,1,float16,fp8,0,0.4885695934295654
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,4096,32,32,128,1,fp8,fp8,0,0.4859903812408447
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,4096,32,1,128,1,float16,float16,0,0.48341760635375974
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,4096,32,1,128,1,float16,fp8,0,0.43860478401184083
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,4096,32,1,128,1,fp8,fp8,0,0.5075759887695312
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,4096,32,2,128,1,float16,float16,0,0.47530078887939453
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,4096,32,2,128,1,float16,fp8,0,0.4717951774597168
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,4096,32,2,128,1,fp8,fp8,0,0.4393136024475098
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,4096,32,4,128,1,float16,float16,0,0.4840672016143799
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,4096,32,4,128,1,float16,fp8,0,0.4726255893707275
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,4096,32,4,128,1,fp8,fp8,0,0.43503518104553224
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,4096,32,8,128,1,float16,float16,0,0.49416799545288087
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,4096,32,8,128,1,float16,fp8,0,0.4410672187805176
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,4096,32,8,128,1,fp8,fp8,0,0.43124961853027344
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,4096,32,32,128,1,float16,fp8,0,0.2648655891418457
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,4096,32,32,128,1,float16,float16,0,0.32812960147857667
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,4096,32,32,128,1,fp8,fp8,0,0.2661007881164551
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,4096,32,1,128,1,float16,float16,0,0.26045119762420654
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,4096,32,1,128,1,float16,fp8,0,0.2881455898284912
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,4096,32,1,128,1,fp8,fp8,0,0.238919997215271
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,4096,32,2,128,1,float16,float16,0,0.2604896068572998
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,4096,32,2,128,1,float16,fp8,0,0.2903584003448486
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,4096,32,2,128,1,fp8,fp8,0,0.23855679035186766
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,4096,32,4,128,1,float16,float16,0,0.2607872009277344
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,4096,32,4,128,1,fp8,fp8,0,0.23865120410919188
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,4096,32,4,128,1,float16,fp8,0,0.289902400970459
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,4096,32,8,128,1,float16,float16,0,0.26639840602874754
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,4096,32,8,128,1,float16,fp8,0,0.23831040859222413
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,4096,32,8,128,1,fp8,fp8,0,0.29130239486694337
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,4096,32,32,128,1,float16,float16,0,0.16845439672470092
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,4096,32,32,128,1,float16,fp8,0,0.153711998462677
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,4096,32,32,128,1,fp8,fp8,0,0.1647264003753662
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,4096,32,1,128,1,float16,float16,0,0.14863359928131104
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,4096,32,1,128,1,float16,fp8,0,0.13911199569702148
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,4096,32,1,128,1,fp8,fp8,0,0.13776479959487914
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,4096,32,2,128,1,float16,float16,0,0.14861279726028442
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,4096,32,2,128,1,float16,fp8,0,0.13773759603500366
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,4096,32,2,128,1,fp8,fp8,0,0.13753119707107545
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,4096,32,4,128,1,float16,float16,0,0.149782395362854
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,4096,32,4,128,1,float16,fp8,0,0.1391759991645813
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,4096,32,4,128,1,fp8,fp8,0,0.13721920251846315
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,4096,32,8,128,1,float16,float16,0,0.15182080268859863
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,4096,32,8,128,1,float16,fp8,0,0.13944000005722046
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,4096,32,8,128,1,fp8,fp8,0,0.13716800212860109
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,3072,32,1,128,1,float16,fp8,0,1.891321563720703
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,3072,32,1,128,1,float16,float16,0,1.9829103469848632
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,3072,32,1,128,1,fp8,fp8,0,1.8718591690063477
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,3072,32,2,128,1,float16,float16,0,1.993631935119629
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,3072,32,2,128,1,float16,fp8,0,1.8952144622802733
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,3072,32,4,128,1,float16,float16,0,2.1235599517822266
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,3072,32,2,128,1,fp8,fp8,0,2.100409507751465
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,3072,32,4,128,1,fp8,fp8,0,1.9372543334960937
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,3072,32,4,128,1,float16,fp8,0,1.9869264602661132
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,3072,32,8,128,1,float16,fp8,0,1.894424057006836
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,3072,32,8,128,1,float16,float16,0,2.7977088928222655
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,3072,32,32,128,1,float16,float16,0,1.248031997680664
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,3072,32,32,128,1,float16,fp8,0,1.1344544410705566
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,3072,32,8,128,1,fp8,fp8,0,1.9027151107788085
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,3072,32,1,128,1,float16,float16,0,1.0638863563537597
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,3072,32,1,128,1,float16,fp8,0,0.9626959800720215
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,3072,32,32,128,1,fp8,fp8,0,1.4235872268676757
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,3072,32,1,128,1,fp8,fp8,0,0.9817935943603515
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,3072,32,2,128,1,float16,fp8,0,0.9664400100708008
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,3072,32,2,128,1,fp8,fp8,0,1.0196720123291017
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,3072,32,4,128,1,float16,float16,0,1.0727984428405761
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,3072,32,2,128,1,float16,float16,0,1.4703791618347168
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,3072,32,4,128,1,float16,fp8,0,0.9591856002807617
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,3072,32,4,128,1,fp8,fp8,0,0.9572272300720215
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,3072,32,8,128,1,float16,fp8,0,1.1915184020996095
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,3072,32,8,128,1,float16,float16,0,1.4035759925842286
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,3072,32,32,128,1,float16,float16,0,0.6469552040100097
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,3072,32,8,128,1,fp8,fp8,0,0.9666720390319824
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,3072,32,32,128,1,float16,fp8,0,0.5802576065063476
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,3072,32,1,128,1,float16,float16,0,0.5494959831237793
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,3072,32,1,128,1,float16,fp8,0,0.5046319961547852
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,3072,32,32,128,1,fp8,fp8,0,0.8586943626403809
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,3072,32,1,128,1,fp8,fp8,0,0.5049424171447754
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,3072,32,2,128,1,float16,fp8,0,0.4997504234313965
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,3072,32,2,128,1,float16,float16,0,0.6370255947113037
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,3072,32,2,128,1,fp8,fp8,0,0.5212704181671143
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,3072,32,4,128,1,float16,fp8,0,0.5019904136657715
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,3072,32,4,128,1,float16,float16,0,0.5499279975891114
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,3072,32,4,128,1,fp8,fp8,0,0.6328576087951661
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,3072,32,8,128,1,float16,float16,0,0.5700255870819092
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,3072,32,8,128,1,float16,fp8,0,0.4983776092529297
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,3072,32,8,128,1,fp8,fp8,0,0.5012095928192138
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,3072,32,32,128,1,float16,float16,0,0.39519999027252195
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,3072,32,32,128,1,float16,fp8,0,0.31336801052093505
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,3072,32,32,128,1,fp8,fp8,0,0.31139841079711916
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,3072,32,1,128,1,float16,float16,0,0.2947007894515991
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,3072,32,1,128,1,fp8,fp8,0,0.2741695880889893
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,3072,32,1,128,1,float16,fp8,0,0.30365920066833496
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,3072,32,2,128,1,float16,float16,0,0.2900415897369385
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,3072,32,2,128,1,fp8,fp8,0,0.27477920055389404
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,3072,32,2,128,1,float16,fp8,0,0.2729088068008423
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,3072,32,4,128,1,float16,float16,0,0.2985152006149292
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,3072,32,4,128,1,float16,fp8,0,0.26993598937988283
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,3072,32,4,128,1,fp8,fp8,0,0.2726815938949585
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,3072,32,8,128,1,float16,float16,0,0.3043936014175415
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,3072,32,8,128,1,float16,fp8,0,0.277841591835022
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,3072,32,8,128,1,fp8,fp8,0,0.2689807891845703
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,3072,32,32,128,1,float16,float16,0,0.19307039976119994
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,3072,32,32,128,1,float16,fp8,0,0.17216960191726685
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,3072,32,32,128,1,fp8,fp8,0,0.17768640518188478
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,3072,32,1,128,1,float16,float16,0,0.1612048029899597
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,3072,32,1,128,1,float16,fp8,0,0.15305919647216798
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,3072,32,1,128,1,fp8,fp8,0,0.15048480033874512
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,3072,32,2,128,1,float16,float16,0,0.16436320543289185
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,3072,32,2,128,1,float16,fp8,0,0.14843679666519166
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,3072,32,2,128,1,fp8,fp8,0,0.1508303999900818
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,3072,32,4,128,1,float16,float16,0,0.16583679914474486
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,3072,32,4,128,1,float16,fp8,0,0.14840320348739625
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,3072,32,4,128,1,fp8,fp8,0,0.15073440074920655
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,3072,32,8,128,1,float16,float16,0,0.16945760250091552
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,3072,32,8,128,1,float16,fp8,0,0.14869439601898193
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,3072,32,8,128,1,fp8,fp8,0,0.15172799825668334
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,3072,32,32,128,1,float16,float16,0,0.1109120011329651
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,3072,32,32,128,1,float16,fp8,0,0.10046720504760742
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,3072,32,32,128,1,fp8,fp8,0,0.09940639734268189
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,3072,32,1,128,1,float16,float16,0,0.09598079919815064
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,3072,32,1,128,1,float16,fp8,0,0.08935520052909851
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,3072,32,1,128,1,fp8,fp8,0,0.08930720090866089
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,3072,32,2,128,1,float16,float16,0,0.09619680047035217
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,3072,32,2,128,1,float16,fp8,0,0.0895247995853424
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,3072,32,2,128,1,fp8,fp8,0,0.08999040126800537
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,3072,32,4,128,1,float16,float16,0,0.0966816008090973
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,3072,32,4,128,1,float16,fp8,0,0.08883200287818908
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,3072,32,4,128,1,fp8,fp8,0,0.0893775999546051
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,3072,32,8,128,1,float16,float16,0,0.09881119728088379
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,3072,32,8,128,1,float16,fp8,0,0.08869760036468506
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,3072,32,8,128,1,fp8,fp8,0,0.08920320272445678
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,2048,32,1,128,1,float16,fp8,0,1.8851583480834961
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,2048,32,1,128,1,fp8,fp8,0,1.8826799392700195
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,2048,32,1,128,1,float16,float16,0,2.155753517150879
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,2048,32,2,128,1,float16,fp8,0,1.8798240661621093
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,2048,32,2,128,1,float16,float16,0,2.122233581542969
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,2048,32,2,128,1,fp8,fp8,0,1.8904815673828126
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,2048,32,4,128,1,float16,float16,0,1.9886959075927735
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,2048,32,4,128,1,fp8,fp8,0,1.908238410949707
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,2048,32,4,128,1,float16,fp8,0,2.362228775024414
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,2048,32,8,128,1,float16,float16,0,2.091516876220703
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,2048,32,32,128,1,float16,float16,0,1.2940735816955566
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,2048,32,8,128,1,float16,fp8,0,1.9937519073486327
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,2048,32,8,128,1,fp8,fp8,0,1.896384048461914
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,2048,32,32,128,1,float16,fp8,0,1.5105376243591309
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,2048,32,1,128,1,float16,float16,0,1.0091168403625488
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,2048,32,1,128,1,float16,fp8,0,1.0097583770751952
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,2048,32,32,128,1,fp8,fp8,0,1.277683162689209
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,2048,32,1,128,1,fp8,fp8,0,0.9984335899353027
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,2048,32,2,128,1,float16,fp8,0,0.9818479537963867
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,2048,32,2,128,1,fp8,fp8,0,0.9571311950683594
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,2048,32,2,128,1,float16,float16,0,1.3694543838500977
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,2048,32,4,128,1,float16,float16,0,1.0266048431396484
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,2048,32,4,128,1,float16,fp8,0,0.9731504440307617
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,2048,32,4,128,1,fp8,fp8,0,0.9551296234130859
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,2048,32,8,128,1,float16,fp8,0,0.9628959655761719
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,2048,32,8,128,1,float16,float16,0,1.385262393951416
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,2048,32,32,128,1,float16,float16,0,0.6631199836730957
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,2048,32,8,128,1,fp8,fp8,0,0.9520751953125
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,2048,32,32,128,1,float16,fp8,0,0.6102735996246338
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,2048,32,1,128,1,float16,float16,0,0.5176832199096679
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,2048,32,32,128,1,fp8,fp8,0,0.7829887866973877
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,2048,32,1,128,1,float16,fp8,0,0.49982080459594724
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,2048,32,1,128,1,fp8,fp8,0,0.5027423858642578
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,2048,32,2,128,1,float16,float16,0,0.5179743766784668
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,2048,32,2,128,1,fp8,fp8,0,0.49922881126403806
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,2048,32,2,128,1,float16,fp8,0,0.6051887989044189
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,2048,32,4,128,1,float16,float16,0,0.5339680194854737
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,2048,32,4,128,1,float16,fp8,0,0.52641921043396
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,2048,32,8,128,1,float16,float16,0,0.5502463817596436
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,2048,32,4,128,1,fp8,fp8,0,0.4962639808654785
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,2048,32,8,128,1,float16,fp8,0,0.4938352108001709
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,2048,32,8,128,1,fp8,fp8,0,0.5063072204589844
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,2048,32,32,128,1,float16,float16,0,0.34960958957672117
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,2048,32,32,128,1,float16,fp8,0,0.3185951948165894
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,2048,32,32,128,1,fp8,fp8,0,0.31951680183410647
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,2048,32,1,128,1,float16,float16,0,0.27538399696350097
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,2048,32,1,128,1,float16,fp8,0,0.26596798896789553
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,2048,32,1,128,1,fp8,fp8,0,0.2673840045928955
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,2048,32,2,128,1,float16,float16,0,0.2785088062286377
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,2048,32,2,128,1,float16,fp8,0,0.2629584074020386
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,2048,32,2,128,1,fp8,fp8,0,0.27642719745635985
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,2048,32,4,128,1,float16,float16,0,0.28432159423828124
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,2048,32,4,128,1,float16,fp8,0,0.26452639102935793
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,2048,32,4,128,1,fp8,fp8,0,0.26157279014587403
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,2048,32,8,128,1,float16,float16,0,0.29991199970245364
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,2048,32,8,128,1,float16,fp8,0,0.2651087999343872
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,2048,32,8,128,1,fp8,fp8,0,0.26213600635528567
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,2048,32,32,128,1,float16,float16,0,0.18769439458847045
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,2048,32,32,128,1,float16,fp8,0,0.17549439668655395
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,2048,32,32,128,1,fp8,fp8,0,0.17422239780426024
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,2048,32,1,128,1,float16,float16,0,0.15538400411605835
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,2048,32,1,128,1,float16,fp8,0,0.14248000383377074
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,2048,32,1,128,1,fp8,fp8,0,0.14559680223464966
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,2048,32,2,128,1,float16,float16,0,0.15280959606170655
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,2048,32,2,128,1,float16,fp8,0,0.1457535982131958
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,2048,32,2,128,1,fp8,fp8,0,0.14200799465179442
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,2048,32,4,128,1,float16,float16,0,0.1569264054298401
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,2048,32,4,128,1,float16,fp8,0,0.14312319755554198
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,2048,32,4,128,1,fp8,fp8,0,0.1444000005722046
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,2048,32,8,128,1,float16,float16,0,0.1593135952949524
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,2048,32,32,128,1,float16,fp8,0,0.097324800491333
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,2048,32,8,128,1,float16,fp8,0,0.1449552059173584
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,2048,32,32,128,1,float16,float16,0,0.1083024024963379
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,2048,32,8,128,1,fp8,fp8,0,0.14439840316772462
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,2048,32,32,128,1,fp8,fp8,0,0.09789599776268006
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,2048,32,1,128,1,float16,float16,0,0.08776479959487915
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,2048,32,1,128,1,float16,fp8,0,0.08290560245513916
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,2048,32,1,128,1,fp8,fp8,0,0.08236479759216309
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,2048,32,2,128,1,float16,float16,0,0.08731200098991394
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,2048,32,2,128,1,float16,fp8,0,0.08269919753074646
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,2048,32,2,128,1,fp8,fp8,0,0.08230080008506775
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,2048,32,4,128,1,float16,float16,0,0.08834559917449951
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,2048,32,4,128,1,fp8,fp8,0,0.08271840214729309
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,2048,32,4,128,1,float16,fp8,0,0.08266559839248658
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,2048,32,8,128,1,float16,float16,0,0.09096639752388
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,2048,32,8,128,1,float16,fp8,0,0.08322719931602478
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,2048,32,8,128,1,fp8,fp8,0,0.0823087990283966
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,2048,32,32,128,1,float16,float16,0,0.06405760049819946
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,2048,32,32,128,1,float16,fp8,0,0.0597711980342865
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,2048,32,32,128,1,fp8,fp8,0,0.05974079966545105
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,2048,32,1,128,1,float16,float16,0,0.05610560178756714
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,2048,32,1,128,1,float16,fp8,0,0.053569602966308597
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,2048,32,1,128,1,fp8,fp8,0,0.05365440249443054
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,2048,32,2,128,1,float16,float16,0,0.05607360005378723
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,2048,32,2,128,1,float16,fp8,0,0.053465598821640016
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,2048,32,2,128,1,fp8,fp8,0,0.05337280035018921
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,2048,32,4,128,1,float16,float16,0,0.057499200105667114
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,2048,32,4,128,1,float16,fp8,0,0.05371999740600586
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,2048,32,4,128,1,fp8,fp8,0,0.05368800163269043
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,2048,32,8,128,1,float16,float16,0,0.05771840214729309
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,2048,32,8,128,1,float16,fp8,0,0.053257602453231814
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,2048,32,8,128,1,fp8,fp8,0,0.05406879782676697
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1536,32,1,128,1,float16,float16,0,1.2230655670166015
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1536,32,1,128,1,float16,fp8,0,1.1899344444274902
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1536,32,1,128,1,fp8,fp8,0,1.194918441772461
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1536,32,2,128,1,float16,fp8,0,1.1849984169006347
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1536,32,2,128,1,float16,float16,0,1.2917152404785157
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1536,32,2,128,1,fp8,fp8,0,1.1915712356567383
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1536,32,4,128,1,float16,float16,0,1.2458111763000488
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1536,32,4,128,1,fp8,fp8,0,1.1892720222473145
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1536,32,4,128,1,float16,fp8,0,1.551187229156494
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1536,32,8,128,1,float16,fp8,0,1.1867152214050294
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1536,32,8,128,1,float16,float16,0,1.4570351600646974
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1536,32,8,128,1,fp8,fp8,0,1.1850879669189454
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1536,32,32,128,1,float16,float16,0,0.848795223236084
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1536,32,32,128,1,float16,fp8,0,0.7719935894012451
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1536,32,1,128,1,float16,float16,0,0.6316527843475341
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1536,32,1,128,1,float16,fp8,0,0.6118624210357666
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1536,32,32,128,1,fp8,fp8,0,0.9207183837890625
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1536,32,1,128,1,fp8,fp8,0,0.613316822052002
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1536,32,2,128,1,float16,float16,0,0.6298912048339844
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1536,32,2,128,1,float16,fp8,0,0.6100128173828125
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1536,32,2,128,1,fp8,fp8,0,0.7392144203186035
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1536,32,4,128,1,float16,float16,0,0.6459887981414795
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1536,32,4,128,1,float16,fp8,0,0.6947375774383545
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1536,32,4,128,1,fp8,fp8,0,0.6078464031219483
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1536,32,8,128,1,float16,float16,0,0.6781055927276611
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1536,32,8,128,1,float16,fp8,0,0.6149199962615967
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1536,32,32,128,1,float16,float16,0,0.43828320503234863
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1536,32,8,128,1,fp8,fp8,0,0.6858272075653076
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1536,32,32,128,1,float16,fp8,0,0.4144320011138916
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1536,32,32,128,1,fp8,fp8,0,0.4004367828369141
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1536,32,1,128,1,float16,fp8,0,0.3239840030670166
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1536,32,1,128,1,float16,float16,0,0.32346720695495607
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1536,32,1,128,1,fp8,fp8,0,0.32955520153045653
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1536,32,2,128,1,float16,float16,0,0.3309231996536255
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1536,32,2,128,1,float16,fp8,0,0.31964960098266604
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1536,32,2,128,1,fp8,fp8,0,0.3192960023880005
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1536,32,4,128,1,float16,float16,0,0.33882880210876465
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1536,32,4,128,1,float16,fp8,0,0.3199712038040161
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1536,32,4,128,1,fp8,fp8,0,0.3199280023574829
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1536,32,8,128,1,float16,float16,0,0.35041279792785646
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1536,32,8,128,1,float16,fp8,0,0.3194080114364624
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1536,32,8,128,1,fp8,fp8,0,0.3184736013412476
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1536,32,32,128,1,float16,float16,0,0.2325984001159668
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1536,32,32,128,1,fp8,fp8,0,0.2146991968154907
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1536,32,32,128,1,float16,fp8,0,0.2129663944244385
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1536,32,1,128,1,float16,float16,0,0.17962559461593627
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1536,32,1,128,1,float16,fp8,0,0.1720736026763916
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1536,32,1,128,1,fp8,fp8,0,0.1719599962234497
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1536,32,2,128,1,float16,float16,0,0.17583999633789063
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1536,32,2,128,1,float16,fp8,0,0.17271039485931397
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1536,32,2,128,1,fp8,fp8,0,0.17192000150680542
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1536,32,4,128,1,float16,float16,0,0.18085919618606566
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1536,32,4,128,1,float16,fp8,0,0.16979680061340333
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1536,32,4,128,1,fp8,fp8,0,0.17231359481811523
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1536,32,8,128,1,float16,float16,0,0.19001280069351195
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1536,32,8,128,1,float16,fp8,0,0.1700991988182068
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1536,32,8,128,1,fp8,fp8,0,0.1696239948272705
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1536,32,32,128,1,float16,float16,0,0.12785120010375978
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1536,32,32,128,1,float16,fp8,0,0.11673439741134643
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1536,32,32,128,1,fp8,fp8,0,0.11697440147399903
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1536,32,1,128,1,float16,float16,0,0.09699199795722961
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1536,32,1,128,1,fp8,fp8,0,0.09462080001831055
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1536,32,1,128,1,float16,fp8,0,0.09476320147514343
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1536,32,2,128,1,float16,float16,0,0.09885280132293701
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1536,32,2,128,1,float16,fp8,0,0.09334080219268799
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1536,32,2,128,1,fp8,fp8,0,0.09448000192642211
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1536,32,4,128,1,float16,float16,0,0.10097440481185913
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1536,32,4,128,1,float16,fp8,0,0.09428480267524719
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1536,32,4,128,1,fp8,fp8,0,0.09491519927978516
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1536,32,8,128,1,float16,float16,0,0.10586240291595458
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1536,32,8,128,1,float16,fp8,0,0.09414719939231872
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1536,32,8,128,1,fp8,fp8,0,0.09509119987487794
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1536,32,32,128,1,float16,fp8,0,0.06860640048980712
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1536,32,32,128,1,float16,float16,0,0.0729744017124176
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1536,32,1,128,1,float16,float16,0,0.05995519757270813
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1536,32,32,128,1,fp8,fp8,0,0.0685696005821228
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1536,32,1,128,1,float16,fp8,0,0.05755199790000916
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1536,32,1,128,1,fp8,fp8,0,0.05797759890556335
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1536,32,2,128,1,float16,float16,0,0.060343998670578006
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1536,32,2,128,1,float16,fp8,0,0.05697119832038879
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1536,32,2,128,1,fp8,fp8,0,0.05841599702835083
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1536,32,4,128,1,float16,float16,0,0.06077280044555664
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1536,32,4,128,1,float16,fp8,0,0.057196801900863646
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1536,32,4,128,1,fp8,fp8,0,0.05891839861869812
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1536,32,8,128,1,float16,float16,0,0.062331199645996094
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1536,32,8,128,1,float16,fp8,0,0.05737119913101196
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1536,32,32,128,1,float16,float16,0,0.044121599197387694
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1536,32,8,128,1,fp8,fp8,0,0.05780320167541504
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1536,32,32,128,1,fp8,fp8,0,0.04336479902267456
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1536,32,1,128,1,float16,float16,0,0.0396912008523941
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1536,32,32,128,1,float16,fp8,0,0.04320639967918396
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1536,32,1,128,1,float16,fp8,0,0.038841599225997926
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1536,32,1,128,1,fp8,fp8,0,0.03830080032348633
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1536,32,2,128,1,float16,float16,0,0.03914240002632141
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1536,32,2,128,1,float16,fp8,0,0.039059200882911684
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1536,32,2,128,1,fp8,fp8,0,0.03880159854888916
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1536,32,4,128,1,float16,float16,0,0.039577600359916684
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1536,32,4,128,1,fp8,fp8,0,0.03783040046691895
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1536,32,4,128,1,float16,fp8,0,0.039099198579788205
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1536,32,8,128,1,float16,float16,0,0.04121760129928589
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1536,32,8,128,1,float16,fp8,0,0.037487998604774475
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1536,32,8,128,1,fp8,fp8,0,0.03902879953384399
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,1024,32,1,128,1,float16,float16,0,1.2774255752563477
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,1024,32,1,128,1,float16,fp8,0,1.2678463935852051
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,1024,32,1,128,1,fp8,fp8,0,1.27467679977417
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,1024,32,2,128,1,float16,float16,0,1.3426272392272949
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,1024,32,2,128,1,float16,fp8,0,1.2640527725219726
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,1024,32,2,128,1,fp8,fp8,0,1.270577621459961
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,1024,32,4,128,1,float16,fp8,0,1.2639967918395996
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,1024,32,4,128,1,fp8,fp8,0,1.26845121383667
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,1024,32,4,128,1,float16,float16,0,1.4365247726440429
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,1024,32,8,128,1,float16,fp8,0,1.266708755493164
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,1024,32,8,128,1,float16,float16,0,1.3780223846435546
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1024,32,32,128,1,float16,float16,0,0.9549920082092285
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,1024,32,8,128,1,fp8,fp8,0,1.591881561279297
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1024,32,32,128,1,float16,fp8,0,0.8629759788513184
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1024,32,1,128,1,float16,float16,0,0.650987195968628
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1024,32,1,128,1,float16,fp8,0,0.6512991905212402
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1024,32,32,128,1,fp8,fp8,0,0.9491951942443848
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1024,32,1,128,1,fp8,fp8,0,0.6519872188568115
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1024,32,2,128,1,float16,fp8,0,0.648964786529541
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1024,32,2,128,1,float16,float16,0,0.6506415843963623
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1024,32,2,128,1,fp8,fp8,0,0.716428804397583
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1024,32,4,128,1,float16,float16,0,0.6722511768341064
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1024,32,4,128,1,float16,fp8,0,0.6454080104827881
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1024,32,4,128,1,fp8,fp8,0,0.6593200206756592
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1024,32,8,128,1,float16,float16,0,0.7057472229003906
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1024,32,32,128,1,float16,float16,0,0.5296480178833007
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1024,32,8,128,1,fp8,fp8,0,0.6445072174072266
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1024,32,32,128,1,float16,fp8,0,0.4435296058654785
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1024,32,8,128,1,float16,fp8,0,0.6492688179016113
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1024,32,1,128,1,float16,float16,0,0.33512799739837645
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1024,32,1,128,1,fp8,fp8,0,0.3408735990524292
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1024,32,1,128,1,float16,fp8,0,0.33726561069488525
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1024,32,32,128,1,fp8,fp8,0,0.44972801208496094
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1024,32,2,128,1,float16,float16,0,0.33394560813903806
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1024,32,2,128,1,float16,fp8,0,0.339681601524353
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1024,32,2,128,1,fp8,fp8,0,0.33743839263916015
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1024,32,4,128,1,float16,float16,0,0.34959518909454346
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1024,32,4,128,1,float16,fp8,0,0.3348576068878174
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1024,32,4,128,1,fp8,fp8,0,0.3368016004562378
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1024,32,8,128,1,float16,float16,0,0.3681600093841553
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1024,32,8,128,1,float16,fp8,0,0.33385601043701174
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1024,32,32,128,1,float16,float16,0,0.25470240116119386
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1024,32,8,128,1,fp8,fp8,0,0.3330575942993164
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1024,32,32,128,1,float16,fp8,0,0.2332223892211914
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1024,32,32,128,1,fp8,fp8,0,0.2325808048248291
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1024,32,1,128,1,float16,float16,0,0.18302719593048095
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1024,32,1,128,1,float16,fp8,0,0.17779840230941774
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1024,32,1,128,1,fp8,fp8,0,0.1770815968513489
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1024,32,2,128,1,float16,float16,0,0.18010400533676146
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1024,32,2,128,1,float16,fp8,0,0.1794975996017456
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1024,32,2,128,1,fp8,fp8,0,0.17719520330429078
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1024,32,4,128,1,float16,float16,0,0.18442560434341432
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1024,32,4,128,1,float16,fp8,0,0.17840640544891356
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1024,32,4,128,1,fp8,fp8,0,0.1773327946662903
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1024,32,8,128,1,float16,float16,0,0.201528000831604
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1024,32,8,128,1,float16,fp8,0,0.17781920433044435
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1024,32,8,128,1,fp8,fp8,0,0.1773584008216858
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1024,32,32,128,1,float16,float16,0,0.13636159896850586
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1024,32,32,128,1,float16,fp8,0,0.12519359588623047
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1024,32,32,128,1,fp8,fp8,0,0.12541760206222535
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1024,32,1,128,1,float16,float16,0,0.10078719854354859
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1024,32,1,128,1,float16,fp8,0,0.09658719897270203
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1024,32,1,128,1,fp8,fp8,0,0.09617279767990113
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1024,32,2,128,1,float16,float16,0,0.1008031964302063
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1024,32,2,128,1,float16,fp8,0,0.09749119877815246
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1024,32,2,128,1,fp8,fp8,0,0.09659839868545532
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1024,32,4,128,1,float16,float16,0,0.10096160173416138
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1024,32,4,128,1,float16,fp8,0,0.09729120135307312
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1024,32,4,128,1,fp8,fp8,0,0.09687039852142335
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1024,32,8,128,1,float16,float16,0,0.10745760202407836
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1024,32,8,128,1,float16,fp8,0,0.09686080217361451
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1024,32,32,128,1,float16,float16,0,0.07661759853363037
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1024,32,8,128,1,fp8,fp8,0,0.0981440007686615
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1024,32,32,128,1,float16,fp8,0,0.07108160257339477
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1024,32,1,128,1,float16,float16,0,0.05648959875106811
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1024,32,32,128,1,fp8,fp8,0,0.07063360214233398
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1024,32,1,128,1,float16,fp8,0,0.056846398115158084
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1024,32,1,128,1,fp8,fp8,0,0.05587999820709229
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1024,32,2,128,1,float16,float16,0,0.057704001665115356
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1024,32,2,128,1,float16,fp8,0,0.05591840147972107
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1024,32,2,128,1,fp8,fp8,0,0.05630559921264648
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1024,32,4,128,1,float16,float16,0,0.05836480259895325
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1024,32,4,128,1,float16,fp8,0,0.05678079724311828
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1024,32,4,128,1,fp8,fp8,0,0.05573760271072388
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1024,32,8,128,1,float16,float16,0,0.06189280152320862
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1024,32,8,128,1,float16,fp8,0,0.05565919876098633
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1024,32,8,128,1,fp8,fp8,0,0.056148797273635864
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1024,32,32,128,1,float16,fp8,0,0.043244799971580504
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1024,32,32,128,1,float16,float16,0,0.04502559900283813
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1024,32,32,128,1,fp8,fp8,0,0.04336319863796234
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1024,32,1,128,1,float16,float16,0,0.03733760118484497
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1024,32,1,128,1,float16,fp8,0,0.03675360083580017
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1024,32,1,128,1,fp8,fp8,0,0.03691039979457855
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1024,32,2,128,1,float16,float16,0,0.03746559917926788
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1024,32,2,128,1,float16,fp8,0,0.03697440028190613
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1024,32,2,128,1,fp8,fp8,0,0.037001600861549376
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1024,32,4,128,1,float16,float16,0,0.03840320110321045
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1024,32,4,128,1,float16,fp8,0,0.03683519959449768
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1024,32,4,128,1,fp8,fp8,0,0.03678080141544342
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1024,32,8,128,1,float16,float16,0,0.03933440148830414
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1024,32,8,128,1,float16,fp8,0,0.03622879981994629
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1024,32,32,128,1,float16,float16,0,0.027235201001167296
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1024,32,8,128,1,fp8,fp8,0,0.03648000061511993
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1024,32,32,128,1,float16,fp8,0,0.02682720124721527
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1024,32,1,128,1,float16,float16,0,0.02287199944257736
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1024,32,32,128,1,fp8,fp8,0,0.026952001452445983
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1024,32,1,128,1,float16,fp8,0,0.02274080067873001
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1024,32,2,128,1,float16,float16,0,0.023095999658107758
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1024,32,1,128,1,fp8,fp8,0,0.02282080054283142
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1024,32,2,128,1,float16,fp8,0,0.02274879962205887
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1024,32,4,128,1,float16,float16,0,0.024611200392246246
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1024,32,2,128,1,fp8,fp8,0,0.022809599339962006
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1024,32,4,128,1,float16,fp8,0,0.02280000001192093
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1024,32,8,128,1,float16,float16,0,0.024905599653720856
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1024,32,4,128,1,fp8,fp8,0,0.02298399955034256
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1024,32,8,128,1,float16,fp8,0,0.0227183997631073
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1024,32,8,128,1,fp8,fp8,0,0.02280000001192093
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,512,32,1,128,1,float16,float16,0,0.9258463859558106
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,512,32,1,128,1,float16,fp8,0,0.9616352081298828
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,512,32,1,128,1,fp8,fp8,0,0.9666015625
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,512,32,2,128,1,float16,float16,0,0.9202239990234375
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,512,32,2,128,1,float16,fp8,0,0.9586527824401856
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,512,32,2,128,1,fp8,fp8,0,0.9627599716186523
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,512,32,4,128,1,float16,float16,0,0.9559040069580078
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,512,32,4,128,1,float16,fp8,0,0.9573295593261719
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,512,32,4,128,1,fp8,fp8,0,0.9592368125915527
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,512,32,8,128,1,float16,fp8,0,0.9555680274963378
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,512,32,8,128,1,fp8,fp8,0,0.9559632301330566
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,512,32,8,128,1,float16,float16,0,1.273960018157959
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,512,32,32,128,1,float16,float16,0,0.7690847873687744
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,512,32,1,128,1,float16,float16,0,0.5166687965393066
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,512,32,32,128,1,float16,fp8,0,0.7023056030273438
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,512,32,32,128,1,fp8,fp8,0,0.7036704063415528
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,512,32,1,128,1,float16,fp8,0,0.4912543773651123
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,512,32,1,128,1,fp8,fp8,0,0.5434256076812745
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,512,32,2,128,1,float16,float16,0,0.4713759899139404
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,512,32,2,128,1,float16,fp8,0,0.4931488037109375
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,512,32,2,128,1,fp8,fp8,0,0.49031839370727537
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,512,32,4,128,1,float16,float16,0,0.4916687965393066
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,512,32,4,128,1,float16,fp8,0,0.4894927978515625
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,512,32,4,128,1,fp8,fp8,0,0.4913343906402588
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,512,32,8,128,1,float16,float16,0,0.5291247844696045
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,512,32,8,128,1,float16,fp8,0,0.4869984149932861
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,512,32,32,128,1,float16,float16,0,0.39629759788513186
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,512,32,8,128,1,fp8,fp8,0,0.48669118881225587
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,512,32,32,128,1,float16,fp8,0,0.35947840213775634
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,512,32,32,128,1,fp8,fp8,0,0.3649888038635254
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,512,32,1,128,1,float16,float16,0,0.24946238994598388
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,512,32,1,128,1,float16,fp8,0,0.25577759742736816
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,512,32,1,128,1,fp8,fp8,0,0.2558144092559814
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,512,32,2,128,1,float16,float16,0,0.24625279903411865
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,512,32,2,128,1,float16,fp8,0,0.2566800117492676
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,512,32,2,128,1,fp8,fp8,0,0.2547008037567139
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,512,32,4,128,1,float16,float16,0,0.254313588142395
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,512,32,4,128,1,float16,fp8,0,0.2536832094192505
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,512,32,4,128,1,fp8,fp8,0,0.2556096076965332
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,512,32,8,128,1,float16,float16,0,0.2757296085357666
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,512,32,8,128,1,float16,fp8,0,0.25177440643310545
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,512,32,8,128,1,fp8,fp8,0,0.2538719892501831
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,512,32,32,128,1,float16,float16,0,0.20513761043548584
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,512,32,32,128,1,float16,fp8,0,0.18822720050811767
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,512,32,1,128,1,float16,float16,0,0.1318351984024048
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,512,32,1,128,1,float16,fp8,0,0.13408639430999755
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,512,32,32,128,1,fp8,fp8,0,0.1882863998413086
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,512,32,1,128,1,fp8,fp8,0,0.13411999940872193
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,512,32,2,128,1,float16,float16,0,0.131494402885437
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,512,32,2,128,1,float16,fp8,0,0.13500159978866577
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,512,32,2,128,1,fp8,fp8,0,0.13391040563583373
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,512,32,4,128,1,float16,float16,0,0.1349120020866394
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,512,32,4,128,1,float16,fp8,0,0.1335360050201416
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,512,32,4,128,1,fp8,fp8,0,0.13437119722366334
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,512,32,8,128,1,float16,float16,0,0.14522080421447753
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,512,32,8,128,1,float16,fp8,0,0.13401919603347778
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,512,32,8,128,1,fp8,fp8,0,0.13286240100860597
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,512,32,32,128,1,float16,float16,0,0.10909759998321533
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,512,32,32,128,1,float16,fp8,0,0.10176960229873658
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,512,32,32,128,1,fp8,fp8,0,0.10100799798965454
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,512,32,1,128,1,float16,float16,0,0.07275199890136719
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,512,32,1,128,1,float16,fp8,0,0.07416160106658935
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,512,32,1,128,1,fp8,fp8,0,0.07198240160942078
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,512,32,2,128,1,float16,float16,0,0.07432479858398437
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,512,32,2,128,1,float16,fp8,0,0.07384799718856812
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,512,32,2,128,1,fp8,fp8,0,0.0720575988292694
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,512,32,4,128,1,float16,fp8,0,0.07361279726028443
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,512,32,4,128,1,float16,float16,0,0.07622560262680053
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,512,32,4,128,1,fp8,fp8,0,0.07300000190734864
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,512,32,8,128,1,float16,float16,0,0.08119680285453797
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,512,32,8,128,1,float16,fp8,0,0.07354720234870911
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,512,32,8,128,1,fp8,fp8,0,0.07356320023536682
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,512,32,32,128,1,float16,float16,0,0.06128159761428833
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,512,32,32,128,1,float16,fp8,0,0.05742560029029846
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,512,32,1,128,1,float16,float16,0,0.041345599293708804
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,512,32,32,128,1,fp8,fp8,0,0.057415997982025145
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,512,32,1,128,1,float16,fp8,0,0.04120000004768372
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,512,32,2,128,1,float16,float16,0,0.04133760035037994
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,512,32,1,128,1,fp8,fp8,0,0.04181439876556396
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,512,32,2,128,1,float16,fp8,0,0.042243200540542605
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,512,32,2,128,1,fp8,fp8,0,0.04144479930400848
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,512,32,4,128,1,float16,float16,0,0.043182399868965146
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,512,32,4,128,1,float16,fp8,0,0.0426256000995636
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,512,32,4,128,1,fp8,fp8,0,0.04116320013999939
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,512,32,8,128,1,float16,float16,0,0.047137600183486936
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,512,32,8,128,1,float16,fp8,0,0.042745599150657655
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,512,32,8,128,1,fp8,fp8,0,0.04181120097637177
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,512,32,32,128,1,float16,float16,0,0.03388960063457489
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,512,32,32,128,1,float16,fp8,0,0.03390559852123261
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,512,32,32,128,1,fp8,fp8,0,0.0349839985370636
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,512,32,1,128,1,float16,float16,0,0.027025601267814635
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,512,32,1,128,1,float16,fp8,0,0.026950401067733765
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,512,32,1,128,1,fp8,fp8,0,0.026943999528884887
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,512,32,2,128,1,float16,float16,0,0.026974400877952574
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,512,32,2,128,1,float16,fp8,0,0.026815998554229736
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,512,32,2,128,1,fp8,fp8,0,0.026924800872802735
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,512,32,4,128,1,float16,float16,0,0.0288239985704422
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,512,32,4,128,1,float16,fp8,0,0.02677600085735321
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,512,32,4,128,1,fp8,fp8,0,0.027046400308609008
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,512,32,8,128,1,float16,float16,0,0.028916800022125246
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,512,32,8,128,1,float16,fp8,0,0.026822400093078614
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,512,32,8,128,1,fp8,fp8,0,0.02707839906215668
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,512,32,32,128,1,float16,fp8,0,0.020718400180339814
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,512,32,32,128,1,float16,float16,0,0.02067999988794327
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,512,32,32,128,1,fp8,fp8,0,0.020812800526618956
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,512,32,1,128,1,float16,float16,0,0.01658879965543747
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,512,32,1,128,1,float16,fp8,0,0.01717440038919449
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,512,32,1,128,1,fp8,fp8,0,0.01865600049495697
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,512,32,2,128,1,float16,float16,0,0.016659200191497803
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,512,32,2,128,1,float16,fp8,0,0.01820160001516342
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,512,32,2,128,1,fp8,fp8,0,0.01855839937925339
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,512,32,4,128,1,float16,float16,0,0.01849119961261749
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,512,32,4,128,1,float16,fp8,0,0.01852319985628128
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,512,32,4,128,1,fp8,fp8,0,0.018532800674438476
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,512,32,8,128,1,float16,float16,0,0.018668800592422485
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,512,32,8,128,1,float16,fp8,0,0.018606400489807128
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,512,32,8,128,1,fp8,fp8,0,0.018615999817848207
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,512,32,32,128,1,float16,float16,0,0.01863359957933426
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,512,32,32,128,1,float16,fp8,0,0.018670399487018586
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,512,32,1,128,1,float16,float16,0,0.016633599996566772
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,512,32,32,128,1,fp8,fp8,0,0.018638400733470915
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,512,32,1,128,1,float16,fp8,0,0.016574400663375854
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,512,32,1,128,1,fp8,fp8,0,0.016572800278663636
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,512,32,2,128,1,float16,float16,0,0.01669919937849045
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,512,32,2,128,1,float16,fp8,0,0.016571199893951415
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,512,32,2,128,1,fp8,fp8,0,0.016550399363040924
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,512,32,4,128,1,float16,float16,0,0.01668799966573715
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,512,32,4,128,1,float16,fp8,0,0.016604800522327424
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,512,32,4,128,1,fp8,fp8,0,0.01648640036582947
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,512,32,8,128,1,float16,float16,0,0.016766400635242464
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,512,32,8,128,1,float16,fp8,0,0.01658080071210861
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,512,32,8,128,1,fp8,fp8,0,0.01658399999141693
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,256,32,1,128,1,float16,float16,0,0.3786159992218018
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,256,32,1,128,1,float16,fp8,0,0.4074528217315674
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,256,32,1,128,1,fp8,fp8,0,0.4090576171875
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,256,32,2,128,1,float16,float16,0,0.37655038833618165
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,256,32,2,128,1,float16,fp8,0,0.4065135955810547
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,256,32,2,128,1,fp8,fp8,0,0.4055776119232178
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,256,32,4,128,1,float16,float16,0,0.3956576108932495
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,256,32,4,128,1,float16,fp8,0,0.405350399017334
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,256,32,4,128,1,fp8,fp8,0,0.4055312156677246
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,256,32,8,128,1,float16,float16,0,0.4346367835998535
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,256,32,8,128,1,float16,fp8,0,0.4033199787139893
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,256,32,8,128,1,fp8,fp8,0,0.40160479545593264
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,256,32,32,128,1,float16,float16,0,0.34387359619140623
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,256,32,32,128,1,fp8,fp8,0,0.3138816118240356
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,256,32,32,128,1,float16,fp8,0,0.31534879207611083
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,256,32,1,128,1,float16,float16,0,0.19768160581588745
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,256,32,1,128,1,float16,fp8,0,0.21213920116424562
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,256,32,2,128,1,float16,float16,0,0.19714239835739136
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,256,32,1,128,1,fp8,fp8,0,0.20914878845214843
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,256,32,2,128,1,float16,fp8,0,0.2097264051437378
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,256,32,2,128,1,fp8,fp8,0,0.21047840118408204
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,256,32,4,128,1,float16,float16,0,0.20667519569396972
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,256,32,4,128,1,float16,fp8,0,0.20916481018066407
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,256,32,4,128,1,fp8,fp8,0,0.2097520112991333
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,256,32,8,128,1,float16,fp8,0,0.20704638957977295
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,256,32,8,128,1,float16,float16,0,0.22729918956756592
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,256,32,8,128,1,fp8,fp8,0,0.2083967924118042
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,256,32,32,128,1,float16,fp8,0,0.1658463954925537
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,256,32,32,128,1,float16,float16,0,0.1788800001144409
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,256,32,32,128,1,fp8,fp8,0,0.16690720319747926
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,256,32,1,128,1,float16,fp8,0,0.11045440435409545
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,256,32,1,128,1,fp8,fp8,0,0.11082079410552978
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,256,32,1,128,1,float16,float16,0,0.10846079587936401
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,256,32,2,128,1,float16,float16,0,0.10507359504699706
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,256,32,2,128,1,float16,fp8,0,0.11121920347213746
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,256,32,2,128,1,fp8,fp8,0,0.11252479553222657
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,256,32,4,128,1,float16,float16,0,0.11030559539794922
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,256,32,4,128,1,float16,fp8,0,0.11093440055847167
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,256,32,4,128,1,fp8,fp8,0,0.11231679916381836
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,256,32,8,128,1,float16,float16,0,0.1195680022239685
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,256,32,8,128,1,float16,fp8,0,0.11097279787063599
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,256,32,32,128,1,float16,float16,0,0.09461920261383057
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,256,32,8,128,1,fp8,fp8,0,0.11212320327758789
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,256,32,32,128,1,float16,fp8,0,0.08744320273399353
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,256,32,1,128,1,float16,float16,0,0.0594976007938385
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,256,32,1,128,1,float16,fp8,0,0.059596800804138185
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,256,32,32,128,1,fp8,fp8,0,0.08615999817848205
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,256,32,2,128,1,float16,float16,0,0.0597648024559021
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,256,32,1,128,1,fp8,fp8,0,0.059646397829055786
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,256,32,2,128,1,float16,fp8,0,0.058222401142120364
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,256,32,2,128,1,fp8,fp8,0,0.05841599702835083
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,256,32,4,128,1,float16,fp8,0,0.059683197736740114
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,256,32,4,128,1,float16,float16,0,0.06120160222053528
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,256,32,4,128,1,fp8,fp8,0,0.05926399827003479
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,256,32,8,128,1,float16,float16,0,0.06471520066261291
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,256,32,8,128,1,float16,fp8,0,0.0608672022819519
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,256,32,8,128,1,fp8,fp8,0,0.05975840091705322
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,256,32,32,128,1,float16,float16,0,0.0550495982170105
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,256,32,1,128,1,float16,float16,0,0.034980800747871396
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,256,32,32,128,1,fp8,fp8,0,0.05129759907722473
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,256,32,32,128,1,float16,fp8,0,0.05149919986724853
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,256,32,1,128,1,fp8,fp8,0,0.035132798552513125
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,256,32,2,128,1,float16,float16,0,0.03504000008106232
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,256,32,1,128,1,float16,fp8,0,0.03503200113773346
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,256,32,2,128,1,float16,fp8,0,0.03521760106086731
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,256,32,4,128,1,float16,float16,0,0.03536159992218017
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,256,32,2,128,1,fp8,fp8,0,0.03697920143604279
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,256,32,4,128,1,float16,fp8,0,0.03517279922962189
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,256,32,4,128,1,fp8,fp8,0,0.035097599029541016
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,256,32,8,128,1,float16,float16,0,0.039208000898361205
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,256,32,8,128,1,float16,fp8,0,0.03526880145072937
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,256,32,8,128,1,fp8,fp8,0,0.03516640067100525
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,256,32,32,128,1,float16,float16,0,0.028832000494003297
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,256,32,32,128,1,float16,fp8,0,0.028908801078796387
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,256,32,32,128,1,fp8,fp8,0,0.028867200016975403
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,256,32,1,128,1,float16,float16,0,0.021934400498867034
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,256,32,1,128,1,float16,fp8,0,0.0226160004734993
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,256,32,1,128,1,fp8,fp8,0,0.022089600563049316
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,256,32,2,128,1,float16,float16,0,0.020929600298404693
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,256,32,2,128,1,fp8,fp8,0,0.021935999393463135
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,256,32,2,128,1,float16,fp8,0,0.0227743998169899
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,256,32,4,128,1,float16,float16,0,0.022648000717163087
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,256,32,4,128,1,float16,fp8,0,0.021593600511550903
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,256,32,4,128,1,fp8,fp8,0,0.02271360009908676
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,256,32,8,128,1,float16,float16,0,0.02295839935541153
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,256,32,8,128,1,float16,fp8,0,0.022193600237369538
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,256,32,8,128,1,fp8,fp8,0,0.022651199996471406
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,256,32,32,128,1,float16,fp8,0,0.018569600582122803
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,256,32,32,128,1,fp8,fp8,0,0.018680000305175783
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,256,32,32,128,1,float16,float16,0,0.016862399876117706
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,256,32,1,128,1,float16,float16,0,0.014537599682807923
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,256,32,1,128,1,float16,fp8,0,0.01653439998626709
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,256,32,1,128,1,fp8,fp8,0,0.014824000000953675
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,256,32,2,128,1,float16,float16,0,0.01451520025730133
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,256,32,2,128,1,float16,fp8,0,0.016521599888801575
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,256,32,4,128,1,float16,float16,0,0.014582400023937226
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,256,32,2,128,1,fp8,fp8,0,0.014883199334144592
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,256,32,4,128,1,float16,fp8,0,0.01459839940071106
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,256,32,4,128,1,fp8,fp8,0,0.014556799829006196
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,256,32,8,128,1,float16,fp8,0,0.014662399888038635
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,256,32,8,128,1,float16,float16,0,0.01647839993238449
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,256,32,8,128,1,fp8,fp8,0,0.014532800018787383
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,256,32,32,128,1,float16,float16,0,0.01486240029335022
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,256,32,32,128,1,float16,fp8,0,0.016548800468444824
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,256,32,32,128,1,fp8,fp8,0,0.015740799903869628
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,256,32,1,128,1,float16,float16,0,0.014374400675296783
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,256,32,1,128,1,float16,fp8,0,0.014616000652313232
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,256,32,2,128,1,float16,float16,0,0.014511999487876893
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,256,32,1,128,1,fp8,fp8,0,0.014532800018787383
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,256,32,2,128,1,fp8,fp8,0,0.014505599439144135
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,256,32,4,128,1,float16,float16,0,0.014481599628925323
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,256,32,2,128,1,float16,fp8,0,0.014612799882888794
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,256,32,4,128,1,fp8,fp8,0,0.014460800588130951
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,256,32,8,128,1,float16,float16,0,0.014657600224018097
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,256,32,4,128,1,float16,fp8,0,0.014511999487876893
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,256,32,8,128,1,fp8,fp8,0,0.014595200121402741
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,256,32,32,128,1,float16,float16,0,0.014553600549697876
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,256,32,8,128,1,float16,fp8,0,0.01451359987258911
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,256,32,32,128,1,float16,fp8,0,0.014635199308395385
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,256,32,1,128,1,float16,float16,0,0.012511999905109405
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,256,32,32,128,1,fp8,fp8,0,0.014535999298095703
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,256,32,1,128,1,fp8,fp8,0,0.014371199905872345
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,256,32,1,128,1,float16,fp8,0,0.014449599385261535
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,256,32,2,128,1,float16,float16,0,0.014499199390411378
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,256,32,2,128,1,fp8,fp8,0,0.014511999487876893
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,256,32,2,128,1,float16,fp8,0,0.014672000706195832
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,256,32,4,128,1,float16,float16,0,0.014560000598430633
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,256,32,4,128,1,float16,fp8,0,0.0146479994058609
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,256,32,4,128,1,fp8,fp8,0,0.01454080045223236
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,256,32,8,128,1,float16,float16,0,0.014548799395561219
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,256,32,8,128,1,float16,fp8,0,0.014440000057220459
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,256,32,8,128,1,fp8,fp8,0,0.014460800588130951
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,128,32,1,128,1,float16,float16,0,0.23437280654907228
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,128,32,1,128,1,float16,fp8,0,0.2519248008728027
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,128,32,1,128,1,fp8,fp8,0,0.25006239414215087
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,128,32,2,128,1,float16,float16,0,0.23579039573669433
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,128,32,2,128,1,fp8,fp8,0,0.24978880882263182
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,128,32,2,128,1,float16,fp8,0,0.2505295991897583
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,128,32,4,128,1,float16,float16,0,0.24486401081085205
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,128,32,4,128,1,float16,fp8,0,0.2492271900177002
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,128,32,4,128,1,fp8,fp8,0,0.2500943899154663
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,128,32,8,128,1,float16,float16,0,0.2636768102645874
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,128,32,8,128,1,float16,fp8,0,0.24852960109710692
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,128,32,32,128,1,float16,float16,0,0.19421759843826295
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,128,32,8,128,1,fp8,fp8,0,0.24896159172058105
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,128,32,32,128,1,float16,fp8,0,0.18300800323486327
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,128,32,1,128,1,float16,fp8,0,0.12948800325393678
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,128,32,32,128,1,fp8,fp8,0,0.18463679552078247
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,128,32,1,128,1,float16,float16,0,0.12428640127182007
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,128,32,1,128,1,fp8,fp8,0,0.12984319925308227
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,128,32,2,128,1,float16,float16,0,0.12406719923019409
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,128,32,2,128,1,float16,fp8,0,0.12975679636001586
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,128,32,2,128,1,fp8,fp8,0,0.12980159521102905
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,128,32,4,128,1,float16,float16,0,0.12862080335617065
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,128,32,4,128,1,float16,fp8,0,0.12969759702682496
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,128,32,4,128,1,fp8,fp8,0,0.12935199737548828
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,128,32,8,128,1,float16,float16,0,0.13822720050811768
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,128,32,8,128,1,float16,fp8,0,0.1292736053466797
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,128,32,8,128,1,fp8,fp8,0,0.12976640462875366
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,128,32,32,128,1,float16,float16,0,0.10119680166244507
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,128,32,32,128,1,float16,fp8,0,0.09653760194778442
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,128,32,1,128,1,float16,float16,0,0.06814720034599304
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,128,32,32,128,1,fp8,fp8,0,0.09771519899368286
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,128,32,1,128,1,float16,fp8,0,0.06993280053138733
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,128,32,2,128,1,float16,float16,0,0.06792640089988708
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,128,32,1,128,1,fp8,fp8,0,0.07027519941329956
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,128,32,2,128,1,float16,fp8,0,0.06982240080833435
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,128,32,2,128,1,fp8,fp8,0,0.06977760195732116
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,128,32,4,128,1,float16,fp8,0,0.06999999880790711
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,128,32,4,128,1,float16,float16,0,0.07110400199890136
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,128,32,4,128,1,fp8,fp8,0,0.06986079812049865
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,128,32,8,128,1,float16,fp8,0,0.07014399766921997
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,128,32,8,128,1,float16,float16,0,0.0754527986049652
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,128,32,8,128,1,fp8,fp8,0,0.07020639777183532
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,128,32,32,128,1,float16,fp8,0,0.0534496009349823
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,128,32,32,128,1,float16,float16,0,0.055611199140548705
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,128,32,32,128,1,fp8,fp8,0,0.053376001119613645
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,128,32,1,128,1,float16,float16,0,0.037108799815177916
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,128,32,1,128,1,float16,fp8,0,0.039083200693130496
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,128,32,1,128,1,fp8,fp8,0,0.03744319975376129
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,128,32,2,128,1,float16,float16,0,0.037083199620246886
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,128,32,2,128,1,float16,fp8,0,0.039022400975227356
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,128,32,2,128,1,fp8,fp8,0,0.03903520107269287
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,128,32,4,128,1,float16,float16,0,0.03732320070266724
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,128,32,4,128,1,fp8,fp8,0,0.03914240002632141
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,128,32,4,128,1,float16,fp8,0,0.03907679915428162
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,128,32,8,128,1,float16,float16,0,0.04118239879608154
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,128,32,8,128,1,fp8,fp8,0,0.03907999992370605
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,128,32,8,128,1,float16,fp8,0,0.03905119895935059
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,128,32,32,128,1,float16,float16,0,0.03091840147972107
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,128,32,32,128,1,fp8,fp8,0,0.030982398986816408
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,128,32,1,128,1,float16,float16,0,0.02281759977340698
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,128,32,32,128,1,float16,fp8,0,0.030958399176597595
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,128,32,1,128,1,float16,fp8,0,0.024796800315380098
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,128,32,1,128,1,fp8,fp8,0,0.02470880001783371
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,128,32,2,128,1,float16,float16,0,0.0247296005487442
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,128,32,2,128,1,float16,fp8,0,0.024777600169181825
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,128,32,2,128,1,fp8,fp8,0,0.024743999540805816
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,128,32,4,128,1,float16,float16,0,0.02494560033082962
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,128,32,4,128,1,float16,fp8,0,0.024775999784469604
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,128,32,4,128,1,fp8,fp8,0,0.024748800694942473
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,128,32,8,128,1,float16,fp8,0,0.024798400700092316
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,128,32,8,128,1,float16,float16,0,0.02476000040769577
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,128,32,8,128,1,fp8,fp8,0,0.024672000110149382
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,128,32,32,128,1,float16,float16,0,0.01865279972553253
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,128,32,32,128,1,float16,fp8,0,0.01860159933567047
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,128,32,32,128,1,fp8,fp8,0,0.019215999543666838
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,128,32,1,128,1,float16,float16,0,0.016443200409412384
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,128,32,1,128,1,float16,fp8,0,0.016582399606704712
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,128,32,2,128,1,float16,float16,0,0.01650400012731552
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,128,32,1,128,1,fp8,fp8,0,0.016575999557971954
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,128,32,2,128,1,float16,fp8,0,0.01652960032224655
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,128,32,2,128,1,fp8,fp8,0,0.016577599942684172
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,128,32,4,128,1,float16,float16,0,0.016499200463294984
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,128,32,4,128,1,float16,fp8,0,0.0165120005607605
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,128,32,8,128,1,float16,float16,0,0.01666560024023056
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,128,32,8,128,1,float16,fp8,0,0.01666080057621002
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,128,32,4,128,1,fp8,fp8,0,0.01679839938879013
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,128,32,32,128,1,float16,float16,0,0.014339199662208557
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,128,32,8,128,1,fp8,fp8,0,0.016550399363040924
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,128,32,32,128,1,float16,fp8,0,0.014441600441932679
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,128,32,1,128,1,float16,float16,0,0.012545600533485413
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,128,32,32,128,1,fp8,fp8,0,0.01454080045223236
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,128,32,1,128,1,fp8,fp8,0,0.012540799379348756
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,128,32,1,128,1,float16,fp8,0,0.012529599666595458
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,128,32,2,128,1,float16,float16,0,0.012428800016641617
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,128,32,2,128,1,float16,fp8,0,0.012515200674533844
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,128,32,2,128,1,fp8,fp8,0,0.012435200065374375
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,128,32,4,128,1,float16,float16,0,0.012455999851226807
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,128,32,4,128,1,float16,fp8,0,0.012454400211572647
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,128,32,4,128,1,fp8,fp8,0,0.012451200187206269
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,128,32,8,128,1,float16,float16,0,0.012468799948692322
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,128,32,8,128,1,float16,fp8,0,0.012432000041007996
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,128,32,8,128,1,fp8,fp8,0,0.012505599856376648
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,128,32,32,128,1,float16,float16,0,0.012476799637079239
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,128,32,32,128,1,float16,fp8,0,0.01239359974861145
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,128,32,32,128,1,fp8,fp8,0,0.012467200309038163
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,128,32,1,128,1,float16,float16,0,0.010409600287675857
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,128,32,1,128,1,float16,fp8,0,0.011406400054693223
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,128,32,1,128,1,fp8,fp8,0,0.010489600151777268
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,128,32,2,128,1,float16,float16,0,0.010491199791431427
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,128,32,2,128,1,fp8,fp8,0,0.010601600259542465
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,128,32,2,128,1,float16,fp8,0,0.012267199903726577
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,128,32,4,128,1,float16,float16,0,0.010438399761915207
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,128,32,4,128,1,float16,fp8,0,0.01048320010304451
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,128,32,8,128,1,float16,float16,0,0.010496000200510025
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,128,32,4,128,1,fp8,fp8,0,0.012427199631929398
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,128,32,8,128,1,float16,fp8,0,0.010518400371074677
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,128,32,8,128,1,fp8,fp8,0,0.010438399761915207
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,128,32,32,128,1,float16,float16,0,0.012398400157690049
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,128,32,32,128,1,float16,fp8,0,0.010462400317192078
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,128,32,1,128,1,float16,float16,0,0.010606399923563003
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,128,32,32,128,1,fp8,fp8,0,0.010441599786281586
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,128,32,1,128,1,fp8,fp8,0,0.010505600273609162
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,128,32,1,128,1,float16,fp8,0,0.010436800122261048
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,128,32,2,128,1,float16,float16,0,0.010420800000429154
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,128,32,2,128,1,float16,fp8,0,0.011124800145626067
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,128,32,2,128,1,fp8,fp8,0,0.01043040007352829
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,128,32,4,128,1,float16,float16,0,0.010531199723482132
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,128,32,4,128,1,float16,fp8,0,0.010625600069761276
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,128,32,4,128,1,fp8,fp8,0,0.01048159971833229
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,128,32,8,128,1,float16,float16,0,0.010412800312042236
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,128,32,8,128,1,float16,fp8,0,0.010791999846696853
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,128,32,8,128,1,fp8,fp8,0,0.010417599976062775
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,64,32,1,128,1,float16,float16,0,0.19586399793624878
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,64,32,1,128,1,float16,fp8,0,0.20112318992614747
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,64,32,1,128,1,fp8,fp8,0,0.20077919960021973
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,64,32,2,128,1,float16,float16,0,0.19617120027542115
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,64,32,2,128,1,float16,fp8,0,0.20071039199829102
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,64,32,2,128,1,fp8,fp8,0,0.20064160823822022
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,64,32,4,128,1,float16,float16,0,0.20132639408111572
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,64,32,4,128,1,float16,fp8,0,0.20059359073638916
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,64,32,4,128,1,fp8,fp8,0,0.1997712016105652
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,64,32,8,128,1,float16,float16,0,0.2096640110015869
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,64,32,8,128,1,float16,fp8,0,0.19898879528045654
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,64,32,8,128,1,fp8,fp8,0,0.19921599626541137
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,64,32,32,128,1,float16,float16,0,0.13608959913253785
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,64,32,32,128,1,float16,fp8,0,0.1314128041267395
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,64,32,32,128,1,fp8,fp8,0,0.13115520477294923
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,64,32,1,128,1,float16,float16,0,0.10291039943695068
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,64,32,1,128,1,float16,fp8,0,0.10480320453643799
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,64,32,1,128,1,fp8,fp8,0,0.10332640409469604
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,64,32,2,128,1,float16,float16,0,0.10361759662628174
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,64,32,2,128,1,float16,fp8,0,0.10478880405426025
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,64,32,2,128,1,fp8,fp8,0,0.10480799674987792
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,64,32,4,128,1,float16,float16,0,0.10618400573730469
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,64,32,4,128,1,float16,fp8,0,0.10480159521102905
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,64,32,4,128,1,fp8,fp8,0,0.10463999509811402
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,64,32,8,128,1,float16,float16,0,0.11170400381088257
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,64,32,8,128,1,float16,fp8,0,0.10405440330505371
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,64,32,8,128,1,fp8,fp8,0,0.10511200428009033
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,64,32,32,128,1,float16,float16,0,0.07236639857292175
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,64,32,32,128,1,float16,fp8,0,0.07171040177345275
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,64,32,32,128,1,fp8,fp8,0,0.07101759910583497
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,64,32,1,128,1,float16,float16,0,0.05568959712982178
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,64,32,1,128,1,float16,fp8,0,0.05572800040245056
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,64,32,1,128,1,fp8,fp8,0,0.05721759796142578
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,64,32,2,128,1,float16,float16,0,0.055504000186920165
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,64,32,2,128,1,float16,fp8,0,0.05756319761276245
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,64,32,2,128,1,fp8,fp8,0,0.05582879781723023
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,64,32,4,128,1,float16,fp8,0,0.0573423981666565
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,64,32,4,128,1,float16,float16,0,0.057633602619171144
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,64,32,4,128,1,fp8,fp8,0,0.05659679770469665
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,64,32,8,128,1,float16,float16,0,0.06158879995346069
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,64,32,8,128,1,float16,fp8,0,0.05607680082321167
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,64,32,32,128,1,float16,float16,0,0.039396798610687254
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,64,32,32,128,1,float16,fp8,0,0.03909280002117157
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,64,32,8,128,1,fp8,fp8,0,0.057107198238372806
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,64,32,32,128,1,fp8,fp8,0,0.03913280069828033
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,64,32,1,128,1,float16,float16,0,0.03270240128040314
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,64,32,1,128,1,float16,fp8,0,0.03097119927406311
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,64,32,2,128,1,float16,float16,0,0.031891199946403506
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,64,32,1,128,1,fp8,fp8,0,0.03240959942340851
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,64,32,2,128,1,float16,fp8,0,0.030928000807762146
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,64,32,2,128,1,fp8,fp8,0,0.03096800148487091
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,64,32,4,128,1,float16,float16,0,0.03300639986991882
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,64,32,4,128,1,float16,fp8,0,0.031404799222946166
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,64,32,4,128,1,fp8,fp8,0,0.030959999561309813
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,64,32,8,128,1,float16,float16,0,0.03330720067024231
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,64,32,8,128,1,float16,fp8,0,0.031068798899650574
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,64,32,8,128,1,fp8,fp8,0,0.031033599376678468
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,64,32,32,128,1,float16,float16,0,0.02383359968662262
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,64,32,32,128,1,float16,fp8,0,0.024697600305080412
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,64,32,32,128,1,fp8,fp8,0,0.024710400402545928
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,64,32,1,128,1,float16,float16,0,0.020633600652217865
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,64,32,1,128,1,float16,fp8,0,0.02075359970331192
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,64,32,1,128,1,fp8,fp8,0,0.020720000565052032
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,64,32,2,128,1,float16,float16,0,0.020665599405765532
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,64,32,2,128,1,float16,fp8,0,0.020627200603485107
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,64,32,2,128,1,fp8,fp8,0,0.020694400370121
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,64,32,4,128,1,float16,float16,0,0.02072799950838089
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,64,32,4,128,1,float16,fp8,0,0.020689600706100465
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,64,32,8,128,1,float16,float16,0,0.022720000147819518
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,64,32,8,128,1,float16,fp8,0,0.020814399421215057
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,64,32,4,128,1,fp8,fp8,0,0.02104160040616989
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,64,32,8,128,1,fp8,fp8,0,0.020664000511169435
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,64,32,32,128,1,float16,float16,0,0.01653600037097931
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,64,32,32,128,1,float16,fp8,0,0.016704000532627106
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,64,32,1,128,1,float16,float16,0,0.014553600549697876
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,64,32,32,128,1,fp8,fp8,0,0.016596800088882445
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,64,32,1,128,1,float16,fp8,0,0.014830400049686433
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,64,32,2,128,1,float16,float16,0,0.014510400593280792
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,64,32,1,128,1,fp8,fp8,0,0.014584000408649444
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,64,32,2,128,1,fp8,fp8,0,0.014496000111103058
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,64,32,2,128,1,float16,fp8,0,0.014822399616241455
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,64,32,4,128,1,float16,float16,0,0.014555199444293976
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,64,32,4,128,1,float16,fp8,0,0.014510400593280792
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,64,32,8,128,1,float16,float16,0,0.014601600170135499
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,64,32,4,128,1,fp8,fp8,0,0.014715200662612915
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,64,32,8,128,1,float16,fp8,0,0.014550399780273438
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,64,32,8,128,1,fp8,fp8,0,0.014561599493026734
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,64,32,32,128,1,float16,float16,0,0.012603199481964112
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,64,32,32,128,1,float16,fp8,0,0.012491200119256973
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,64,32,32,128,1,fp8,fp8,0,0.01247360035777092
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,64,32,1,128,1,float16,float16,0,0.01069760024547577
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,64,32,1,128,1,float16,fp8,0,0.01043199971318245
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,64,32,1,128,1,fp8,fp8,0,0.010428799688816071
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,64,32,2,128,1,float16,float16,0,0.010780800133943558
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,64,32,2,128,1,fp8,fp8,0,0.010446400195360184
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,64,32,2,128,1,float16,fp8,0,0.01043040007352829
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,64,32,4,128,1,float16,float16,0,0.01069599986076355
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,64,32,4,128,1,float16,fp8,0,0.010420800000429154
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,64,32,4,128,1,fp8,fp8,0,0.0105103999376297
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,64,32,8,128,1,float16,fp8,0,0.010436800122261048
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,64,32,8,128,1,float16,float16,0,0.01266079992055893
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,64,32,8,128,1,fp8,fp8,0,0.010473600029945374
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,64,32,32,128,1,float16,float16,0,0.011255999654531479
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,64,32,32,128,1,fp8,fp8,0,0.01045759990811348
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,64,32,32,128,1,float16,fp8,0,0.011479999870061874
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,64,32,1,128,1,float16,float16,0,0.010526400059461594
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,64,32,1,128,1,float16,fp8,0,0.010438399761915207
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,64,32,2,128,1,float16,float16,0,0.010521599650382995
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,64,32,1,128,1,fp8,fp8,0,0.010793600231409073
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,64,32,2,128,1,float16,fp8,0,0.010447999835014344
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,64,32,2,128,1,fp8,fp8,0,0.01045600026845932
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,64,32,4,128,1,float16,fp8,0,0.010407999902963639
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,64,32,4,128,1,float16,float16,0,0.010678400099277497
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,64,32,4,128,1,fp8,fp8,0,0.010422399640083313
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,64,32,8,128,1,float16,float16,0,0.010441599786281586
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,64,32,8,128,1,float16,fp8,0,0.010708799958229065
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,64,32,8,128,1,fp8,fp8,0,0.01050880029797554
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,64,32,32,128,1,float16,float16,0,0.010436800122261048
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,64,32,32,128,1,float16,fp8,0,0.010649599879980088
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,64,32,32,128,1,fp8,fp8,0,0.010364799946546554
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,64,32,1,128,1,float16,float16,0,0.010417599976062775
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,64,32,1,128,1,fp8,fp8,0,0.010446400195360184
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,64,32,1,128,1,float16,fp8,0,0.010619200021028518
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,64,32,2,128,1,float16,float16,0,0.01045600026845932
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,64,32,2,128,1,fp8,fp8,0,0.010574399679899215
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,64,32,4,128,1,float16,float16,0,0.010467199981212616
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,64,32,2,128,1,float16,fp8,0,0.01043199971318245
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,64,32,4,128,1,float16,fp8,0,0.01061440035700798
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,64,32,4,128,1,fp8,fp8,0,0.010444799810647965
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,64,32,8,128,1,float16,float16,0,0.0104592002928257
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,64,32,8,128,1,float16,fp8,0,0.010718400031328202
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,64,32,8,128,1,fp8,fp8,0,0.010428799688816071
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,32,32,1,128,1,float16,float16,0,0.17322720289230348
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,32,32,1,128,1,float16,fp8,0,0.17254240512847902
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,32,32,1,128,1,fp8,fp8,0,0.17357759475708007
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,32,32,2,128,1,float16,float16,0,0.17381279468536376
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,32,32,2,128,1,float16,fp8,0,0.172598397731781
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,32,32,2,128,1,fp8,fp8,0,0.17422879934310914
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,32,32,4,128,1,float16,float16,0,0.17512320280075072
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,32,32,4,128,1,fp8,fp8,0,0.1739583969116211
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,32,32,4,128,1,float16,fp8,0,0.17241120338439941
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,32,32,8,128,1,float16,float16,0,0.18075040578842164
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,32,32,32,128,1,float16,float16,0,0.10760639905929566
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,32,32,8,128,1,float16,fp8,0,0.17418880462646485
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,32,32,8,128,1,fp8,fp8,0,0.17241920232772828
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,32,32,32,128,1,float16,fp8,0,0.10477279424667359
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,32,32,1,128,1,float16,float16,0,0.09069120287895202
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,32,32,32,128,1,fp8,fp8,0,0.1047584056854248
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,32,32,1,128,1,float16,fp8,0,0.09033439755439758
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,32,32,2,128,1,float16,float16,0,0.09228799939155578
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,32,32,1,128,1,fp8,fp8,0,0.0904304027557373
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,32,32,2,128,1,float16,fp8,0,0.09080960154533387
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,32,32,2,128,1,fp8,fp8,0,0.09081439971923828
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,32,32,4,128,1,float16,float16,0,0.09267839789390564
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,32,32,4,128,1,float16,fp8,0,0.09031999707221985
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,32,32,4,128,1,fp8,fp8,0,0.09035519957542419
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,32,32,8,128,1,float16,float16,0,0.09660159945487976
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,32,32,8,128,1,float16,fp8,0,0.09043520092964172
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,32,32,8,128,1,fp8,fp8,0,0.09048960208892823
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,32,32,32,128,1,float16,float16,0,0.05790240168571472
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,32,32,32,128,1,float16,fp8,0,0.055718398094177245
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,32,32,32,128,1,fp8,fp8,0,0.05581439733505249
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,32,32,1,128,1,float16,float16,0,0.05028480291366577
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,32,32,1,128,1,float16,fp8,0,0.049553599953651425
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,32,32,1,128,1,fp8,fp8,0,0.04943839907646179
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,32,32,2,128,1,float16,fp8,0,0.04946720004081726
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,32,32,2,128,1,float16,float16,0,0.05166879892349243
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,32,32,2,128,1,fp8,fp8,0,0.04931359887123108
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,32,32,4,128,1,float16,float16,0,0.051588797569274904
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,32,32,4,128,1,fp8,fp8,0,0.04943200051784515
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,32,32,4,128,1,float16,fp8,0,0.04938719868659973
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,32,32,8,128,1,float16,float16,0,0.051481598615646364
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,32,32,8,128,1,float16,fp8,0,0.04933600127696991
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,32,32,8,128,1,fp8,fp8,0,0.04964320063591003
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,32,32,32,128,1,float16,float16,0,0.03298400044441223
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,32,32,32,128,1,float16,fp8,0,0.031201601028442383
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,32,32,1,128,1,float16,float16,0,0.029702401161193846
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,32,32,32,128,1,fp8,fp8,0,0.03310559988021851
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,32,32,1,128,1,float16,fp8,0,0.029017600417137145
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,32,32,1,128,1,fp8,fp8,0,0.028918400406837463
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,32,32,2,128,1,float16,float16,0,0.03073279857635498
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,32,32,2,128,1,fp8,fp8,0,0.028984001278877257
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,32,32,2,128,1,float16,fp8,0,0.028891199827194215
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,32,32,4,128,1,float16,float16,0,0.03100320100784302
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,32,32,4,128,1,float16,fp8,0,0.028993600606918336
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,32,32,4,128,1,fp8,fp8,0,0.028883200883865357
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,32,32,8,128,1,float16,float16,0,0.03115519881248474
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,32,32,8,128,1,float16,fp8,0,0.0289792001247406
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,32,32,8,128,1,fp8,fp8,0,0.02895680069923401
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,32,32,32,128,1,float16,float16,0,0.0208639994263649
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,32,32,32,128,1,float16,fp8,0,0.020873600244522096
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,32,32,32,128,1,fp8,fp8,0,0.020715199410915375
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,32,32,1,128,1,float16,float16,0,0.02059520035982132
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,32,32,1,128,1,float16,fp8,0,0.019681599736213685
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,32,32,1,128,1,fp8,fp8,0,0.01863519996404648
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,32,32,2,128,1,float16,float16,0,0.0204815998673439
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,32,32,2,128,1,fp8,fp8,0,0.018628799915313722
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,32,32,2,128,1,float16,fp8,0,0.018787199258804323
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,32,32,4,128,1,float16,float16,0,0.020531199872493744
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,32,32,4,128,1,float16,fp8,0,0.018699200451374055
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,32,32,4,128,1,fp8,fp8,0,0.018680000305175783
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,32,32,8,128,1,float16,fp8,0,0.018695999681949616
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,32,32,8,128,1,float16,float16,0,0.02059040069580078
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,32,32,8,128,1,fp8,fp8,0,0.01886080056428909
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,32,32,32,128,1,float16,float16,0,0.014539200067520141
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,32,32,32,128,1,float16,fp8,0,0.014668799936771393
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,32,32,32,128,1,fp8,fp8,0,0.014608000218868256
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,32,32,1,128,1,float16,float16,0,0.014472000300884247
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,32,32,1,128,1,float16,fp8,0,0.014521600306034088
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,32,32,1,128,1,fp8,fp8,0,0.014379200339317322
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,32,32,2,128,1,float16,float16,0,0.014537599682807923
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,32,32,2,128,1,float16,fp8,0,0.014414399862289429
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,32,32,2,128,1,fp8,fp8,0,0.014443199336528777
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,32,32,4,128,1,float16,float16,0,0.014526399970054626
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,32,32,4,128,1,float16,fp8,0,0.014451199769973755
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,32,32,4,128,1,fp8,fp8,0,0.014457599818706512
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,32,32,8,128,1,float16,float16,0,0.014500799775123595
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,32,32,8,128,1,float16,fp8,0,0.014451199769973755
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,32,32,8,128,1,fp8,fp8,0,0.014412799477577209
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,32,32,32,128,1,float16,float16,0,0.012430399656295776
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,32,32,32,128,1,float16,fp8,0,0.012508800625801087
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,32,32,32,128,1,fp8,fp8,0,0.012403199821710587
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,32,32,1,128,1,float16,float16,0,0.010436800122261048
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,32,32,1,128,1,float16,fp8,0,0.010513599961996078
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,32,32,1,128,1,fp8,fp8,0,0.01045759990811348
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,32,32,2,128,1,float16,float16,0,0.010460799932479859
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,32,32,2,128,1,float16,fp8,0,0.010518400371074677
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,32,32,2,128,1,fp8,fp8,0,0.010628800094127654
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,32,32,4,128,1,float16,float16,0,0.010606399923563003
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,32,32,4,128,1,float16,fp8,0,0.010468800365924836
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,32,32,4,128,1,fp8,fp8,0,0.010487999767065048
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,32,32,8,128,1,float16,float16,0,0.01043040007352829
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,32,32,8,128,1,fp8,fp8,0,0.010480000078678131
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,32,32,8,128,1,float16,fp8,0,0.010662399977445603
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,32,32,32,128,1,float16,float16,0,0.010441599786281586
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,32,32,32,128,1,float16,fp8,0,0.010539200156927109
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,32,32,1,128,1,float16,float16,0,0.010377600044012069
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,32,32,32,128,1,fp8,fp8,0,0.010660800337791442
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,32,32,1,128,1,float16,fp8,0,0.010515200346708298
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,32,32,1,128,1,fp8,fp8,0,0.010420800000429154
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,32,32,2,128,1,float16,float16,0,0.010632000118494033
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,32,32,2,128,1,float16,fp8,0,0.010507199913263321
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,32,32,2,128,1,fp8,fp8,0,0.010449600219726563
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,32,32,4,128,1,float16,float16,0,0.0106175996363163
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,32,32,4,128,1,float16,fp8,0,0.010489600151777268
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,32,32,4,128,1,fp8,fp8,0,0.010446400195360184
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,32,32,8,128,1,float16,float16,0,0.0104592002928257
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,32,32,8,128,1,fp8,fp8,0,0.01037919968366623
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,32,32,8,128,1,float16,fp8,0,0.010492800176143647
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,32,32,32,128,1,float16,fp8,0,0.010419200360774993
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,32,32,32,128,1,float16,float16,0,0.010583999752998351
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,32,32,32,128,1,fp8,fp8,0,0.010363200306892395
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,32,32,1,128,1,float16,float16,0,0.010420800000429154
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,32,32,1,128,1,fp8,fp8,0,0.0104032002389431
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,32,32,2,128,1,float16,float16,0,0.010395199805498124
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,32,32,1,128,1,float16,fp8,0,0.010465600341558457
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,32,32,2,128,1,float16,fp8,0,0.01048159971833229
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,32,32,2,128,1,fp8,fp8,0,0.01045600026845932
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,32,32,4,128,1,float16,float16,0,0.010524799674749374
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,32,32,4,128,1,float16,fp8,0,0.010444799810647965
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,32,32,4,128,1,fp8,fp8,0,0.010567999631166457
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,32,32,8,128,1,float16,float16,0,0.01061440035700798
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,32,32,8,128,1,float16,fp8,0,0.010529600083827972
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,32,32,8,128,1,fp8,fp8,0,0.010566399991512298
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,16,32,1,128,1,float16,float16,0,0.1668336033821106
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,16,32,1,128,1,float16,fp8,0,0.15995839834213257
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,16,32,1,128,1,fp8,fp8,0,0.16019840240478517
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,16,32,2,128,1,float16,float16,0,0.16662559509277344
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,16,32,2,128,1,float16,fp8,0,0.16034560203552245
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,16,32,2,128,1,fp8,fp8,0,0.15990240573883058
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,16,32,4,128,1,float16,float16,0,0.1692960023880005
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,16,32,4,128,1,float16,fp8,0,0.1599984049797058
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,16,32,4,128,1,fp8,fp8,0,0.16013920307159424
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,16,32,8,128,1,float16,float16,0,0.1708624005317688
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,16,32,8,128,1,float16,fp8,0,0.16015679836273194
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,16,32,8,128,1,fp8,fp8,0,0.1601088047027588
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,16,32,32,128,1,float16,float16,0,0.09892320036888122
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,16,32,32,128,1,float16,fp8,0,0.09079359769821167
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,16,32,32,128,1,fp8,fp8,0,0.0904591977596283
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,16,32,1,128,1,float16,float16,0,0.08868640065193176
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,16,32,1,128,1,float16,fp8,0,0.08425599932670594
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,16,32,1,128,1,fp8,fp8,0,0.08438559770584106
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,16,32,2,128,1,float16,fp8,0,0.08432480096817016
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,16,32,2,128,1,float16,float16,0,0.088646399974823
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,16,32,2,128,1,fp8,fp8,0,0.08416960239410401
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,16,32,4,128,1,float16,float16,0,0.0902559995651245
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,16,32,4,128,1,float16,fp8,0,0.08434879779815674
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,16,32,4,128,1,fp8,fp8,0,0.08422080278396607
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,16,32,8,128,1,float16,float16,0,0.090692800283432
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,16,32,8,128,1,fp8,fp8,0,0.08432959914207458
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,16,32,8,128,1,float16,fp8,0,0.08443999886512757
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,16,32,32,128,1,float16,float16,0,0.05183359980583191
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,16,32,32,128,1,float16,fp8,0,0.05039680004119873
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,16,32,32,128,1,fp8,fp8,0,0.04986560046672821
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,16,32,1,128,1,float16,float16,0,0.049542400240898135
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,16,32,1,128,1,float16,fp8,0,0.04719040095806122
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,16,32,1,128,1,fp8,fp8,0,0.047356799244880676
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,16,32,2,128,1,float16,float16,0,0.049465599656105044
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,16,32,2,128,1,float16,fp8,0,0.04731839895248413
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,16,32,4,128,1,float16,float16,0,0.05000479817390442
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,16,32,2,128,1,fp8,fp8,0,0.047502401471138
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,16,32,4,128,1,float16,fp8,0,0.04725759923458099
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,16,32,4,128,1,fp8,fp8,0,0.04734080135822296
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,16,32,8,128,1,float16,float16,0,0.049619200825691226
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,16,32,8,128,1,float16,fp8,0,0.04727199971675873
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,16,32,8,128,1,fp8,fp8,0,0.04732959866523743
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,16,32,32,128,1,float16,fp8,0,0.02897599935531616
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,16,32,32,128,1,float16,float16,0,0.031179198622703554
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,16,32,32,128,1,fp8,fp8,0,0.028984001278877257
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,16,32,1,128,1,float16,float16,0,0.028984001278877257
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,16,32,1,128,1,float16,fp8,0,0.027289599180221558
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,16,32,1,128,1,fp8,fp8,0,0.026955199241638184
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,16,32,2,128,1,float16,float16,0,0.02903519868850708
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,16,32,2,128,1,fp8,fp8,0,0.02696000039577484
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,16,32,4,128,1,float16,float16,0,0.028990399837493897
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,16,32,2,128,1,float16,fp8,0,0.026995199918746948
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,16,32,4,128,1,float16,fp8,0,0.02699039876461029
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,16,32,4,128,1,fp8,fp8,0,0.02693760097026825
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,16,32,8,128,1,float16,float16,0,0.02902719974517822
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,16,32,8,128,1,float16,fp8,0,0.027193599939346315
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,16,32,32,128,1,float16,float16,0,0.020604799687862396
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,16,32,8,128,1,fp8,fp8,0,0.026931199431419372
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,16,32,32,128,1,float16,fp8,0,0.01976960003376007
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,16,32,32,128,1,fp8,fp8,0,0.018612800538539885
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,16,32,1,128,1,float16,float16,0,0.01879200041294098
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,16,32,1,128,1,float16,fp8,0,0.01868640035390854
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,16,32,1,128,1,fp8,fp8,0,0.01865919977426529
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,16,32,2,128,1,float16,float16,0,0.018639999628067016
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,16,32,2,128,1,float16,fp8,0,0.01886080056428909
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,16,32,2,128,1,fp8,fp8,0,0.018611200153827667
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,16,32,4,128,1,float16,float16,0,0.01966080069541931
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,16,32,4,128,1,float16,fp8,0,0.018779200315475465
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,16,32,4,128,1,fp8,fp8,0,0.01863040030002594
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,16,32,8,128,1,float16,float16,0,0.020619200170040132
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,16,32,8,128,1,float16,fp8,0,0.01873439997434616
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,16,32,8,128,1,fp8,fp8,0,0.018593600392341612
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,16,32,32,128,1,float16,float16,0,0.014635199308395385
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,16,32,32,128,1,float16,fp8,0,0.014468799531459808
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,16,32,32,128,1,fp8,fp8,0,0.014555199444293976
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,16,32,1,128,1,float16,float16,0,0.014478400349617004
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,16,32,1,128,1,float16,fp8,0,0.01435520052909851
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,16,32,1,128,1,fp8,fp8,0,0.013705599308013915
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,16,32,2,128,1,float16,float16,0,0.014433600008487701
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,16,32,4,128,1,float16,float16,0,0.014451199769973755
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,16,32,2,128,1,fp8,fp8,0,0.014326399564743042
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,16,32,2,128,1,float16,fp8,0,0.014480000734329224
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,16,32,4,128,1,float16,fp8,0,0.01281919926404953
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,16,32,4,128,1,fp8,fp8,0,0.0144896000623703
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,16,32,8,128,1,float16,float16,0,0.014428800344467163
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,16,32,8,128,1,float16,fp8,0,0.013247999548912048
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,16,32,8,128,1,fp8,fp8,0,0.012590399384498597
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16,32,32,128,1,float16,float16,0,0.012465599924325943
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16,32,32,128,1,float16,fp8,0,0.010492800176143647
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16,32,1,128,1,float16,float16,0,0.010521599650382995
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16,32,32,128,1,fp8,fp8,0,0.010443200170993806
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16,32,1,128,1,float16,fp8,0,0.010540799796581268
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16,32,1,128,1,fp8,fp8,0,0.010476800054311753
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16,32,2,128,1,float16,float16,0,0.010428799688816071
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16,32,2,128,1,float16,fp8,0,0.010480000078678131
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16,32,2,128,1,fp8,fp8,0,0.010558400303125381
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16,32,4,128,1,float16,float16,0,0.010424000024795533
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16,32,4,128,1,float16,fp8,0,0.010515200346708298
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16,32,4,128,1,fp8,fp8,0,0.01043199971318245
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16,32,8,128,1,float16,float16,0,0.010491199791431427
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16,32,8,128,1,fp8,fp8,0,0.010548800230026245
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16,32,8,128,1,float16,fp8,0,0.010592000186443329
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16,32,32,128,1,float16,float16,0,0.010435199737548828
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16,32,32,128,1,float16,fp8,0,0.01048159971833229
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16,32,32,128,1,fp8,fp8,0,0.01050880029797554
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16,32,1,128,1,float16,float16,0,0.010440000146627427
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16,32,1,128,1,float16,fp8,0,0.0104592002928257
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16,32,1,128,1,fp8,fp8,0,0.010571199655532836
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16,32,2,128,1,float16,float16,0,0.010428799688816071
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16,32,2,128,1,float16,fp8,0,0.010513599961996078
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16,32,2,128,1,fp8,fp8,0,0.010552000254392624
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16,32,4,128,1,float16,float16,0,0.010361599922180175
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16,32,4,128,1,float16,fp8,0,0.010320000350475311
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16,32,4,128,1,fp8,fp8,0,0.010446400195360184
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16,32,8,128,1,float16,float16,0,0.010523200035095215
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16,32,8,128,1,float16,fp8,0,0.010427200049161912
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16,32,8,128,1,fp8,fp8,0,0.010496000200510025
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16,32,32,128,1,float16,float16,0,0.010467199981212616
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16,32,32,128,1,float16,fp8,0,0.010344000160694122
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16,32,32,128,1,fp8,fp8,0,0.010468800365924836
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16,32,1,128,1,float16,float16,0,0.010339199751615524
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16,32,1,128,1,float16,fp8,0,0.010369600355625152
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16,32,1,128,1,fp8,fp8,0,0.010444799810647965
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16,32,2,128,1,float16,float16,0,0.010388799756765366
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16,32,2,128,1,float16,fp8,0,0.010414399951696397
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16,32,2,128,1,fp8,fp8,0,0.010487999767065048
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16,32,4,128,1,float16,float16,0,0.010391999781131745
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16,32,4,128,1,float16,fp8,0,0.010401599854230881
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16,32,4,128,1,fp8,fp8,0,0.010417599976062775
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16,32,8,128,1,float16,float16,0,0.010398399829864503
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16,32,8,128,1,float16,fp8,0,0.010360000282526016
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16,32,8,128,1,fp8,fp8,0,0.010542400181293488
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,1,32,1,128,1,float16,float16,0,0.16467679738998414
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,1,32,1,128,1,float16,fp8,0,0.1540079951286316
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,1,32,1,128,1,fp8,fp8,0,0.15382080078125
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,1,32,2,128,1,float16,float16,0,0.16477279663085936
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,1,32,2,128,1,float16,fp8,0,0.15385279655456544
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,1,32,2,128,1,fp8,fp8,0,0.1533184051513672
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,1,32,4,128,1,float16,float16,0,0.16481599807739258
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,1,32,4,128,1,float16,fp8,0,0.15387519598007202
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,1,32,4,128,1,fp8,fp8,0,0.15331360101699829
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,1,32,8,128,1,float16,float16,0,0.16442079544067384
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,1,32,8,128,1,float16,fp8,0,0.15412960052490235
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,1,32,8,128,1,fp8,fp8,0,0.15363359451293945
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,1,32,32,128,1,float16,float16,0,0.08835679888725281
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,1,32,32,128,1,float16,fp8,0,0.08230400085449219
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,1,32,32,128,1,fp8,fp8,0,0.08213760256767273
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,1,32,1,128,1,float16,float16,0,0.08806239962577819
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,1,32,1,128,1,fp8,fp8,0,0.08209919929504395
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,1,32,1,128,1,float16,fp8,0,0.08229920268058777
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,1,32,2,128,1,float16,float16,0,0.08785600066184998
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,1,32,2,128,1,float16,fp8,0,0.08205599784851074
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,1,32,2,128,1,fp8,fp8,0,0.08223519921302795
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,1,32,4,128,1,float16,float16,0,0.0883679986000061
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,1,32,4,128,1,float16,fp8,0,0.08215199708938599
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,1,32,4,128,1,fp8,fp8,0,0.08229759931564332
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,1,32,8,128,1,float16,float16,0,0.08833760023117065
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,1,32,8,128,1,float16,fp8,0,0.08219199776649475
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,1,32,8,128,1,fp8,fp8,0,0.08218880295753479
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1,32,32,128,1,float16,float16,0,0.0494623988866806
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1,32,32,128,1,float16,fp8,0,0.0453247994184494
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1,32,32,128,1,fp8,fp8,0,0.04628320038318634
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1,32,1,128,1,float16,float16,0,0.04945439994335175
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1,32,1,128,1,float16,fp8,0,0.04531359970569611
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1,32,1,128,1,fp8,fp8,0,0.04568960070610047
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1,32,2,128,1,float16,float16,0,0.04935039877891541
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1,32,2,128,1,float16,fp8,0,0.04532159864902496
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1,32,2,128,1,fp8,fp8,0,0.04606879949569702
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1,32,4,128,1,float16,float16,0,0.04943679869174957
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1,32,4,128,1,float16,fp8,0,0.045326399803161624
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1,32,4,128,1,fp8,fp8,0,0.04551360011100769
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1,32,8,128,1,float16,float16,0,0.049291199445724486
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1,32,8,128,1,float16,fp8,0,0.045326399803161624
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1,32,32,128,1,float16,float16,0,0.02887679934501648
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1,32,32,128,1,float16,fp8,0,0.026867198944091796
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1,32,8,128,1,fp8,fp8,0,0.0453792005777359
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1,32,32,128,1,fp8,fp8,0,0.026817598938941957
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1,32,1,128,1,float16,float16,0,0.028977599740028382
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1,32,1,128,1,float16,fp8,0,0.02690559923648834
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1,32,2,128,1,float16,float16,0,0.02889760136604309
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1,32,1,128,1,fp8,fp8,0,0.026848000288009644
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1,32,2,128,1,float16,fp8,0,0.026927998661994933
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1,32,2,128,1,fp8,fp8,0,0.026868799328804018
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1,32,4,128,1,float16,float16,0,0.028814399242401124
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1,32,4,128,1,float16,fp8,0,0.026902401447296144
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1,32,4,128,1,fp8,fp8,0,0.02693600058555603
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1,32,8,128,1,float16,float16,0,0.02884640097618103
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1,32,8,128,1,float16,fp8,0,0.027003198862075806
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1,32,8,128,1,fp8,fp8,0,0.027000001072883605
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1,32,32,128,1,float16,float16,0,0.01984799951314926
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1,32,32,128,1,float16,fp8,0,0.018702399730682374
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1,32,1,128,1,float16,float16,0,0.01870719939470291
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1,32,32,128,1,fp8,fp8,0,0.018747200071811677
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1,32,1,128,1,float16,fp8,0,0.01871200054883957
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1,32,1,128,1,fp8,fp8,0,0.018587200343608855
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1,32,2,128,1,float16,float16,0,0.018760000169277192
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1,32,2,128,1,float16,fp8,0,0.018566399812698364
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1,32,2,128,1,fp8,fp8,0,0.01878879964351654
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1,32,4,128,1,float16,float16,0,0.01865919977426529
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1,32,4,128,1,float16,fp8,0,0.01876319944858551
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1,32,4,128,1,fp8,fp8,0,0.018702399730682374
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1,32,8,128,1,float16,float16,0,0.01876319944858551
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1,32,8,128,1,float16,fp8,0,0.018615999817848207
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1,32,8,128,1,fp8,fp8,0,0.01854719966650009
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1,32,32,128,1,float16,float16,0,0.015324799716472626
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1,32,32,128,1,float16,fp8,0,0.014459200203418732
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1,32,32,128,1,fp8,fp8,0,0.01252640038728714
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1,32,1,128,1,float16,float16,0,0.01446560025215149
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1,32,1,128,1,float16,fp8,0,0.01430400013923645
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1,32,1,128,1,fp8,fp8,0,0.012515200674533844
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1,32,2,128,1,float16,float16,0,0.014500799775123595
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1,32,2,128,1,float16,fp8,0,0.014451199769973755
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1,32,2,128,1,fp8,fp8,0,0.012479999661445617
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1,32,4,128,1,float16,float16,0,0.014468799531459808
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1,32,4,128,1,float16,fp8,0,0.01449279934167862
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1,32,4,128,1,fp8,fp8,0,0.012608000636100769
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1,32,8,128,1,float16,float16,0,0.014507199823856353
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1,32,8,128,1,float16,fp8,0,0.013652800023555756
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1,32,8,128,1,fp8,fp8,0,0.012707200646400452
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1,32,32,128,1,float16,float16,0,0.012455999851226807
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1,32,32,128,1,float16,fp8,0,0.0105103999376297
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1,32,32,128,1,fp8,fp8,0,0.010460799932479859
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1,32,1,128,1,float16,float16,0,0.01043199971318245
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1,32,1,128,1,float16,fp8,0,0.010497599840164185
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1,32,1,128,1,fp8,fp8,0,0.010460799932479859
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1,32,2,128,1,float16,fp8,0,0.010526400059461594
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1,32,2,128,1,float16,float16,0,0.01043040007352829
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1,32,2,128,1,fp8,fp8,0,0.010441599786281586
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1,32,4,128,1,float16,fp8,0,0.010489600151777268
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1,32,4,128,1,fp8,fp8,0,0.010428799688816071
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1,32,4,128,1,float16,float16,0,0.011948800086975098
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1,32,8,128,1,float16,float16,0,0.010416000336408614
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1,32,8,128,1,float16,fp8,0,0.010500799864530563
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1,32,8,128,1,fp8,fp8,0,0.010515200346708298
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1,32,32,128,1,float16,float16,0,0.010542400181293488
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1,32,32,128,1,float16,fp8,0,0.010427200049161912
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1,32,32,128,1,fp8,fp8,0,0.010478399693965912
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1,32,1,128,1,float16,float16,0,0.010398399829864503
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1,32,1,128,1,fp8,fp8,0,0.010555200278759003
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1,32,1,128,1,float16,fp8,0,0.010446400195360184
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1,32,2,128,1,float16,float16,0,0.0104032002389431
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1,32,2,128,1,float16,fp8,0,0.01053759977221489
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1,32,2,128,1,fp8,fp8,0,0.010491199791431427
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1,32,4,128,1,float16,float16,0,0.01043199971318245
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1,32,4,128,1,float16,fp8,0,0.010489600151777268
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1,32,4,128,1,fp8,fp8,0,0.010390400141477584
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1,32,8,128,1,float16,float16,0,0.010438399761915207
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1,32,8,128,1,float16,fp8,0,0.010577599704265594
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1,32,8,128,1,fp8,fp8,0,0.010417599976062775
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1,32,32,128,1,float16,float16,0,0.01043199971318245
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1,32,32,128,1,float16,fp8,0,0.010375999659299851
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1,32,32,128,1,fp8,fp8,0,0.010473600029945374
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1,32,1,128,1,float16,float16,0,0.010420800000429154
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1,32,1,128,1,float16,fp8,0,0.01045759990811348
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1,32,1,128,1,fp8,fp8,0,0.010462400317192078
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1,32,2,128,1,float16,float16,0,0.01037919968366623
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1,32,2,128,1,float16,fp8,0,0.010339199751615524
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1,32,2,128,1,fp8,fp8,0,0.01040479987859726
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1,32,4,128,1,float16,float16,0,0.01043040007352829
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1,32,4,128,1,float16,fp8,0,0.010367999970912933
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1,32,4,128,1,fp8,fp8,0,0.010334400087594986
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1,32,8,128,1,float16,float16,0,0.010436800122261048
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1,32,8,128,1,float16,fp8,0,0.010500799864530563
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1,32,8,128,1,fp8,fp8,0,0.010463999956846238
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,16384,24,1,128,1,fp8,fp8,0,8.031790161132813
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,16384,24,1,128,1,float16,fp8,0,8.492765045166015
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,16384,24,1,128,1,float16,float16,0,12.990029907226562
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,16384,24,2,128,1,float16,float16,0,13.484033203125
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,16384,24,2,128,1,float16,fp8,0,8.206217956542968
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,16384,24,2,128,1,fp8,fp8,0,8.720161437988281
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,16384,24,4,128,1,float16,fp8,0,8.37551040649414
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,16384,24,4,128,1,fp8,fp8,0,8.660380554199218
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,16384,24,4,128,1,float16,float16,0,14.122465515136719
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,16384,24,8,128,1,float16,float16,0,15.689503479003907
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,16384,24,8,128,1,float16,fp8,0,9.241356658935548
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,16384,24,8,128,1,fp8,fp8,0,9.064552307128906
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16384,24,24,128,1,float16,fp8,0,4.252094268798828
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16384,24,1,128,1,float16,float16,0,6.79999008178711
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16384,24,24,128,1,fp8,fp8,0,4.372633743286133
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16384,24,1,128,1,float16,fp8,0,4.248070526123047
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16384,24,1,128,1,fp8,fp8,0,4.1583599090576175
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16384,24,2,128,1,float16,float16,0,6.767246246337891
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16384,24,2,128,1,float16,fp8,0,4.333996963500977
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16384,24,2,128,1,fp8,fp8,0,4.352620697021484
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16384,24,4,128,1,float16,fp8,0,4.446451187133789
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16384,24,4,128,1,fp8,fp8,0,4.079110336303711
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16384,24,4,128,1,float16,float16,0,6.710540771484375
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16384,24,8,128,1,float16,fp8,0,4.348417663574219
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16384,24,8,128,1,fp8,fp8,0,4.292502212524414
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16384,24,8,128,1,float16,float16,0,7.241204833984375
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16384,24,24,128,1,float16,fp8,0,2.323611259460449
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16384,24,24,128,1,fp8,fp8,0,2.8363391876220705
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16384,24,1,128,1,float16,float16,0,2.8715631484985353
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16384,24,1,128,1,float16,fp8,0,2.3731088638305664
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16384,24,1,128,1,fp8,fp8,0,2.4579872131347655
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16384,24,2,128,1,float16,float16,0,2.5200735092163087
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16384,24,2,128,1,float16,fp8,0,2.2787616729736326
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16384,24,2,128,1,fp8,fp8,0,2.0424671173095703
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16384,24,4,128,1,float16,float16,0,2.9932416915893554
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16384,24,4,128,1,float16,fp8,0,2.0437088012695312
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16384,24,4,128,1,fp8,fp8,0,2.531942367553711
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16384,24,8,128,1,float16,float16,0,2.8254415512084963
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16384,24,8,128,1,float16,fp8,0,2.1247392654418946
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16384,24,8,128,1,fp8,fp8,0,2.518956756591797
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16384,24,24,128,1,float16,fp8,0,1.1402352333068848
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16384,24,24,128,1,fp8,fp8,0,1.1300399780273438
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16384,24,1,128,1,float16,float16,0,1.2281023979187011
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16384,24,1,128,1,float16,fp8,0,1.5115232467651367
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16384,24,1,128,1,fp8,fp8,0,1.1019951820373535
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16384,24,2,128,1,float16,float16,0,1.228433609008789
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16384,24,2,128,1,float16,fp8,0,1.1309679985046386
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16384,24,2,128,1,fp8,fp8,0,1.1184703826904296
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16384,24,4,128,1,float16,float16,0,1.2957183837890625
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16384,24,4,128,1,float16,fp8,0,1.4435680389404297
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16384,24,4,128,1,fp8,fp8,0,1.0894767761230468
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16384,24,8,128,1,float16,float16,0,1.2493167877197267
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16384,24,8,128,1,float16,fp8,0,1.1319999694824219
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16384,24,8,128,1,fp8,fp8,0,1.1119359970092773
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,12288,24,1,128,1,float16,float16,0,7.99815673828125
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,12288,24,1,128,1,float16,fp8,0,4.956505584716797
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,12288,24,1,128,1,fp8,fp8,0,4.977897644042969
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,12288,24,2,128,1,float16,float16,0,8.719863891601562
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,12288,24,2,128,1,float16,fp8,0,4.85306396484375
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,12288,24,2,128,1,fp8,fp8,0,5.323758316040039
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,12288,24,4,128,1,float16,float16,0,8.742958068847656
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,12288,24,4,128,1,float16,fp8,0,5.0595855712890625
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,12288,24,4,128,1,fp8,fp8,0,5.253945541381836
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,12288,24,8,128,1,float16,float16,0,8.730592346191406
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,12288,24,8,128,1,float16,fp8,0,5.187516784667968
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,12288,24,8,128,1,fp8,fp8,0,5.218137741088867
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,12288,24,24,128,1,float16,float16,0,2.719612884521484
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16384,24,24,128,1,float16,float16,0,1.284556770324707
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16384,24,24,128,1,float16,float16,0,6.252552032470703
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,12288,24,24,128,1,float16,fp8,0,2.9793584823608397
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,12288,24,24,128,1,fp8,fp8,0,2.4380704879760744
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,12288,24,1,128,1,float16,float16,0,3.28712158203125
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,12288,24,1,128,1,float16,fp8,0,2.3919584274291994
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,12288,24,1,128,1,fp8,fp8,0,2.361577606201172
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16384,24,24,128,1,float16,float16,0,2.5818559646606447
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,12288,24,2,128,1,float16,fp8,0,2.478646469116211
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,12288,24,2,128,1,fp8,fp8,0,2.3606800079345702
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,12288,24,2,128,1,float16,float16,0,3.698932647705078
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,12288,24,4,128,1,float16,float16,0,3.606884765625
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,12288,24,4,128,1,float16,fp8,0,2.356395149230957
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,12288,24,4,128,1,fp8,fp8,0,2.3640768051147463
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,12288,24,8,128,1,float16,float16,0,3.6886558532714844
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,12288,24,24,128,1,float16,float16,0,1.6354095458984375
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,12288,24,8,128,1,float16,fp8,0,2.7228960037231444
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,12288,24,8,128,1,fp8,fp8,0,2.414419174194336
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,12288,24,24,128,1,float16,fp8,0,1.314252758026123
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,12288,24,24,128,1,fp8,fp8,0,1.4599247932434083
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,12288,24,1,128,1,float16,float16,0,1.3433504104614258
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,12288,24,1,128,1,float16,fp8,0,1.3482064247131347
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,12288,24,1,128,1,fp8,fp8,0,1.2338383674621582
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,12288,24,2,128,1,float16,float16,0,1.3894639968872071
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,12288,24,2,128,1,float16,fp8,0,1.3636351585388184
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,12288,24,2,128,1,fp8,fp8,0,1.2194432258605956
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,12288,24,4,128,1,float16,float16,0,1.3698687553405762
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,12288,24,4,128,1,float16,fp8,0,1.440116786956787
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,12288,24,4,128,1,fp8,fp8,0,1.2287343978881835
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,12288,24,8,128,1,float16,float16,0,1.3976448059082032
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,12288,24,24,128,1,float16,float16,0,0.7776912212371826
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,12288,24,8,128,1,float16,fp8,0,1.4575535774230957
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,12288,24,24,128,1,float16,fp8,0,0.7491615772247314
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,12288,24,8,128,1,fp8,fp8,0,1.2321968078613281
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,12288,24,24,128,1,fp8,fp8,0,0.7097104072570801
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,12288,24,1,128,1,float16,float16,0,0.7570543766021729
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,12288,24,1,128,1,float16,fp8,0,0.7750768184661865
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,12288,24,1,128,1,fp8,fp8,0,0.6732592105865478
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,12288,24,2,128,1,float16,float16,0,0.7432975769042969
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,12288,24,2,128,1,float16,fp8,0,0.7609407901763916
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,12288,24,2,128,1,fp8,fp8,0,0.6837567806243896
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,12288,24,4,128,1,float16,float16,0,0.7609024047851562
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,12288,24,4,128,1,float16,fp8,0,0.704636812210083
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,12288,24,4,128,1,fp8,fp8,0,0.6894015789031982
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,12288,24,8,128,1,float16,float16,0,0.7614736080169677
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,12288,24,8,128,1,float16,fp8,0,0.67391037940979
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,12288,24,8,128,1,fp8,fp8,0,0.6666863918304443
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,10240,24,1,128,1,float16,fp8,0,3.468648147583008
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,10240,24,1,128,1,fp8,fp8,0,3.495870590209961
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,10240,24,1,128,1,float16,float16,0,5.044924926757813
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,10240,24,2,128,1,float16,float16,0,4.693510437011719
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,10240,24,2,128,1,fp8,fp8,0,3.5141983032226562
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,10240,24,2,128,1,float16,fp8,0,3.747003173828125
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,10240,24,4,128,1,float16,fp8,0,3.446752166748047
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,10240,24,4,128,1,float16,float16,0,5.666201782226563
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,10240,24,4,128,1,fp8,fp8,0,3.545945739746094
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,10240,24,8,128,1,float16,fp8,0,3.43458251953125
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,10240,24,24,128,1,float16,float16,0,2.2536191940307617
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,10240,24,8,128,1,float16,float16,0,6.329785537719727
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,10240,24,8,128,1,fp8,fp8,0,3.4210590362548827
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,10240,24,24,128,1,float16,fp8,0,1.861097526550293
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,10240,24,24,128,1,fp8,fp8,0,2.0159215927124023
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,10240,24,1,128,1,float16,fp8,0,1.7143903732299806
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,10240,24,1,128,1,fp8,fp8,0,1.7112512588500977
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,10240,24,1,128,1,float16,float16,0,2.978596878051758
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,10240,24,2,128,1,float16,float16,0,2.07666072845459
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,10240,24,2,128,1,float16,fp8,0,1.7112895965576171
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,10240,24,2,128,1,fp8,fp8,0,2.1514543533325194
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,10240,24,4,128,1,float16,fp8,0,1.7184703826904297
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,10240,24,4,128,1,float16,float16,0,2.7053728103637695
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,10240,24,4,128,1,fp8,fp8,0,1.8565696716308593
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,10240,24,8,128,1,float16,float16,0,1.8801935195922852
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,10240,24,8,128,1,fp8,fp8,0,1.8141439437866211
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,10240,24,8,128,1,float16,fp8,0,1.9746623992919923
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,10240,24,24,128,1,float16,float16,0,1.2906736373901366
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,10240,24,24,128,1,float16,fp8,0,0.9294528007507324
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,10240,24,24,128,1,fp8,fp8,0,0.9766816139221192
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,10240,24,1,128,1,float16,float16,0,1.0166336059570313
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,10240,24,1,128,1,float16,fp8,0,1.1206368446350097
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,10240,24,1,128,1,fp8,fp8,0,0.888212776184082
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,10240,24,2,128,1,float16,float16,0,0.993496036529541
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,10240,24,2,128,1,float16,fp8,0,1.1075471878051757
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,10240,24,2,128,1,fp8,fp8,0,0.8934335708618164
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,10240,24,4,128,1,float16,float16,0,1.0172224044799805
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,10240,24,4,128,1,fp8,fp8,0,0.8962911605834961
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,10240,24,4,128,1,float16,fp8,0,1.3664015769958495
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,10240,24,8,128,1,float16,float16,0,1.0376416206359864
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,10240,24,8,128,1,float16,fp8,0,1.014084815979004
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,10240,24,24,128,1,float16,float16,0,0.6122560024261474
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,10240,24,8,128,1,fp8,fp8,0,0.8977696418762207
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,10240,24,24,128,1,float16,fp8,0,0.5971936225891114
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,10240,24,24,128,1,fp8,fp8,0,0.5311007976531983
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,10240,24,1,128,1,float16,float16,0,0.5565408229827881
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,10240,24,1,128,1,float16,fp8,0,0.4968815803527832
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,10240,24,1,128,1,fp8,fp8,0,0.5733295917510987
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,10240,24,2,128,1,float16,float16,0,0.5499104022979736
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,10240,24,2,128,1,float16,fp8,0,0.5016687870025635
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,10240,24,2,128,1,fp8,fp8,0,0.4960336208343506
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,10240,24,4,128,1,float16,float16,0,0.5509935855865479
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,10240,24,4,128,1,fp8,fp8,0,0.49228482246398925
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,10240,24,4,128,1,float16,fp8,0,0.49282078742980956
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,10240,24,8,128,1,float16,float16,0,0.5662447929382324
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,10240,24,8,128,1,float16,fp8,0,0.49200000762939455
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,10240,24,8,128,1,fp8,fp8,0,0.49182400703430174
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,8192,24,1,128,1,float16,fp8,0,4.421372985839843
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,8192,24,1,128,1,fp8,fp8,0,4.371708679199219
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,8192,24,1,128,1,float16,float16,0,6.328372955322266
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,8192,24,2,128,1,float16,float16,0,7.340086364746094
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,8192,24,2,128,1,float16,fp8,0,4.508609771728516
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,8192,24,2,128,1,fp8,fp8,0,4.401404953002929
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,8192,24,4,128,1,float16,fp8,0,4.6288398742675785
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,8192,24,4,128,1,float16,float16,0,7.121961975097657
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,8192,24,4,128,1,fp8,fp8,0,4.442740631103516
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,8192,24,8,128,1,float16,float16,0,7.629161834716797
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,8192,24,24,128,1,float16,float16,0,3.4663551330566404
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,8192,24,8,128,1,float16,fp8,0,4.922289657592773
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,8192,24,8,128,1,fp8,fp8,0,4.579313659667969
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,8192,24,24,128,1,float16,fp8,0,2.4136192321777346
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,8192,24,24,128,1,fp8,fp8,0,2.368377685546875
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,8192,24,1,128,1,float16,fp8,0,2.2341232299804688
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,8192,24,1,128,1,float16,float16,0,3.509761428833008
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,8192,24,1,128,1,fp8,fp8,0,2.1959808349609373
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,8192,24,2,128,1,float16,fp8,0,2.2458351135253904
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,8192,24,2,128,1,float16,float16,0,3.517695999145508
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,8192,24,2,128,1,fp8,fp8,0,2.6205039978027345
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,8192,24,4,128,1,float16,float16,0,3.5926143646240236
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,8192,24,4,128,1,float16,fp8,0,2.266720008850098
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,8192,24,4,128,1,fp8,fp8,0,2.5260047912597656
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,8192,24,8,128,1,float16,float16,0,3.659795379638672
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,8192,24,8,128,1,fp8,fp8,0,2.2804447174072267
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,8192,24,8,128,1,float16,fp8,0,2.4545520782470702
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,8192,24,24,128,1,float16,float16,0,1.551524829864502
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,8192,24,24,128,1,fp8,fp8,0,1.2115296363830566
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,8192,24,1,128,1,float16,float16,0,1.2937919616699218
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,8192,24,1,128,1,float16,fp8,0,1.411251163482666
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,8192,24,24,128,1,float16,fp8,0,2.0381376266479494
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,8192,24,1,128,1,fp8,fp8,0,1.1317631721496582
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,8192,24,2,128,1,float16,fp8,0,1.154423999786377
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,8192,24,2,128,1,float16,float16,0,1.7326831817626953
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,8192,24,2,128,1,fp8,fp8,0,1.2925760269165039
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,8192,24,4,128,1,float16,float16,0,1.2558048248291016
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,8192,24,4,128,1,float16,fp8,0,1.148147201538086
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,8192,24,8,128,1,float16,float16,0,1.2801376342773438
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,8192,24,4,128,1,fp8,fp8,0,1.3798128128051759
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,8192,24,8,128,1,float16,fp8,0,1.485905647277832
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,8192,24,8,128,1,fp8,fp8,0,1.1489392280578614
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,8192,24,24,128,1,float16,fp8,0,0.6431280136108398
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,8192,24,24,128,1,float16,float16,0,1.0307968139648438
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,8192,24,24,128,1,fp8,fp8,0,0.6405263900756836
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,8192,24,1,128,1,float16,float16,0,0.6892943859100342
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,8192,24,1,128,1,float16,fp8,0,0.6054495811462403
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,8192,24,1,128,1,fp8,fp8,0,0.8320639610290528
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,8192,24,2,128,1,float16,float16,0,0.6739871978759766
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,8192,24,2,128,1,fp8,fp8,0,0.6064544200897217
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,8192,24,2,128,1,float16,fp8,0,0.6852511882781982
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,8192,24,4,128,1,float16,fp8,0,0.6123167991638183
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,8192,24,4,128,1,fp8,fp8,0,0.6056511878967286
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,8192,24,4,128,1,float16,float16,0,0.7856832027435303
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,8192,24,8,128,1,float16,float16,0,0.7013936042785645
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,8192,24,24,128,1,float16,float16,0,0.40402398109436033
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,8192,24,24,128,1,float16,fp8,0,0.368284797668457
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,8192,24,8,128,1,float16,fp8,0,0.6062464237213134
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,8192,24,8,128,1,fp8,fp8,0,0.6024447917938233
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,8192,24,24,128,1,fp8,fp8,0,0.36193759441375734
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,8192,24,1,128,1,float16,float16,0,0.3868304014205933
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,8192,24,1,128,1,float16,fp8,0,0.3407840013504028
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,8192,24,1,128,1,fp8,fp8,0,0.34240639209747314
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,8192,24,2,128,1,float16,float16,0,0.3838047981262207
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,8192,24,2,128,1,float16,fp8,0,0.34145920276641845
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,8192,24,2,128,1,fp8,fp8,0,0.343560004234314
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,8192,24,4,128,1,float16,float16,0,0.38949439525604246
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,8192,24,4,128,1,float16,fp8,0,0.3418031930923462
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,8192,24,4,128,1,fp8,fp8,0,0.34239840507507324
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,8192,24,8,128,1,float16,fp8,0,0.3421519994735718
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,8192,24,8,128,1,float16,float16,0,0.39523680210113527
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,8192,24,8,128,1,fp8,fp8,0,0.3410975933074951
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,6144,24,1,128,1,float16,fp8,0,2.607993507385254
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,6144,24,1,128,1,fp8,fp8,0,2.589904022216797
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,6144,24,1,128,1,float16,float16,0,3.1336463928222655
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,6144,24,2,128,1,float16,fp8,0,2.5810720443725588
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,6144,24,2,128,1,float16,float16,0,3.4198062896728514
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,6144,24,2,128,1,fp8,fp8,0,2.5926639556884767
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,6144,24,4,128,1,float16,float16,0,3.3147232055664064
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,6144,24,4,128,1,float16,fp8,0,2.7463632583618165
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,6144,24,4,128,1,fp8,fp8,0,2.6091920852661135
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,6144,24,8,128,1,float16,float16,0,3.85589599609375
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,6144,24,8,128,1,float16,fp8,0,2.6037120819091797
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,6144,24,24,128,1,float16,float16,0,1.8439119338989258
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,6144,24,8,128,1,fp8,fp8,0,2.8269567489624023
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,6144,24,24,128,1,float16,fp8,0,1.6588415145874023
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,6144,24,24,128,1,fp8,fp8,0,1.4074511528015137
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,6144,24,1,128,1,float16,fp8,0,1.3331199645996095
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,6144,24,1,128,1,float16,float16,0,1.9367919921875
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,6144,24,1,128,1,fp8,fp8,0,1.3114048004150392
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,6144,24,2,128,1,float16,float16,0,1.4078960418701172
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,6144,24,2,128,1,float16,fp8,0,1.3043359756469726
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,6144,24,4,128,1,float16,float16,0,1.438697624206543
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,6144,24,2,128,1,fp8,fp8,0,1.6839136123657226
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,6144,24,4,128,1,float16,fp8,0,1.4533472061157227
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,6144,24,4,128,1,fp8,fp8,0,1.3196528434753418
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,6144,24,8,128,1,float16,fp8,0,1.3467727661132813
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,6144,24,24,128,1,float16,float16,0,0.8282416343688965
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,6144,24,8,128,1,float16,float16,0,1.976144027709961
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,6144,24,8,128,1,fp8,fp8,0,1.3150624275207519
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,6144,24,24,128,1,float16,fp8,0,0.7476992130279541
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,6144,24,24,128,1,fp8,fp8,0,0.7368480205535889
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,6144,24,1,128,1,float16,fp8,0,0.6929711818695068
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,6144,24,1,128,1,float16,float16,0,1.073089599609375
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,6144,24,1,128,1,fp8,fp8,0,0.697547197341919
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,6144,24,2,128,1,float16,float16,0,0.7560431957244873
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,6144,24,2,128,1,float16,fp8,0,0.6919151782989502
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,6144,24,4,128,1,float16,float16,0,0.7703743934631347
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,6144,24,2,128,1,fp8,fp8,0,0.920849609375
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,6144,24,4,128,1,float16,fp8,0,0.696456003189087
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,6144,24,4,128,1,fp8,fp8,0,0.690718412399292
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,6144,24,8,128,1,float16,fp8,0,0.6894976139068604
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,6144,24,24,128,1,float16,float16,0,0.44889278411865235
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,6144,24,8,128,1,float16,float16,0,0.7887119770050048
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,6144,24,8,128,1,fp8,fp8,0,0.7372960090637207
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,6144,24,24,128,1,float16,fp8,0,0.4064000129699707
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,6144,24,24,128,1,fp8,fp8,0,0.4036752223968506
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,6144,24,1,128,1,float16,float16,0,0.45561442375183103
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,6144,24,1,128,1,float16,fp8,0,0.380180811882019
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,6144,24,1,128,1,fp8,fp8,0,0.381823992729187
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,6144,24,2,128,1,float16,float16,0,0.4094079971313477
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,6144,24,2,128,1,float16,fp8,0,0.5014063835144043
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,6144,24,2,128,1,fp8,fp8,0,0.37862560749053953
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,6144,24,4,128,1,float16,float16,0,0.41889281272888185
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,6144,24,4,128,1,float16,fp8,0,0.378656005859375
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,6144,24,4,128,1,fp8,fp8,0,0.42208638191223147
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,6144,24,8,128,1,float16,float16,0,0.42455358505249025
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,6144,24,8,128,1,float16,fp8,0,0.37819199562072753
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,6144,24,8,128,1,fp8,fp8,0,0.3766256093978882
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,6144,24,24,128,1,float16,float16,0,0.2606192111968994
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,6144,24,24,128,1,float16,fp8,0,0.2395695924758911
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,6144,24,24,128,1,fp8,fp8,0,0.2319727897644043
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,6144,24,1,128,1,float16,float16,0,0.23714399337768555
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,6144,24,1,128,1,float16,fp8,0,0.21987359523773192
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,6144,24,1,128,1,fp8,fp8,0,0.22464640140533448
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,6144,24,2,128,1,float16,float16,0,0.23690879344940186
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,6144,24,2,128,1,float16,fp8,0,0.21851201057434083
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,6144,24,2,128,1,fp8,fp8,0,0.21893439292907715
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,6144,24,4,128,1,float16,float16,0,0.24386560916900635
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,6144,24,4,128,1,float16,fp8,0,0.2162544012069702
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,6144,24,4,128,1,fp8,fp8,0,0.21644160747528077
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,6144,24,8,128,1,float16,float16,0,0.2463376045227051
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,6144,24,8,128,1,float16,fp8,0,0.22544639110565184
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,6144,24,8,128,1,fp8,fp8,0,0.21759839057922364
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,4096,24,1,128,1,float16,fp8,0,2.53753604888916
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,4096,24,1,128,1,fp8,fp8,0,2.468008041381836
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,4096,24,1,128,1,float16,float16,0,2.6433904647827147
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,4096,24,2,128,1,float16,float16,0,3.2469825744628906
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,4096,24,2,128,1,float16,fp8,0,2.5047311782836914
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,4096,24,2,128,1,fp8,fp8,0,2.6189504623413087
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,4096,24,4,128,1,float16,fp8,0,2.593587112426758
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,4096,24,4,128,1,float16,float16,0,3.957574462890625
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,4096,24,4,128,1,fp8,fp8,0,2.5519296646118166
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,4096,24,8,128,1,float16,float16,0,3.7018592834472654
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,4096,24,8,128,1,float16,fp8,0,2.5080848693847657
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,4096,24,24,128,1,float16,float16,0,1.5652928352355957
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,4096,24,8,128,1,fp8,fp8,0,2.619761657714844
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,4096,24,24,128,1,fp8,fp8,0,1.4052415847778321
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,4096,24,24,128,1,float16,fp8,0,1.8129104614257812
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,4096,24,1,128,1,float16,fp8,0,1.2722496032714843
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,4096,24,1,128,1,float16,float16,0,1.634587287902832
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,4096,24,1,128,1,fp8,fp8,0,1.2668512344360352
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,4096,24,2,128,1,float16,float16,0,1.3207951545715333
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,4096,24,2,128,1,float16,fp8,0,1.6231151580810548
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,4096,24,2,128,1,fp8,fp8,0,1.2641695976257323
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,4096,24,4,128,1,float16,float16,0,1.682044792175293
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,4096,24,4,128,1,float16,fp8,0,1.2701279640197753
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,4096,24,4,128,1,fp8,fp8,0,1.2610383987426759
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,4096,24,8,128,1,float16,fp8,0,1.2587471961975099
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,4096,24,8,128,1,fp8,fp8,0,1.2595680236816407
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,4096,24,8,128,1,float16,float16,0,2.0311887741088865
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,4096,24,24,128,1,float16,fp8,0,0.7248879909515381
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,4096,24,24,128,1,float16,float16,0,0.806708812713623
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,4096,24,24,128,1,fp8,fp8,0,1.055355167388916
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,4096,24,1,128,1,float16,float16,0,0.7033455848693848
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,4096,24,1,128,1,float16,fp8,0,0.6976607799530029
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,4096,24,1,128,1,fp8,fp8,0,0.6597856044769287
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,4096,24,2,128,1,float16,float16,0,0.8010831832885742
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,4096,24,2,128,1,fp8,fp8,0,0.6712992191314697
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,4096,24,2,128,1,float16,fp8,0,0.7180895805358887
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,4096,24,4,128,1,float16,float16,0,0.7129424095153809
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,4096,24,4,128,1,float16,fp8,0,0.6611279964447021
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,4096,24,4,128,1,fp8,fp8,0,0.663753604888916
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,4096,24,8,128,1,float16,float16,0,0.7297247886657715
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,4096,24,24,128,1,float16,float16,0,0.4339104175567627
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,4096,24,24,128,1,float16,fp8,0,0.3890511989593506
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,4096,24,8,128,1,float16,fp8,0,0.6554800033569336
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,4096,24,8,128,1,fp8,fp8,0,0.6602416038513184
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,4096,24,24,128,1,fp8,fp8,0,0.39164159297943113
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,4096,24,1,128,1,float16,float16,0,0.4002064228057861
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,4096,24,1,128,1,fp8,fp8,0,0.35570878982543946
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,4096,24,1,128,1,float16,fp8,0,0.39597439765930176
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,4096,24,2,128,1,float16,float16,0,0.3824656009674072
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,4096,24,2,128,1,float16,fp8,0,0.43083038330078127
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,4096,24,2,128,1,fp8,fp8,0,0.3525279998779297
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,4096,24,4,128,1,float16,float16,0,0.4049520015716553
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,4096,24,4,128,1,float16,fp8,0,0.3547888040542603
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,4096,24,8,128,1,float16,float16,0,0.39078559875488283
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,4096,24,4,128,1,fp8,fp8,0,0.38193440437316895
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,4096,24,8,128,1,float16,fp8,0,0.37489919662475585
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,4096,24,8,128,1,fp8,fp8,0,0.3532975912094116
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,4096,24,24,128,1,float16,float16,0,0.23645920753479005
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,4096,24,24,128,1,float16,fp8,0,0.2185823917388916
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,4096,24,24,128,1,fp8,fp8,0,0.2191632032394409
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,4096,24,1,128,1,float16,float16,0,0.2137295961380005
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,4096,24,1,128,1,float16,fp8,0,0.19677599668502807
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,4096,24,1,128,1,fp8,fp8,0,0.19986079931259154
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,4096,24,2,128,1,float16,float16,0,0.21349120140075684
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,4096,24,2,128,1,float16,fp8,0,0.19903839826583863
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,4096,24,2,128,1,fp8,fp8,0,0.19800479412078859
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,4096,24,4,128,1,float16,float16,0,0.2165247917175293
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,4096,24,4,128,1,fp8,fp8,0,0.19841599464416504
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,4096,24,4,128,1,float16,fp8,0,0.19767040014266968
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,4096,24,8,128,1,float16,float16,0,0.2199552059173584
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,4096,24,8,128,1,float16,fp8,0,0.19907360076904296
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,4096,24,24,128,1,float16,fp8,0,0.12715840339660645
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,4096,24,24,128,1,fp8,fp8,0,0.12685120105743408
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,4096,24,8,128,1,fp8,fp8,0,0.19986079931259154
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,4096,24,24,128,1,float16,float16,0,0.1402624011039734
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,4096,24,1,128,1,float16,float16,0,0.12539999485015868
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,4096,24,1,128,1,float16,fp8,0,0.1185215950012207
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,4096,24,2,128,1,float16,float16,0,0.1242751955986023
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,4096,24,1,128,1,fp8,fp8,0,0.12010879516601562
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,4096,24,2,128,1,float16,fp8,0,0.11725120544433594
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,4096,24,2,128,1,fp8,fp8,0,0.11925439834594727
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,4096,24,4,128,1,float16,fp8,0,0.11902079582214356
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,4096,24,4,128,1,float16,float16,0,0.12702560424804688
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,4096,24,4,128,1,fp8,fp8,0,0.11728639602661133
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,4096,24,8,128,1,float16,float16,0,0.13016799688339234
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,4096,24,8,128,1,float16,fp8,0,0.11832000017166137
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,4096,24,8,128,1,fp8,fp8,0,0.11741440296173096
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,3072,24,1,128,1,float16,float16,0,1.5603232383728027
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,3072,24,1,128,1,float16,fp8,0,1.5273839950561523
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,3072,24,1,128,1,fp8,fp8,0,1.529092788696289
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,3072,24,2,128,1,float16,float16,0,1.5737695693969727
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,3072,24,2,128,1,fp8,fp8,0,1.5255840301513672
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,3072,24,2,128,1,float16,fp8,0,1.6525760650634767
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,3072,24,4,128,1,float16,float16,0,1.614027214050293
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,3072,24,4,128,1,float16,fp8,0,1.5642255783081054
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,3072,24,4,128,1,fp8,fp8,0,1.5235055923461913
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,3072,24,8,128,1,float16,float16,0,1.6406448364257813
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,3072,24,8,128,1,fp8,fp8,0,1.548417568206787
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,3072,24,8,128,1,float16,fp8,0,1.7610416412353516
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,3072,24,24,128,1,float16,float16,0,0.9738880157470703
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,3072,24,1,128,1,float16,float16,0,0.8390288352966309
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,3072,24,24,128,1,float16,fp8,0,1.0212143898010253
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,3072,24,24,128,1,fp8,fp8,0,0.8916815757751465
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,3072,24,1,128,1,float16,fp8,0,0.9027392387390136
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,3072,24,2,128,1,float16,float16,0,0.8115088462829589
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,3072,24,1,128,1,fp8,fp8,0,0.8520352363586425
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,3072,24,2,128,1,float16,fp8,0,0.7824656009674072
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,3072,24,2,128,1,fp8,fp8,0,0.8792575836181641
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,3072,24,4,128,1,float16,fp8,0,0.7988495826721191
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,3072,24,4,128,1,fp8,fp8,0,0.7821104049682617
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,3072,24,4,128,1,float16,float16,0,0.9027440071105957
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,3072,24,24,128,1,float16,float16,0,0.5409615993499756
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,3072,24,8,128,1,float16,float16,0,0.8575551986694336
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,3072,24,8,128,1,float16,fp8,0,0.7862143993377686
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,3072,24,8,128,1,fp8,fp8,0,0.7794832229614258
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,3072,24,24,128,1,float16,fp8,0,0.5661776065826416
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,3072,24,1,128,1,float16,float16,0,0.4395135879516602
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,3072,24,24,128,1,fp8,fp8,0,0.4670383930206299
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,3072,24,1,128,1,float16,fp8,0,0.41838879585266114
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,3072,24,1,128,1,fp8,fp8,0,0.4151792049407959
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,3072,24,2,128,1,float16,float16,0,0.4389984130859375
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,3072,24,2,128,1,float16,fp8,0,0.4122511863708496
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,3072,24,2,128,1,fp8,fp8,0,0.4323472023010254
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,3072,24,4,128,1,float16,float16,0,0.4428271770477295
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,3072,24,4,128,1,float16,fp8,0,0.41165761947631835
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,3072,24,4,128,1,fp8,fp8,0,0.4128848075866699
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,3072,24,8,128,1,float16,float16,0,0.45528640747070315
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,3072,24,8,128,1,float16,fp8,0,0.4117584228515625
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,3072,24,24,128,1,float16,float16,0,0.27588319778442383
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,3072,24,8,128,1,fp8,fp8,0,0.4115856170654297
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,3072,24,24,128,1,float16,fp8,0,0.2526015996932983
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,3072,24,24,128,1,fp8,fp8,0,0.25787200927734377
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,3072,24,1,128,1,float16,float16,0,0.24066560268402098
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,3072,24,1,128,1,float16,fp8,0,0.22469279766082764
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,3072,24,1,128,1,fp8,fp8,0,0.23196640014648437
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,3072,24,2,128,1,float16,float16,0,0.24033920764923095
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,3072,24,2,128,1,float16,fp8,0,0.22426559925079345
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,3072,24,2,128,1,fp8,fp8,0,0.22630560398101807
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,3072,24,4,128,1,float16,float16,0,0.24229600429534912
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,3072,24,4,128,1,float16,fp8,0,0.2262336015701294
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,3072,24,4,128,1,fp8,fp8,0,0.22575678825378417
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,3072,24,8,128,1,float16,float16,0,0.2501215934753418
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,3072,24,8,128,1,float16,fp8,0,0.22519519329071044
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,3072,24,8,128,1,fp8,fp8,0,0.2253040075302124
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,3072,24,24,128,1,float16,float16,0,0.15578559637069703
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,3072,24,24,128,1,float16,fp8,0,0.14266719818115234
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,3072,24,1,128,1,float16,float16,0,0.13476639986038208
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,3072,24,24,128,1,fp8,fp8,0,0.14364639520645142
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,3072,24,1,128,1,float16,fp8,0,0.1275264024734497
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,3072,24,1,128,1,fp8,fp8,0,0.1279520034790039
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,3072,24,2,128,1,float16,float16,0,0.13549599647521973
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,3072,24,2,128,1,float16,fp8,0,0.12798399925231935
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,3072,24,2,128,1,fp8,fp8,0,0.1283136010169983
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,3072,24,4,128,1,float16,float16,0,0.13847999572753905
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,3072,24,4,128,1,float16,fp8,0,0.12768640518188476
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,3072,24,4,128,1,fp8,fp8,0,0.12846879959106444
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,3072,24,8,128,1,float16,float16,0,0.14153920412063598
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,3072,24,8,128,1,float16,fp8,0,0.12840319871902467
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,3072,24,24,128,1,float16,float16,0,0.09444479942321778
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,3072,24,8,128,1,fp8,fp8,0,0.12901920080184937
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,3072,24,24,128,1,float16,fp8,0,0.08694720268249512
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,3072,24,24,128,1,fp8,fp8,0,0.08727999925613403
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,3072,24,1,128,1,float16,float16,0,0.0853663980960846
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,3072,24,1,128,1,float16,fp8,0,0.07995359897613526
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,3072,24,2,128,1,float16,float16,0,0.08562560081481933
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,3072,24,1,128,1,fp8,fp8,0,0.08027999997138976
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,3072,24,2,128,1,float16,fp8,0,0.0813152015209198
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,3072,24,2,128,1,fp8,fp8,0,0.08078399896621705
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,3072,24,4,128,1,float16,float16,0,0.08456959724426269
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,3072,24,4,128,1,float16,fp8,0,0.08300160169601441
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,3072,24,4,128,1,fp8,fp8,0,0.08071839809417725
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,3072,24,8,128,1,float16,fp8,0,0.08106880187988282
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,3072,24,8,128,1,fp8,fp8,0,0.0804751992225647
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,3072,24,8,128,1,float16,float16,0,0.08733760118484497
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,2048,24,1,128,1,fp8,fp8,0,1.5611680030822754
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,2048,24,1,128,1,float16,fp8,0,1.5619423866271973
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,2048,24,1,128,1,float16,float16,0,1.6918832778930664
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,2048,24,2,128,1,float16,float16,0,1.6665887832641602
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,2048,24,2,128,1,float16,fp8,0,1.557356834411621
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,2048,24,2,128,1,fp8,fp8,0,1.5598896026611329
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,2048,24,4,128,1,float16,fp8,0,1.7064096450805664
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,2048,24,4,128,1,float16,float16,0,2.089227294921875
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,2048,24,4,128,1,fp8,fp8,0,1.5572640419006347
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,2048,24,8,128,1,float16,float16,0,1.6668336868286133
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,2048,24,8,128,1,float16,fp8,0,1.5503840446472168
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,2048,24,8,128,1,fp8,fp8,0,1.7903312683105468
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,2048,24,24,128,1,float16,float16,0,1.0116991996765137
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,2048,24,24,128,1,float16,fp8,0,1.0914048194885253
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,2048,24,24,128,1,fp8,fp8,0,0.9388640403747559
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,2048,24,1,128,1,float16,float16,0,0.8161264419555664
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,2048,24,1,128,1,float16,fp8,0,0.8084015846252441
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,2048,24,1,128,1,fp8,fp8,0,0.7998320102691651
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,2048,24,2,128,1,float16,float16,0,0.8006784439086914
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,2048,24,2,128,1,float16,fp8,0,0.8732512474060059
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,2048,24,2,128,1,fp8,fp8,0,0.7975232124328613
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,2048,24,4,128,1,float16,float16,0,0.8197168350219727
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,2048,24,4,128,1,float16,fp8,0,0.7928944110870362
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,2048,24,4,128,1,fp8,fp8,0,0.8296480178833008
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,2048,24,8,128,1,float16,float16,0,0.8483407974243165
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,2048,24,8,128,1,float16,fp8,0,0.9172719955444336
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,2048,24,24,128,1,float16,float16,0,0.5297264099121094
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,2048,24,24,128,1,float16,fp8,0,0.48624482154846194
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,2048,24,8,128,1,fp8,fp8,0,0.7941359996795654
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,2048,24,24,128,1,fp8,fp8,0,0.4848911762237549
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,2048,24,1,128,1,float16,float16,0,0.425710391998291
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,2048,24,1,128,1,fp8,fp8,0,0.4184000015258789
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,2048,24,1,128,1,float16,fp8,0,0.47371678352355956
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,2048,24,2,128,1,float16,float16,0,0.422976016998291
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,2048,24,2,128,1,float16,fp8,0,0.4188064098358154
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,2048,24,2,128,1,fp8,fp8,0,0.41637439727783204
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,2048,24,4,128,1,float16,float16,0,0.4721983909606934
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,2048,24,4,128,1,float16,fp8,0,0.4148719787597656
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,2048,24,4,128,1,fp8,fp8,0,0.42005600929260256
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,2048,24,8,128,1,float16,fp8,0,0.4120639801025391
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,2048,24,8,128,1,float16,float16,0,0.451478385925293
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,2048,24,8,128,1,fp8,fp8,0,0.4130544185638428
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,2048,24,24,128,1,float16,float16,0,0.2803967952728271
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,2048,24,24,128,1,float16,fp8,0,0.25760478973388673
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,2048,24,1,128,1,float16,float16,0,0.23032000064849853
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,2048,24,24,128,1,fp8,fp8,0,0.25835840702056884
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,2048,24,1,128,1,float16,fp8,0,0.22292160987854004
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,2048,24,1,128,1,fp8,fp8,0,0.22411201000213624
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,2048,24,2,128,1,float16,float16,0,0.22983360290527344
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,2048,24,2,128,1,float16,fp8,0,0.22286720275878907
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,2048,24,2,128,1,fp8,fp8,0,0.22413759231567382
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,2048,24,4,128,1,float16,float16,0,0.23403680324554443
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,2048,24,4,128,1,float16,fp8,0,0.22256479263305665
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,2048,24,4,128,1,fp8,fp8,0,0.22313439846038818
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,2048,24,8,128,1,float16,float16,0,0.24321599006652833
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,2048,24,8,128,1,float16,fp8,0,0.22177278995513916
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,2048,24,24,128,1,float16,float16,0,0.15380959510803222
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,2048,24,24,128,1,float16,fp8,0,0.14464960098266602
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,2048,24,8,128,1,fp8,fp8,0,0.2231856107711792
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,2048,24,1,128,1,float16,float16,0,0.12715840339660645
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,2048,24,1,128,1,float16,fp8,0,0.12347199916839599
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,2048,24,24,128,1,fp8,fp8,0,0.14345599412918092
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,2048,24,1,128,1,fp8,fp8,0,0.12294080257415771
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,2048,24,2,128,1,float16,float16,0,0.1291264057159424
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,2048,24,2,128,1,float16,fp8,0,0.12346080541610718
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,2048,24,2,128,1,fp8,fp8,0,0.12251839637756348
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,2048,24,4,128,1,float16,float16,0,0.13047200441360474
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,2048,24,4,128,1,float16,fp8,0,0.12363359928131104
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,2048,24,4,128,1,fp8,fp8,0,0.12403039932250977
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,2048,24,8,128,1,float16,fp8,0,0.12279200553894043
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,2048,24,8,128,1,float16,float16,0,0.13598400354385376
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,2048,24,8,128,1,fp8,fp8,0,0.12416800260543823
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,2048,24,24,128,1,float16,float16,0,0.09012159705162048
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,2048,24,24,128,1,float16,fp8,0,0.08375200033187866
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,2048,24,24,128,1,fp8,fp8,0,0.08352959752082825
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,2048,24,1,128,1,float16,float16,0,0.07649279832839966
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,2048,24,1,128,1,float16,fp8,0,0.07466880083084107
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,2048,24,1,128,1,fp8,fp8,0,0.07392320036888123
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,2048,24,2,128,1,float16,float16,0,0.0765936017036438
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,2048,24,2,128,1,float16,fp8,0,0.07451519966125489
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,2048,24,2,128,1,fp8,fp8,0,0.07512159943580628
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,2048,24,4,128,1,float16,float16,0,0.07734240293502807
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,2048,24,4,128,1,float16,fp8,0,0.07470080256462097
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,2048,24,4,128,1,fp8,fp8,0,0.0749776005744934
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,2048,24,8,128,1,float16,float16,0,0.07928000092506408
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,2048,24,8,128,1,float16,fp8,0,0.07507200241088867
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,2048,24,24,128,1,float16,float16,0,0.05516800284385681
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,2048,24,8,128,1,fp8,fp8,0,0.07423359751701356
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,2048,24,24,128,1,float16,fp8,0,0.055632001161575316
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,2048,24,24,128,1,fp8,fp8,0,0.05571200251579285
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,2048,24,1,128,1,float16,float16,0,0.05127679705619812
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,2048,24,1,128,1,float16,fp8,0,0.05119680166244507
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,2048,24,1,128,1,fp8,fp8,0,0.05151360034942627
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,2048,24,2,128,1,float16,float16,0,0.05154399871826172
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,2048,24,2,128,1,float16,fp8,0,0.0509984016418457
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,2048,24,4,128,1,float16,float16,0,0.052502399682998656
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,2048,24,2,128,1,fp8,fp8,0,0.050944000482559204
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,2048,24,4,128,1,float16,fp8,0,0.051158398389816284
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,2048,24,4,128,1,fp8,fp8,0,0.05146560072898865
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,2048,24,8,128,1,float16,float16,0,0.05390560030937195
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,2048,24,8,128,1,float16,fp8,0,0.05130720138549805
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,2048,24,8,128,1,fp8,fp8,0,0.05015040040016174
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1536,24,1,128,1,float16,float16,0,0.9887760162353516
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1536,24,1,128,1,fp8,fp8,0,1.0063232421875
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1536,24,1,128,1,float16,fp8,0,1.00753755569458
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1536,24,2,128,1,float16,float16,0,0.9797103881835938
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1536,24,2,128,1,float16,fp8,0,1.0042176246643066
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1536,24,2,128,1,fp8,fp8,0,1.0066335678100586
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1536,24,4,128,1,float16,float16,0,1.0094639778137207
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1536,24,4,128,1,float16,fp8,0,1.087441635131836
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1536,24,4,128,1,fp8,fp8,0,1.0019519805908204
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1536,24,8,128,1,float16,float16,0,1.107363224029541
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1536,24,8,128,1,float16,fp8,0,0.9970527648925781
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1536,24,24,128,1,float16,float16,0,0.6695248126983643
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1536,24,24,128,1,fp8,fp8,0,0.6216671943664551
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1536,24,24,128,1,float16,fp8,0,0.7229040145874024
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1536,24,8,128,1,fp8,fp8,0,1.0016271591186523
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1536,24,1,128,1,float16,float16,0,0.5071040153503418
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1536,24,1,128,1,float16,fp8,0,0.5187215805053711
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1536,24,1,128,1,fp8,fp8,0,0.5264848232269287
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1536,24,2,128,1,float16,float16,0,0.5130479812622071
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1536,24,2,128,1,float16,fp8,0,0.5159200191497803
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1536,24,2,128,1,fp8,fp8,0,0.5183663845062256
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1536,24,4,128,1,float16,float16,0,0.527123212814331
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1536,24,4,128,1,float16,fp8,0,0.5185711860656739
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1536,24,4,128,1,fp8,fp8,0,0.514470386505127
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1536,24,8,128,1,float16,float16,0,0.5517759799957276
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1536,24,8,128,1,float16,fp8,0,0.5154399871826172
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1536,24,24,128,1,float16,float16,0,0.34753758907318116
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1536,24,8,128,1,fp8,fp8,0,0.5156144142150879
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1536,24,24,128,1,float16,fp8,0,0.3258800029754639
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1536,24,24,128,1,fp8,fp8,0,0.32630081176757814
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1536,24,1,128,1,float16,float16,0,0.26973919868469237
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1536,24,1,128,1,float16,fp8,0,0.2748239994049072
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1536,24,1,128,1,fp8,fp8,0,0.27371039390563967
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1536,24,2,128,1,float16,float16,0,0.2715087890625
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1536,24,2,128,1,float16,fp8,0,0.2720223903656006
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1536,24,2,128,1,fp8,fp8,0,0.2723567962646484
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1536,24,4,128,1,float16,float16,0,0.2784751892089844
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1536,24,4,128,1,float16,fp8,0,0.2734431982040405
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1536,24,4,128,1,fp8,fp8,0,0.27133119106292725
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1536,24,8,128,1,float16,float16,0,0.2928368091583252
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1536,24,8,128,1,float16,fp8,0,0.2707887887954712
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1536,24,8,128,1,fp8,fp8,0,0.2715087890625
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1536,24,24,128,1,float16,float16,0,0.18838880062103272
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1536,24,24,128,1,float16,fp8,0,0.17584160566329957
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1536,24,1,128,1,float16,float16,0,0.14976320266723633
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1536,24,24,128,1,fp8,fp8,0,0.17555999755859375
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1536,24,1,128,1,float16,fp8,0,0.14762239456176757
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1536,24,1,128,1,fp8,fp8,0,0.14729759693145753
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1536,24,2,128,1,float16,float16,0,0.1500975966453552
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1536,24,2,128,1,float16,fp8,0,0.1477455973625183
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1536,24,2,128,1,fp8,fp8,0,0.1478656053543091
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1536,24,4,128,1,float16,float16,0,0.15275360345840455
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1536,24,4,128,1,float16,fp8,0,0.1480847954750061
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1536,24,4,128,1,fp8,fp8,0,0.1476032018661499
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1536,24,8,128,1,float16,float16,0,0.1606783986091614
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1536,24,24,128,1,float16,float16,0,0.10438079833984375
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1536,24,8,128,1,float16,fp8,0,0.14819200038909913
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1536,24,8,128,1,fp8,fp8,0,0.14752479791641235
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1536,24,24,128,1,float16,fp8,0,0.09879840016365052
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1536,24,1,128,1,float16,float16,0,0.08425120115280152
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1536,24,24,128,1,fp8,fp8,0,0.09908000230789185
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1536,24,1,128,1,float16,fp8,0,0.08409919738769531
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1536,24,2,128,1,float16,float16,0,0.08333439826965332
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1536,24,1,128,1,fp8,fp8,0,0.08340160250663757
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1536,24,2,128,1,float16,fp8,0,0.08316640257835388
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1536,24,2,128,1,fp8,fp8,0,0.08295040130615235
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1536,24,4,128,1,float16,float16,0,0.08582080006599427
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1536,24,4,128,1,float16,fp8,0,0.08319360017776489
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1536,24,8,128,1,float16,float16,0,0.09063199758529664
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1536,24,4,128,1,fp8,fp8,0,0.08336960077285767
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1536,24,8,128,1,float16,fp8,0,0.08374879956245422
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1536,24,8,128,1,fp8,fp8,0,0.08386560082435608
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1536,24,24,128,1,float16,float16,0,0.061812800168991086
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1536,24,24,128,1,float16,fp8,0,0.05944640040397644
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1536,24,24,128,1,fp8,fp8,0,0.059520000219345094
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1536,24,1,128,1,float16,float16,0,0.05203679800033569
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1536,24,1,128,1,float16,fp8,0,0.05260319709777832
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1536,24,1,128,1,fp8,fp8,0,0.05225279927253723
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1536,24,2,128,1,float16,float16,0,0.05250399708747864
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1536,24,2,128,1,float16,fp8,0,0.052369600534439086
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1536,24,2,128,1,fp8,fp8,0,0.05248640179634094
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1536,24,4,128,1,float16,float16,0,0.05336160063743591
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1536,24,4,128,1,float16,fp8,0,0.0521664023399353
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1536,24,4,128,1,fp8,fp8,0,0.05216479897499084
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1536,24,8,128,1,float16,float16,0,0.056492799520492555
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1536,24,8,128,1,float16,fp8,0,0.052742397785186766
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1536,24,8,128,1,fp8,fp8,0,0.05203840136528015
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1536,24,24,128,1,float16,fp8,0,0.035017600655555724
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1536,24,24,128,1,float16,float16,0,0.034990400075912476
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1536,24,24,128,1,fp8,fp8,0,0.035041600465774536
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1536,24,1,128,1,float16,float16,0,0.03165439963340759
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1536,24,1,128,1,float16,fp8,0,0.031435200572013856
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1536,24,1,128,1,fp8,fp8,0,0.031009599566459656
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1536,24,2,128,1,float16,float16,0,0.032867199182510375
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1536,24,2,128,1,float16,fp8,0,0.03236159980297089
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1536,24,2,128,1,fp8,fp8,0,0.030964800715446474
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1536,24,4,128,1,float16,float16,0,0.03298240005970001
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1536,24,4,128,1,float16,fp8,0,0.03103039860725403
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1536,24,8,128,1,float16,float16,0,0.03327839970588684
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1536,24,4,128,1,fp8,fp8,0,0.03091520071029663
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1536,24,8,128,1,fp8,fp8,0,0.032790398597717284
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1536,24,8,128,1,float16,fp8,0,0.031518399715423584
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,1024,24,1,128,1,float16,float16,0,1.0414239883422851
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,1024,24,1,128,1,float16,fp8,0,1.1034111976623535
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,1024,24,1,128,1,fp8,fp8,0,1.0972543716430665
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,1024,24,2,128,1,float16,float16,0,1.0457136154174804
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,1024,24,2,128,1,float16,fp8,0,1.099788761138916
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,1024,24,2,128,1,fp8,fp8,0,1.0939760208129883
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,1024,24,4,128,1,float16,float16,0,1.0652655601501464
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,1024,24,4,128,1,float16,fp8,0,1.301851177215576
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,1024,24,4,128,1,fp8,fp8,0,1.0979151725769043
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,1024,24,8,128,1,float16,float16,0,1.174891185760498
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1024,24,24,128,1,float16,float16,0,0.7586063861846923
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,1024,24,8,128,1,float16,fp8,0,1.0941295623779297
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1024,24,24,128,1,float16,fp8,0,0.7870639801025391
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,1024,24,8,128,1,fp8,fp8,0,1.0951087951660157
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1024,24,1,128,1,float16,float16,0,0.531879997253418
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1024,24,24,128,1,fp8,fp8,0,0.7042416095733642
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1024,24,1,128,1,float16,fp8,0,0.5631375789642334
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1024,24,1,128,1,fp8,fp8,0,0.5644847869873046
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1024,24,2,128,1,float16,float16,0,0.5298880100250244
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1024,24,2,128,1,float16,fp8,0,0.5631279945373535
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1024,24,2,128,1,fp8,fp8,0,0.5619872093200684
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1024,24,4,128,1,float16,float16,0,0.5549007892608643
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1024,24,4,128,1,float16,fp8,0,0.5599135875701904
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1024,24,4,128,1,fp8,fp8,0,0.5614880084991455
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1024,24,8,128,1,float16,fp8,0,0.5592175960540772
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1024,24,8,128,1,float16,float16,0,0.5910079956054688
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1024,24,24,128,1,float16,float16,0,0.3894304037094116
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1024,24,24,128,1,float16,fp8,0,0.36356000900268554
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1024,24,8,128,1,fp8,fp8,0,0.5626480102539062
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1024,24,24,128,1,fp8,fp8,0,0.3643631935119629
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1024,24,1,128,1,float16,float16,0,0.28227360248565675
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1024,24,1,128,1,float16,fp8,0,0.2941999912261963
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1024,24,2,128,1,float16,float16,0,0.281713604927063
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1024,24,1,128,1,fp8,fp8,0,0.2928816080093384
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1024,24,2,128,1,float16,fp8,0,0.2938751935958862
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1024,24,2,128,1,fp8,fp8,0,0.29310240745544436
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1024,24,4,128,1,float16,fp8,0,0.2917439937591553
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1024,24,4,128,1,float16,float16,0,0.2912015914916992
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1024,24,4,128,1,fp8,fp8,0,0.2925040006637573
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1024,24,8,128,1,float16,float16,0,0.310263991355896
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1024,24,8,128,1,float16,fp8,0,0.2905424118041992
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1024,24,8,128,1,fp8,fp8,0,0.2915888071060181
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1024,24,24,128,1,float16,float16,0,0.20508160591125488
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1024,24,24,128,1,float16,fp8,0,0.19289920330047608
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1024,24,24,128,1,fp8,fp8,0,0.1936800003051758
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1024,24,1,128,1,float16,float16,0,0.1528175950050354
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1024,24,1,128,1,float16,fp8,0,0.156985604763031
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1024,24,1,128,1,fp8,fp8,0,0.157806396484375
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1024,24,2,128,1,float16,float16,0,0.15228159427642823
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1024,24,2,128,1,float16,fp8,0,0.1567088007926941
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1024,24,2,128,1,fp8,fp8,0,0.15783519744873048
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1024,24,4,128,1,float16,float16,0,0.15724799633026124
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1024,24,4,128,1,float16,fp8,0,0.15554879903793334
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1024,24,4,128,1,fp8,fp8,0,0.156496000289917
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1024,24,8,128,1,float16,float16,0,0.16714080572128295
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1024,24,8,128,1,float16,fp8,0,0.15620319843292235
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1024,24,24,128,1,float16,float16,0,0.1123471975326538
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1024,24,8,128,1,fp8,fp8,0,0.15656640529632568
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1024,24,24,128,1,float16,fp8,0,0.10667040348052978
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1024,24,24,128,1,fp8,fp8,0,0.10656319856643677
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1024,24,1,128,1,float16,float16,0,0.08587520122528076
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1024,24,1,128,1,float16,fp8,0,0.08655999898910523
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1024,24,2,128,1,float16,float16,0,0.08697760105133057
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1024,24,2,128,1,float16,fp8,0,0.08624320030212403
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1024,24,1,128,1,fp8,fp8,0,0.08621439933776856
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1024,24,4,128,1,float16,float16,0,0.0884447991847992
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1024,24,2,128,1,fp8,fp8,0,0.08720639944076539
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1024,24,4,128,1,float16,fp8,0,0.08661440014839172
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1024,24,4,128,1,fp8,fp8,0,0.08711519837379456
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1024,24,8,128,1,float16,float16,0,0.09330880045890808
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1024,24,8,128,1,float16,fp8,0,0.08728960156440735
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1024,24,24,128,1,float16,float16,0,0.06485599875450135
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1024,24,24,128,1,float16,fp8,0,0.06207039952278137
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1024,24,8,128,1,fp8,fp8,0,0.08687360286712646
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1024,24,1,128,1,float16,float16,0,0.05140960216522217
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1024,24,24,128,1,fp8,fp8,0,0.060791999101638794
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1024,24,1,128,1,float16,fp8,0,0.05191199779510498
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1024,24,2,128,1,float16,float16,0,0.05117440223693848
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1024,24,1,128,1,fp8,fp8,0,0.051902401447296145
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1024,24,2,128,1,float16,fp8,0,0.05291360020637512
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1024,24,2,128,1,fp8,fp8,0,0.052129602432250975
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1024,24,4,128,1,float16,float16,0,0.05260639786720276
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1024,24,4,128,1,float16,fp8,0,0.05217599868774414
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1024,24,4,128,1,fp8,fp8,0,0.052121597528457644
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1024,24,8,128,1,float16,float16,0,0.054574400186538696
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1024,24,8,128,1,float16,fp8,0,0.05185279846191406
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1024,24,24,128,1,float16,float16,0,0.039139199256896975
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1024,24,8,128,1,fp8,fp8,0,0.05198079943656921
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1024,24,24,128,1,float16,fp8,0,0.04045119881629944
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1024,24,24,128,1,fp8,fp8,0,0.03981919884681702
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1024,24,1,128,1,float16,float16,0,0.036148801445961
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1024,24,1,128,1,float16,fp8,0,0.03567200005054474
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1024,24,1,128,1,fp8,fp8,0,0.035132798552513125
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1024,24,2,128,1,float16,float16,0,0.03611519932746887
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1024,24,2,128,1,float16,fp8,0,0.035604798793792726
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1024,24,4,128,1,float16,float16,0,0.03672800064086914
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1024,24,2,128,1,fp8,fp8,0,0.03545440137386322
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1024,24,4,128,1,float16,fp8,0,0.035534399747848514
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1024,24,4,128,1,fp8,fp8,0,0.03684319853782654
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1024,24,8,128,1,float16,float16,0,0.03714239895343781
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1024,24,8,128,1,float16,fp8,0,0.03540480136871338
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1024,24,24,128,1,float16,float16,0,0.026774400472640993
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1024,24,8,128,1,fp8,fp8,0,0.0366239994764328
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1024,24,24,128,1,float16,fp8,0,0.026972800493240356
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1024,24,1,128,1,float16,float16,0,0.024900799989700316
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1024,24,24,128,1,fp8,fp8,0,0.026833599805831908
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1024,24,1,128,1,float16,fp8,0,0.02489279955625534
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1024,24,1,128,1,fp8,fp8,0,0.024817599356174468
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1024,24,2,128,1,float16,float16,0,0.02476159930229187
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1024,24,2,128,1,float16,fp8,0,0.024817599356174468
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1024,24,2,128,1,fp8,fp8,0,0.02475520074367523
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1024,24,4,128,1,float16,float16,0,0.024796800315380098
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1024,24,4,128,1,float16,fp8,0,0.024879999458789825
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1024,24,4,128,1,fp8,fp8,0,0.024792000651359558
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1024,24,8,128,1,float16,fp8,0,0.024855999648571013
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1024,24,8,128,1,fp8,fp8,0,0.02481440007686615
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1024,24,8,128,1,float16,float16,0,0.024859200417995452
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,512,24,1,128,1,float16,float16,0,0.779095983505249
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,512,24,1,128,1,fp8,fp8,0,0.8654335975646973
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,512,24,1,128,1,float16,fp8,0,0.8688896179199219
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,512,24,2,128,1,float16,float16,0,0.7742144107818604
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,512,24,2,128,1,float16,fp8,0,0.8622271537780761
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,512,24,4,128,1,float16,float16,0,0.8164239883422851
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,512,24,2,128,1,fp8,fp8,0,0.8625071525573731
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,512,24,4,128,1,float16,fp8,0,0.8593503952026367
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,512,24,4,128,1,fp8,fp8,0,0.860968017578125
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,512,24,8,128,1,float16,float16,0,0.8905200004577637
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,512,24,8,128,1,float16,fp8,0,0.8585007667541504
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,512,24,24,128,1,float16,float16,0,0.6154287815093994
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,512,24,8,128,1,fp8,fp8,0,0.8576656341552734
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,512,24,24,128,1,float16,fp8,0,0.5880608081817627
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,512,24,24,128,1,fp8,fp8,0,0.5800752162933349
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,512,24,1,128,1,float16,float16,0,0.39921600818634034
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,512,24,2,128,1,float16,float16,0,0.40181598663330076
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,512,24,1,128,1,float16,fp8,0,0.44362077713012693
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,512,24,1,128,1,fp8,fp8,0,0.4468495845794678
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,512,24,2,128,1,float16,fp8,0,0.44028801918029786
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,512,24,4,128,1,float16,float16,0,0.41867518424987793
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,512,24,2,128,1,fp8,fp8,0,0.44188480377197265
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,512,24,4,128,1,float16,fp8,0,0.441315221786499
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,512,24,4,128,1,fp8,fp8,0,0.4397696018218994
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,512,24,8,128,1,float16,float16,0,0.45733280181884767
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,512,24,8,128,1,float16,fp8,0,0.4390560150146484
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,512,24,24,128,1,float16,float16,0,0.3189584016799927
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,512,24,24,128,1,float16,fp8,0,0.3010063886642456
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,512,24,8,128,1,fp8,fp8,0,0.43848319053649903
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,512,24,1,128,1,float16,float16,0,0.21253280639648436
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,512,24,24,128,1,fp8,fp8,0,0.2996432065963745
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,512,24,1,128,1,float16,fp8,0,0.23182079792022706
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,512,24,1,128,1,fp8,fp8,0,0.23180320262908935
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,512,24,2,128,1,float16,float16,0,0.21175680160522461
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,512,24,2,128,1,float16,fp8,0,0.2303839921951294
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,512,24,2,128,1,fp8,fp8,0,0.23141920566558838
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,512,24,4,128,1,float16,float16,0,0.2204832077026367
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,512,24,4,128,1,float16,fp8,0,0.22976961135864257
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,512,24,4,128,1,fp8,fp8,0,0.22994558811187743
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,512,24,8,128,1,float16,float16,0,0.2403264045715332
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,512,24,8,128,1,float16,fp8,0,0.22885921001434326
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,512,24,8,128,1,fp8,fp8,0,0.22950561046600343
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,512,24,24,128,1,float16,float16,0,0.16639679670333862
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,512,24,24,128,1,float16,fp8,0,0.15892319679260253
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,512,24,1,128,1,float16,float16,0,0.11499520540237426
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,512,24,24,128,1,fp8,fp8,0,0.15986080169677735
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,512,24,1,128,1,float16,fp8,0,0.12331520318984986
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,512,24,1,128,1,fp8,fp8,0,0.12323360443115235
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,512,24,2,128,1,float16,float16,0,0.11586079597473145
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,512,24,2,128,1,float16,fp8,0,0.12306720018386841
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,512,24,4,128,1,float16,float16,0,0.12016160488128662
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,512,24,2,128,1,fp8,fp8,0,0.12311040163040161
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,512,24,4,128,1,float16,fp8,0,0.12238080501556396
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,512,24,4,128,1,fp8,fp8,0,0.1232367992401123
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,512,24,8,128,1,float16,float16,0,0.12919360399246216
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,512,24,8,128,1,float16,fp8,0,0.1218559980392456
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,512,24,24,128,1,float16,float16,0,0.0904911994934082
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,512,24,8,128,1,fp8,fp8,0,0.12300479412078857
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,512,24,24,128,1,float16,fp8,0,0.08629279732704162
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,512,24,24,128,1,fp8,fp8,0,0.08625919818878174
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,512,24,1,128,1,float16,float16,0,0.0637776017189026
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,512,24,1,128,1,float16,fp8,0,0.06704480051994324
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,512,24,1,128,1,fp8,fp8,0,0.06716639995574951
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,512,24,2,128,1,float16,float16,0,0.06418560147285461
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,512,24,2,128,1,float16,fp8,0,0.06711999773979187
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,512,24,2,128,1,fp8,fp8,0,0.06723039746284484
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,512,24,4,128,1,float16,float16,0,0.06751360297203064
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,512,24,4,128,1,float16,fp8,0,0.06723200082778931
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,512,24,4,128,1,fp8,fp8,0,0.06767680048942566
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,512,24,8,128,1,float16,float16,0,0.07281439900398254
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,512,24,8,128,1,float16,fp8,0,0.06789119839668274
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,512,24,24,128,1,float16,float16,0,0.05245919823646546
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,512,24,24,128,1,float16,fp8,0,0.04952960014343262
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,512,24,8,128,1,fp8,fp8,0,0.067740797996521
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,512,24,24,128,1,fp8,fp8,0,0.04959680140018463
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,512,24,1,128,1,float16,float16,0,0.03915199935436249
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,512,24,1,128,1,float16,fp8,0,0.04116320013999939
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,512,24,1,128,1,fp8,fp8,0,0.041091200709342954
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,512,24,2,128,1,float16,float16,0,0.03911519944667816
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,512,24,2,128,1,float16,fp8,0,0.04116480052471161
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,512,24,2,128,1,fp8,fp8,0,0.04115679860115051
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,512,24,4,128,1,float16,float16,0,0.0437279999256134
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,512,24,4,128,1,float16,fp8,0,0.04112319946289063
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,512,24,4,128,1,fp8,fp8,0,0.0410863995552063
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,512,24,8,128,1,float16,fp8,0,0.040406399965286256
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,512,24,8,128,1,float16,float16,0,0.042121601104736325
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,512,24,8,128,1,fp8,fp8,0,0.04128639996051788
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,512,24,24,128,1,float16,float16,0,0.030876800417900085
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,512,24,24,128,1,float16,fp8,0,0.0329120010137558
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,512,24,24,128,1,fp8,fp8,0,0.03300800025463104
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,512,24,1,128,1,float16,float16,0,0.026921600103378296
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,512,24,1,128,1,float16,fp8,0,0.0284496009349823
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,512,24,1,128,1,fp8,fp8,0,0.028751999139785767
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,512,24,2,128,1,float16,float16,0,0.026820799708366393
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,512,24,2,128,1,float16,fp8,0,0.02887200117111206
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,512,24,4,128,1,float16,float16,0,0.028758400678634645
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,512,24,4,128,1,float16,fp8,0,0.02881760001182556
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,512,24,2,128,1,fp8,fp8,0,0.02886880040168762
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,512,24,4,128,1,fp8,fp8,0,0.02877120077610016
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,512,24,8,128,1,float16,float16,0,0.028884801268577575
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,512,24,24,128,1,float16,float16,0,0.02091200053691864
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,512,24,8,128,1,float16,fp8,0,0.028880000114440918
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,512,24,8,128,1,fp8,fp8,0,0.02887200117111206
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,512,24,24,128,1,float16,fp8,0,0.022596800327301027
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,512,24,24,128,1,fp8,fp8,0,0.022635200619697572
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,512,24,1,128,1,float16,float16,0,0.018619200587272643
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,512,24,1,128,1,float16,fp8,0,0.018670399487018586
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,512,24,1,128,1,fp8,fp8,0,0.019044800102710722
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,512,24,2,128,1,float16,float16,0,0.018745599687099455
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,512,24,2,128,1,float16,fp8,0,0.01881439983844757
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,512,24,4,128,1,float16,float16,0,0.01876319944858551
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,512,24,2,128,1,fp8,fp8,0,0.01863359957933426
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,512,24,4,128,1,float16,fp8,0,0.020555199682712556
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,512,24,8,128,1,float16,float16,0,0.020510399341583253
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,512,24,4,128,1,fp8,fp8,0,0.018705600500106813
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,512,24,8,128,1,float16,fp8,0,0.018606400489807128
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,512,24,8,128,1,fp8,fp8,0,0.02056480050086975
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,512,24,24,128,1,float16,float16,0,0.020664000511169435
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,512,24,24,128,1,float16,fp8,0,0.018697600066661834
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,512,24,1,128,1,float16,float16,0,0.01878879964351654
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,512,24,24,128,1,fp8,fp8,0,0.01889760047197342
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,512,24,1,128,1,float16,fp8,0,0.01876640021800995
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,512,24,1,128,1,fp8,fp8,0,0.018588800728321076
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,512,24,2,128,1,float16,float16,0,0.018731200695037843
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,512,24,2,128,1,float16,fp8,0,0.01870719939470291
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,512,24,2,128,1,fp8,fp8,0,0.018587200343608855
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,512,24,4,128,1,float16,fp8,0,0.018796800076961516
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,512,24,4,128,1,float16,float16,0,0.01860480010509491
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,512,24,4,128,1,fp8,fp8,0,0.018568000197410582
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,512,24,8,128,1,float16,float16,0,0.01870400011539459
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,512,24,8,128,1,float16,fp8,0,0.0186256006360054
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,512,24,8,128,1,fp8,fp8,0,0.018622399866580965
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,256,24,1,128,1,float16,float16,0,0.33007519245147704
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,256,24,1,128,1,fp8,fp8,0,0.37922239303588867
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,256,24,1,128,1,float16,fp8,0,0.3781280040740967
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,256,24,2,128,1,float16,float16,0,0.3281968116760254
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,256,24,2,128,1,float16,fp8,0,0.3771215915679932
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,256,24,2,128,1,fp8,fp8,0,0.3757375955581665
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,256,24,4,128,1,float16,float16,0,0.3479487895965576
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,256,24,4,128,1,float16,fp8,0,0.3763983964920044
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,256,24,4,128,1,fp8,fp8,0,0.37477760314941405
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,256,24,8,128,1,float16,float16,0,0.38461599349975584
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,256,24,8,128,1,float16,fp8,0,0.3749536037445068
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,256,24,8,128,1,fp8,fp8,0,0.37379519939422606
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,256,24,24,128,1,float16,float16,0,0.28010079860687254
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,256,24,24,128,1,float16,fp8,0,0.26557919979095457
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,256,24,24,128,1,fp8,fp8,0,0.2659327983856201
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,256,24,1,128,1,float16,float16,0,0.1746127963066101
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,256,24,1,128,1,float16,fp8,0,0.19839359521865846
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,256,24,1,128,1,fp8,fp8,0,0.1972991943359375
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,256,24,2,128,1,float16,float16,0,0.1747856020927429
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,256,24,2,128,1,float16,fp8,0,0.19749759435653685
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,256,24,2,128,1,fp8,fp8,0,0.19591039419174194
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,256,24,4,128,1,float16,float16,0,0.1840335965156555
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,256,24,4,128,1,float16,fp8,0,0.19716479778289794
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,256,24,4,128,1,fp8,fp8,0,0.19552960395812988
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,256,24,8,128,1,float16,float16,0,0.20226240158081055
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,256,24,8,128,1,float16,fp8,0,0.19659520387649537
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,256,24,24,128,1,float16,float16,0,0.14844000339508057
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,256,24,8,128,1,fp8,fp8,0,0.19509600400924682
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,256,24,24,128,1,float16,fp8,0,0.14197440147399903
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,256,24,1,128,1,float16,float16,0,0.09521120190620422
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,256,24,24,128,1,fp8,fp8,0,0.14176160097122192
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,256,24,1,128,1,float16,fp8,0,0.10660480260848999
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,256,24,1,128,1,fp8,fp8,0,0.10576640367507935
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,256,24,2,128,1,float16,float16,0,0.09636639952659606
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,256,24,2,128,1,float16,fp8,0,0.1067247986793518
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,256,24,2,128,1,fp8,fp8,0,0.10487840175628663
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,256,24,4,128,1,float16,float16,0,0.10083999633789062
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,256,24,4,128,1,float16,fp8,0,0.10614720582962037
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,256,24,4,128,1,fp8,fp8,0,0.10615999698638916
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,256,24,8,128,1,float16,float16,0,0.11017919778823852
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,256,24,8,128,1,float16,fp8,0,0.10560319423675538
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,256,24,24,128,1,float16,float16,0,0.08304479718208313
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,256,24,8,128,1,fp8,fp8,0,0.1061568021774292
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,256,24,24,128,1,float16,fp8,0,0.07843199968338013
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,256,24,1,128,1,float16,float16,0,0.054612797498703
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,256,24,24,128,1,fp8,fp8,0,0.07977120280265808
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,256,24,1,128,1,float16,fp8,0,0.05905439853668213
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,256,24,1,128,1,fp8,fp8,0,0.05905119776725769
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,256,24,2,128,1,float16,float16,0,0.05578719973564148
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,256,24,2,128,1,float16,fp8,0,0.05878880023956299
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,256,24,4,128,1,float16,float16,0,0.05888479948043823
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,256,24,2,128,1,fp8,fp8,0,0.05958240032196045
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,256,24,4,128,1,float16,fp8,0,0.05923200249671936
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,256,24,4,128,1,fp8,fp8,0,0.05968800187110901
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,256,24,8,128,1,float16,float16,0,0.06275680065155029
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,256,24,8,128,1,float16,fp8,0,0.06037120223045349
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,256,24,24,128,1,float16,float16,0,0.045332801342010495
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,256,24,8,128,1,fp8,fp8,0,0.06036639809608459
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,256,24,24,128,1,float16,fp8,0,0.04317759871482849
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,256,24,1,128,1,float16,float16,0,0.03293760120868683
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,256,24,24,128,1,fp8,fp8,0,0.04349760115146637
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,256,24,1,128,1,float16,fp8,0,0.034944000840187076
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,256,24,1,128,1,fp8,fp8,0,0.035067200660705566
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,256,24,2,128,1,float16,float16,0,0.03130559921264649
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,256,24,2,128,1,float16,fp8,0,0.03508960008621216
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,256,24,2,128,1,fp8,fp8,0,0.03493120074272156
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,256,24,4,128,1,float16,float16,0,0.032996800541877744
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,256,24,4,128,1,fp8,fp8,0,0.03484640121459961
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,256,24,8,128,1,float16,float16,0,0.03558399975299835
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,256,24,4,128,1,float16,fp8,0,0.035036799311637876
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,256,24,8,128,1,float16,fp8,0,0.034959998726844785
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,256,24,24,128,1,float16,float16,0,0.026939201354980468
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,256,24,8,128,1,fp8,fp8,0,0.034964799880981445
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,256,24,24,128,1,fp8,fp8,0,0.028940799832344054
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,256,24,24,128,1,float16,fp8,0,0.028990399837493897
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,256,24,1,128,1,float16,float16,0,0.02361599951982498
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,256,24,1,128,1,float16,fp8,0,0.0247296005487442
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,256,24,1,128,1,fp8,fp8,0,0.02480800002813339
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,256,24,2,128,1,float16,float16,0,0.023639999330043793
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,256,24,2,128,1,float16,fp8,0,0.024809600412845613
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,256,24,2,128,1,fp8,fp8,0,0.02476799935102463
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,256,24,4,128,1,float16,float16,0,0.02475520074367523
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,256,24,4,128,1,fp8,fp8,0,0.024827200174331664
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,256,24,4,128,1,float16,fp8,0,0.02502079904079437
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,256,24,8,128,1,float16,float16,0,0.024771200120449068
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,256,24,8,128,1,float16,fp8,0,0.024859200417995452
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,256,24,8,128,1,fp8,fp8,0,0.024775999784469604
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,256,24,24,128,1,float16,fp8,0,0.01860959976911545
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,256,24,24,128,1,float16,float16,0,0.018615999817848207
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,256,24,24,128,1,fp8,fp8,0,0.018654400110244752
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,256,24,1,128,1,float16,float16,0,0.016516800224781036
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,256,24,1,128,1,float16,fp8,0,0.01679999977350235
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,256,24,1,128,1,fp8,fp8,0,0.01674560010433197
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,256,24,2,128,1,float16,float16,0,0.01671359986066818
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,256,24,2,128,1,float16,fp8,0,0.016715200245380403
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,256,24,4,128,1,float16,float16,0,0.01669919937849045
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,256,24,2,128,1,fp8,fp8,0,0.01685439944267273
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,256,24,4,128,1,float16,fp8,0,0.01676799952983856
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,256,24,4,128,1,fp8,fp8,0,0.016726399958133697
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,256,24,8,128,1,float16,float16,0,0.01679359972476959
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,256,24,8,128,1,float16,fp8,0,0.016684800386428833
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,256,24,8,128,1,fp8,fp8,0,0.016697600483894348
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,256,24,24,128,1,float16,float16,0,0.01666879951953888
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,256,24,24,128,1,float16,fp8,0,0.016835199296474458
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,256,24,24,128,1,fp8,fp8,0,0.017451199889183044
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,256,24,1,128,1,float16,float16,0,0.01661919951438904
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,256,24,1,128,1,float16,fp8,0,0.016467200219631196
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,256,24,1,128,1,fp8,fp8,0,0.01659359931945801
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,256,24,2,128,1,fp8,fp8,0,0.016553600132465363
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,256,24,2,128,1,float16,float16,0,0.01658080071210861
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,256,24,2,128,1,float16,fp8,0,0.016439999639987945
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,256,24,4,128,1,float16,float16,0,0.016542400419712066
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,256,24,4,128,1,float16,fp8,0,0.01658560037612915
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,256,24,8,128,1,float16,float16,0,0.01653759926557541
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,256,24,4,128,1,fp8,fp8,0,0.016633599996566772
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,256,24,8,128,1,float16,fp8,0,0.01658560037612915
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,256,24,8,128,1,fp8,fp8,0,0.016433599591255187
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,256,24,24,128,1,float16,float16,0,0.016599999368190767
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,256,24,24,128,1,float16,fp8,0,0.01656000018119812
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,256,24,24,128,1,fp8,fp8,0,0.016523200273513793
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,256,24,1,128,1,float16,fp8,0,0.014641599357128143
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,256,24,1,128,1,float16,float16,0,0.01488959938287735
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,256,24,1,128,1,fp8,fp8,0,0.01467359960079193
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,256,24,2,128,1,float16,float16,0,0.014740799367427827
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,256,24,2,128,1,float16,fp8,0,0.01483680009841919
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,256,24,2,128,1,fp8,fp8,0,0.016484799981117248
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,256,24,4,128,1,float16,fp8,0,0.014694400131702423
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,256,24,4,128,1,float16,float16,0,0.01459839940071106
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,256,24,4,128,1,fp8,fp8,0,0.016390399634838106
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,256,24,8,128,1,float16,float16,0,0.016407999396324157
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,256,24,8,128,1,float16,fp8,0,0.015241600573062897
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,256,24,8,128,1,fp8,fp8,0,0.016444799304008485
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,128,24,1,128,1,float16,float16,0,0.20206239223480224
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,128,24,1,128,1,fp8,fp8,0,0.2263024091720581
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,128,24,1,128,1,float16,fp8,0,0.2252255916595459
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,128,24,2,128,1,float16,float16,0,0.20113599300384521
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,128,24,2,128,1,float16,fp8,0,0.2258143901824951
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,128,24,2,128,1,fp8,fp8,0,0.22401440143585205
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,128,24,4,128,1,float16,float16,0,0.21005599498748778
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,128,24,4,128,1,float16,fp8,0,0.22490880489349366
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,128,24,4,128,1,fp8,fp8,0,0.22363998889923095
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,128,24,8,128,1,float16,float16,0,0.22922239303588868
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,128,24,8,128,1,float16,fp8,0,0.22438719272613525
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,128,24,8,128,1,fp8,fp8,0,0.22375841140747071
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,128,24,24,128,1,float16,float16,0,0.1582543969154358
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,128,24,24,128,1,float16,fp8,0,0.15408799648284913
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,128,24,24,128,1,fp8,fp8,0,0.1529871940612793
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,128,24,1,128,1,float16,float16,0,0.10805599689483643
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,128,24,1,128,1,float16,fp8,0,0.11950399875640869
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,128,24,1,128,1,fp8,fp8,0,0.1181615948677063
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,128,24,2,128,1,float16,float16,0,0.10835200548171997
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,128,24,2,128,1,float16,fp8,0,0.11923199892044067
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,128,24,4,128,1,float16,float16,0,0.11274720430374145
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,128,24,2,128,1,fp8,fp8,0,0.11742719411849975
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,128,24,4,128,1,float16,fp8,0,0.11899199485778808
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,128,24,8,128,1,float16,fp8,0,0.1190224051475525
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,128,24,4,128,1,fp8,fp8,0,0.11941440105438232
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,128,24,8,128,1,float16,float16,0,0.1212048053741455
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,128,24,24,128,1,float16,float16,0,0.08329439759254456
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,128,24,8,128,1,fp8,fp8,0,0.11911360025405884
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,128,24,24,128,1,float16,fp8,0,0.08253759741783143
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,128,24,1,128,1,float16,float16,0,0.060139197111129764
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,128,24,24,128,1,fp8,fp8,0,0.08380159735679626
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,128,24,1,128,1,float16,fp8,0,0.06385120153427123
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,128,24,2,128,1,float16,float16,0,0.06078400015830994
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,128,24,1,128,1,fp8,fp8,0,0.06528639793395996
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,128,24,2,128,1,float16,fp8,0,0.06372640132904053
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,128,24,4,128,1,float16,float16,0,0.063755202293396
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,128,24,2,128,1,fp8,fp8,0,0.06418880224227905
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,128,24,4,128,1,float16,fp8,0,0.06446560025215149
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,128,24,4,128,1,fp8,fp8,0,0.06574879884719849
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,128,24,8,128,1,float16,float16,0,0.06759999990463257
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,128,24,8,128,1,float16,fp8,0,0.0652944028377533
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,128,24,8,128,1,fp8,fp8,0,0.06587679982185364
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,128,24,24,128,1,float16,float16,0,0.04750399887561798
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,128,24,24,128,1,float16,fp8,0,0.047363200783729555
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,128,24,1,128,1,float16,float16,0,0.03514559864997864
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,128,24,24,128,1,fp8,fp8,0,0.04721600115299225
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,128,24,1,128,1,float16,fp8,0,0.037371200323104856
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,128,24,2,128,1,float16,float16,0,0.03504799902439117
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,128,24,1,128,1,fp8,fp8,0,0.03762399852275848
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,128,24,2,128,1,float16,fp8,0,0.0372079998254776
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,128,24,2,128,1,fp8,fp8,0,0.0374752014875412
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,128,24,4,128,1,float16,float16,0,0.03668160140514374
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,128,24,4,128,1,float16,fp8,0,0.0371535986661911
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,128,24,8,128,1,float16,float16,0,0.03912799954414368
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,128,24,4,128,1,fp8,fp8,0,0.03761439919471741
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,128,24,8,128,1,float16,fp8,0,0.03729760050773621
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,128,24,8,128,1,fp8,fp8,0,0.037651199102401736
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,128,24,24,128,1,float16,float16,0,0.024886399507522583
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,128,24,24,128,1,fp8,fp8,0,0.02823840081691742
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,128,24,24,128,1,float16,fp8,0,0.02874560058116913
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,128,24,1,128,1,float16,float16,0,0.02213120013475418
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,128,24,1,128,1,fp8,fp8,0,0.023500800132751465
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,128,24,2,128,1,float16,float16,0,0.022579200565814972
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,128,24,1,128,1,float16,fp8,0,0.022995199263095855
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,128,24,2,128,1,float16,fp8,0,0.023524799942970277
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,128,24,2,128,1,fp8,fp8,0,0.022896000742912294
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,128,24,4,128,1,float16,fp8,0,0.0227743998169899
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,128,24,4,128,1,float16,float16,0,0.022878399491310118
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,128,24,4,128,1,fp8,fp8,0,0.022809599339962006
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,128,24,8,128,1,float16,float16,0,0.022852799296379088
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,128,24,8,128,1,fp8,fp8,0,0.024007999897003175
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,128,24,8,128,1,float16,fp8,0,0.02295520007610321
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,128,24,24,128,1,float16,fp8,0,0.02059199959039688
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,128,24,24,128,1,float16,float16,0,0.018513600528240203
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,128,24,24,128,1,fp8,fp8,0,0.020729599893093108
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,128,24,1,128,1,float16,float16,0,0.016689600050449373
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,128,24,1,128,1,float16,fp8,0,0.01842560023069382
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,128,24,2,128,1,float16,float16,0,0.01675360053777695
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,128,24,1,128,1,fp8,fp8,0,0.018718400597572328
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,128,24,2,128,1,float16,fp8,0,0.018534399569034576
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,128,24,2,128,1,fp8,fp8,0,0.018486399948596955
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,128,24,4,128,1,float16,fp8,0,0.018481600284576415
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,128,24,4,128,1,float16,float16,0,0.016832000017166136
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,128,24,4,128,1,fp8,fp8,0,0.01858240067958832
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,128,24,8,128,1,float16,float16,0,0.018489600718021394
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,128,24,8,128,1,fp8,fp8,0,0.01857759952545166
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,128,24,8,128,1,float16,fp8,0,0.017297600209712983
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,128,24,24,128,1,float16,float16,0,0.014448000490665436
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,128,24,24,128,1,float16,fp8,0,0.01454399973154068
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,128,24,24,128,1,fp8,fp8,0,0.014662399888038635
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,128,24,1,128,1,float16,float16,0,0.012582400441169738
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,128,24,1,128,1,float16,fp8,0,0.012611199915409089
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,128,24,2,128,1,float16,float16,0,0.012534399330615998
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,128,24,1,128,1,fp8,fp8,0,0.012580800056457519
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,128,24,2,128,1,float16,fp8,0,0.012614400684833526
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,128,24,2,128,1,fp8,fp8,0,0.012620800733566284
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,128,24,4,128,1,float16,fp8,0,0.012603199481964112
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,128,24,4,128,1,float16,float16,0,0.012590399384498597
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,128,24,4,128,1,fp8,fp8,0,0.012593600153923034
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,128,24,8,128,1,float16,float16,0,0.012600000202655792
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,128,24,8,128,1,float16,fp8,0,0.012656000256538392
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,128,24,24,128,1,float16,float16,0,0.012577599287033081
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,128,24,8,128,1,fp8,fp8,0,0.012608000636100769
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,128,24,24,128,1,fp8,fp8,0,0.01260959953069687
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,128,24,1,128,1,float16,float16,0,0.01244639977812767
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,128,24,24,128,1,float16,fp8,0,0.012579199671745301
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,128,24,1,128,1,float16,fp8,0,0.012503999471664428
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,128,24,1,128,1,fp8,fp8,0,0.01249919980764389
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,128,24,2,128,1,float16,fp8,0,0.012459199875593185
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,128,24,2,128,1,float16,float16,0,0.01250080019235611
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,128,24,2,128,1,fp8,fp8,0,0.012451200187206269
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,128,24,4,128,1,float16,float16,0,0.012488000094890594
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,128,24,4,128,1,float16,fp8,0,0.012569600343704223
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,128,24,4,128,1,fp8,fp8,0,0.012564800679683685
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,128,24,8,128,1,float16,float16,0,0.0125231996178627
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,128,24,8,128,1,fp8,fp8,0,0.0124719999730587
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,128,24,8,128,1,float16,fp8,0,0.012641599774360657
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,128,24,24,128,1,float16,float16,0,0.012516799569129943
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,128,24,1,128,1,float16,float16,0,0.012470400333404541
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,128,24,24,128,1,float16,fp8,0,0.012615999579429627
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,128,24,24,128,1,fp8,fp8,0,0.012483199685811996
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,128,24,1,128,1,float16,fp8,0,0.012387199699878693
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,128,24,2,128,1,float16,float16,0,0.012459199875593185
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,128,24,1,128,1,fp8,fp8,0,0.012451200187206269
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,128,24,2,128,1,float16,fp8,0,0.01244800016283989
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,128,24,2,128,1,fp8,fp8,0,0.012483199685811996
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,128,24,4,128,1,float16,float16,0,0.012439999729394913
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,128,24,4,128,1,float16,fp8,0,0.012353599816560746
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,128,24,4,128,1,fp8,fp8,0,0.012436799705028534
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,128,24,8,128,1,float16,float16,0,0.0124719999730587
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,128,24,8,128,1,float16,fp8,0,0.012483199685811996
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,128,24,8,128,1,fp8,fp8,0,0.012462399899959564
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,64,24,1,128,1,float16,float16,0,0.15752639770507812
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,64,24,1,128,1,float16,fp8,0,0.16779199838638306
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,64,24,1,128,1,fp8,fp8,0,0.16690399646759033
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,64,24,2,128,1,float16,float16,0,0.15681760311126708
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,64,24,2,128,1,float16,fp8,0,0.1675376057624817
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,64,24,2,128,1,fp8,fp8,0,0.16664639711380005
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,64,24,4,128,1,float16,float16,0,0.16217119693756105
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,64,24,4,128,1,float16,fp8,0,0.16743199825286864
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,64,24,4,128,1,fp8,fp8,0,0.1662287950515747
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,64,24,8,128,1,float16,float16,0,0.17027679681777955
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,64,24,8,128,1,float16,fp8,0,0.1669968008995056
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,64,24,8,128,1,fp8,fp8,0,0.16616480350494384
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,64,24,24,128,1,float16,fp8,0,0.10707679986953736
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,64,24,24,128,1,float16,float16,0,0.10672800540924073
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,64,24,1,128,1,float16,float16,0,0.08505280017852783
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,64,24,24,128,1,fp8,fp8,0,0.10693119764328003
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,64,24,1,128,1,float16,fp8,0,0.08836960196495056
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,64,24,1,128,1,fp8,fp8,0,0.0883023977279663
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,64,24,2,128,1,float16,float16,0,0.08559200167655945
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,64,24,2,128,1,float16,fp8,0,0.08828319907188416
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,64,24,2,128,1,fp8,fp8,0,0.0886031985282898
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,64,24,4,128,1,float16,float16,0,0.088264000415802
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,64,24,4,128,1,float16,fp8,0,0.08843200206756592
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,64,24,4,128,1,fp8,fp8,0,0.08856959939002991
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,64,24,8,128,1,float16,float16,0,0.09208160042762756
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,64,24,8,128,1,float16,fp8,0,0.08838080167770386
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,64,24,8,128,1,fp8,fp8,0,0.08830400109291077
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,64,24,24,128,1,float16,float16,0,0.05851519703865051
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,64,24,24,128,1,float16,fp8,0,0.05774719715118408
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,64,24,24,128,1,fp8,fp8,0,0.05781919956207275
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,64,24,1,128,1,float16,float16,0,0.04698080122470856
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,64,24,1,128,1,float16,fp8,0,0.049425598978996274
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,64,24,1,128,1,fp8,fp8,0,0.049327999353408813
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,64,24,2,128,1,float16,float16,0,0.047367998957633974
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,64,24,2,128,1,float16,fp8,0,0.04931359887123108
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,64,24,2,128,1,fp8,fp8,0,0.04944159984588623
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,64,24,4,128,1,float16,float16,0,0.047513601183891294
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,64,24,4,128,1,float16,fp8,0,0.04937280118465424
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,64,24,8,128,1,float16,float16,0,0.049584001302719116
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,64,24,8,128,1,float16,fp8,0,0.04947839975357056
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,64,24,4,128,1,fp8,fp8,0,0.04976159930229187
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,64,24,8,128,1,fp8,fp8,0,0.049377599358558656
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,64,24,24,128,1,float16,float16,0,0.03115360140800476
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,64,24,24,128,1,float16,fp8,0,0.034622400999069214
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,64,24,24,128,1,fp8,fp8,0,0.03406400084495544
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,64,24,1,128,1,float16,float16,0,0.02884800136089325
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,64,24,1,128,1,fp8,fp8,0,0.028958401083946227
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,64,24,1,128,1,float16,fp8,0,0.029083201289176942
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,64,24,2,128,1,float16,float16,0,0.028993600606918336
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,64,24,2,128,1,fp8,fp8,0,0.029311999678611755
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,64,24,2,128,1,float16,fp8,0,0.03041599988937378
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,64,24,4,128,1,float16,float16,0,0.028995200991630554
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,64,24,4,128,1,float16,fp8,0,0.028998398780822755
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,64,24,8,128,1,float16,float16,0,0.03089280128479004
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,64,24,4,128,1,fp8,fp8,0,0.03028160035610199
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,64,24,8,128,1,fp8,fp8,0,0.030695998668670656
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,64,24,8,128,1,float16,fp8,0,0.029177600145339967
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,64,24,24,128,1,float16,float16,0,0.020740799605846405
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,64,24,24,128,1,float16,fp8,0,0.021660800278186797
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,64,24,1,128,1,float16,float16,0,0.018622399866580965
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,64,24,24,128,1,fp8,fp8,0,0.020824000239372253
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,64,24,1,128,1,float16,fp8,0,0.0188960000872612
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,64,24,1,128,1,fp8,fp8,0,0.018681600689888
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,64,24,2,128,1,float16,float16,0,0.01878879964351654
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,64,24,2,128,1,float16,fp8,0,0.01873439997434616
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,64,24,2,128,1,fp8,fp8,0,0.018745599687099455
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,64,24,4,128,1,float16,fp8,0,0.01879359930753708
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,64,24,4,128,1,float16,float16,0,0.01889919936656952
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,64,24,4,128,1,fp8,fp8,0,0.018643200397491455
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,64,24,8,128,1,float16,float16,0,0.018756799399852753
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,64,24,8,128,1,float16,fp8,0,0.018614399433135986
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,64,24,8,128,1,fp8,fp8,0,0.018697600066661834
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,64,24,24,128,1,float16,float16,0,0.016516800224781036
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,64,24,24,128,1,float16,fp8,0,0.016590400040149687
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,64,24,24,128,1,fp8,fp8,0,0.01658560037612915
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,64,24,1,128,1,float16,float16,0,0.014723199605941772
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,64,24,1,128,1,fp8,fp8,0,0.014502400159835815
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,64,24,1,128,1,float16,fp8,0,0.01456640064716339
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,64,24,2,128,1,float16,fp8,0,0.014603200554847717
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,64,24,2,128,1,float16,float16,0,0.014697599411010741
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,64,24,2,128,1,fp8,fp8,0,0.014510400593280792
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,64,24,4,128,1,float16,float16,0,0.01451839953660965
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,64,24,4,128,1,float16,fp8,0,0.014644800126552582
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,64,24,4,128,1,fp8,fp8,0,0.014619199931621552
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,64,24,8,128,1,float16,float16,0,0.014531199634075165
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,64,24,8,128,1,float16,fp8,0,0.014718399941921234
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,64,24,8,128,1,fp8,fp8,0,0.014545600116252898
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,64,24,24,128,1,float16,float16,0,0.012451200187206269
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,64,24,24,128,1,float16,fp8,0,0.01263359934091568
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,64,24,24,128,1,fp8,fp8,0,0.0124719999730587
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,64,24,1,128,1,float16,float16,0,0.010521599650382995
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,64,24,1,128,1,float16,fp8,0,0.012460800260305405
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,64,24,1,128,1,fp8,fp8,0,0.011585599929094314
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,64,24,2,128,1,float16,float16,0,0.010630399733781815
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,64,24,2,128,1,float16,fp8,0,0.012483199685811996
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,64,24,2,128,1,fp8,fp8,0,0.01244639977812767
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,64,24,4,128,1,float16,float16,0,0.01050880029797554
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,64,24,4,128,1,float16,fp8,0,0.012412799894809723
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,64,24,4,128,1,fp8,fp8,0,0.012399999797344208
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,64,24,8,128,1,float16,float16,0,0.010468800365924836
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,64,24,8,128,1,float16,fp8,0,0.012388800084590913
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,64,24,8,128,1,fp8,fp8,0,0.01242400035262108
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,64,24,24,128,1,float16,fp8,0,0.010540799796581268
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,64,24,24,128,1,float16,float16,0,0.010505600273609162
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,64,24,24,128,1,fp8,fp8,0,0.010486400127410889
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,64,24,1,128,1,float16,float16,0,0.01045759990811348
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,64,24,1,128,1,float16,fp8,0,0.010406400263309478
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,64,24,1,128,1,fp8,fp8,0,0.01061279997229576
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,64,24,2,128,1,float16,float16,0,0.010526400059461594
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,64,24,2,128,1,float16,fp8,0,0.010494399815797806
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,64,24,2,128,1,fp8,fp8,0,0.010411199927330018
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,64,24,4,128,1,float16,float16,0,0.010496000200510025
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,64,24,4,128,1,float16,fp8,0,0.010427200049161912
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,64,24,8,128,1,float16,fp8,0,0.010417599976062775
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,64,24,8,128,1,float16,float16,0,0.010545600205659866
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,64,24,4,128,1,fp8,fp8,0,0.010467199981212616
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,64,24,8,128,1,fp8,fp8,0,0.010547199845314026
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,64,24,24,128,1,float16,float16,0,0.012411200255155564
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,64,24,24,128,1,float16,fp8,0,0.010377600044012069
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,64,24,24,128,1,fp8,fp8,0,0.010406400263309478
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,64,24,1,128,1,float16,float16,0,0.010540799796581268
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,64,24,1,128,1,float16,fp8,0,0.010416000336408614
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,64,24,1,128,1,fp8,fp8,0,0.010451199859380722
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,64,24,2,128,1,float16,float16,0,0.010548800230026245
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,64,24,2,128,1,float16,fp8,0,0.010412800312042236
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,64,24,2,128,1,fp8,fp8,0,0.010449600219726563
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,64,24,4,128,1,float16,float16,0,0.010414399951696397
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,64,24,4,128,1,float16,fp8,0,0.010487999767065048
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,64,24,4,128,1,fp8,fp8,0,0.010420800000429154
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,64,24,8,128,1,float16,float16,0,0.01048320010304451
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,64,24,8,128,1,float16,fp8,0,0.010460799932479859
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,64,24,8,128,1,fp8,fp8,0,0.01043040007352829
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,32,24,1,128,1,float16,float16,0,0.13547199964523315
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,32,24,1,128,1,float16,fp8,0,0.13910080194473268
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,32,24,1,128,1,fp8,fp8,0,0.1381791949272156
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,32,24,2,128,1,float16,float16,0,0.13579360246658326
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,32,24,2,128,1,fp8,fp8,0,0.13873280286788942
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,32,24,2,128,1,float16,fp8,0,0.1376512050628662
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,32,24,4,128,1,float16,float16,0,0.1384768009185791
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,32,24,4,128,1,float16,fp8,0,0.13950079679489136
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,32,24,4,128,1,fp8,fp8,0,0.1396000027656555
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,32,24,8,128,1,float16,float16,0,0.14226239919662476
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,32,24,8,128,1,float16,fp8,0,0.139628803730011
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,32,24,24,128,1,float16,float16,0,0.0838703989982605
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,32,24,8,128,1,fp8,fp8,0,0.1399183988571167
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,32,24,24,128,1,float16,fp8,0,0.08261759877204895
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,32,24,24,128,1,fp8,fp8,0,0.08366240262985229
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,32,24,1,128,1,float16,float16,0,0.07238879799842834
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,32,24,1,128,1,float16,fp8,0,0.07423679828643799
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,32,24,1,128,1,fp8,fp8,0,0.07397440075874329
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,32,24,2,128,1,float16,float16,0,0.07218239903450012
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,32,24,2,128,1,float16,fp8,0,0.07410560250282287
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,32,24,2,128,1,fp8,fp8,0,0.07402719855308533
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,32,24,4,128,1,float16,float16,0,0.0730351984500885
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,32,24,4,128,1,float16,fp8,0,0.07416639924049377
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,32,24,4,128,1,fp8,fp8,0,0.07399680018424988
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,32,24,8,128,1,float16,float16,0,0.07630879878997802
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,32,24,8,128,1,fp8,fp8,0,0.0739408016204834
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,32,24,8,128,1,float16,fp8,0,0.074099200963974
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,32,24,24,128,1,float16,float16,0,0.04556800127029419
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,32,24,24,128,1,float16,fp8,0,0.04531520009040833
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,32,24,24,128,1,fp8,fp8,0,0.045351999998092654
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,32,24,1,128,1,float16,float16,0,0.04139519929885864
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,32,24,1,128,1,float16,fp8,0,0.04122720062732697
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,32,24,1,128,1,fp8,fp8,0,0.04126240015029907
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,32,24,2,128,1,float16,fp8,0,0.04121440052986145
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,32,24,2,128,1,float16,float16,0,0.041391998529434204
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,32,24,2,128,1,fp8,fp8,0,0.041247999668121337
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,32,24,4,128,1,float16,float16,0,0.041222399473190306
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,32,24,4,128,1,float16,fp8,0,0.04254719913005829
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,32,24,4,128,1,fp8,fp8,0,0.04129759967327118
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,32,24,8,128,1,float16,float16,0,0.042972800135612485
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,32,24,8,128,1,float16,fp8,0,0.04261760115623474
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,32,24,24,128,1,float16,float16,0,0.026940798759460448
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,32,24,8,128,1,fp8,fp8,0,0.04176799952983856
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,32,24,24,128,1,float16,fp8,0,0.02885279953479767
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,32,24,1,128,1,float16,float16,0,0.02557600140571594
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,32,24,24,128,1,fp8,fp8,0,0.028984001278877257
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,32,24,1,128,1,float16,fp8,0,0.026800000667572023
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,32,24,1,128,1,fp8,fp8,0,0.027035200595855714
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,32,24,2,128,1,float16,float16,0,0.025259199738502502
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,32,24,2,128,1,float16,fp8,0,0.026688000559806822
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,32,24,2,128,1,fp8,fp8,0,0.02686559855937958
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,32,24,4,128,1,float16,float16,0,0.026001599431037904
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,32,24,4,128,1,float16,fp8,0,0.026840001344680786
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,32,24,4,128,1,fp8,fp8,0,0.02680639922618866
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,32,24,8,128,1,float16,float16,0,0.026836800575256347
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,32,24,8,128,1,float16,fp8,0,0.02669920027256012
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,32,24,8,128,1,fp8,fp8,0,0.026875200867652892
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,32,24,24,128,1,float16,float16,0,0.01857600063085556
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,32,24,24,128,1,float16,fp8,0,0.018639999628067016
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,32,24,1,128,1,float16,float16,0,0.016569599509239197
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,32,24,24,128,1,fp8,fp8,0,0.018723200261592864
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,32,24,1,128,1,float16,fp8,0,0.016820800304412842
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,32,24,1,128,1,fp8,fp8,0,0.01666080057621002
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,32,24,2,128,1,float16,float16,0,0.016667200624942778
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,32,24,2,128,1,float16,fp8,0,0.016843199729919434
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,32,24,2,128,1,fp8,fp8,0,0.01659200042486191
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,32,24,4,128,1,float16,float16,0,0.016708800196647645
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,32,24,4,128,1,float16,fp8,0,0.01671999990940094
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,32,24,4,128,1,fp8,fp8,0,0.016622400283813475
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,32,24,8,128,1,float16,float16,0,0.016651199758052827
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,32,24,8,128,1,float16,fp8,0,0.016697600483894348
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,32,24,8,128,1,fp8,fp8,0,0.01669120043516159
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,32,24,24,128,1,float16,float16,0,0.01451839953660965
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,32,24,24,128,1,float16,fp8,0,0.014696000516414643
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,32,24,24,128,1,fp8,fp8,0,0.014619199931621552
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,32,24,1,128,1,float16,float16,0,0.01276479959487915
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,32,24,1,128,1,float16,fp8,0,0.014496000111103058
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,32,24,1,128,1,fp8,fp8,0,0.014508800208568573
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,32,24,2,128,1,float16,float16,0,0.014508800208568573
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,32,24,2,128,1,float16,fp8,0,0.0146479994058609
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,32,24,2,128,1,fp8,fp8,0,0.014555199444293976
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,32,24,4,128,1,float16,float16,0,0.014603200554847717
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,32,24,4,128,1,float16,fp8,0,0.01446560025215149
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,32,24,4,128,1,fp8,fp8,0,0.014500799775123595
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,32,24,8,128,1,float16,float16,0,0.014519999921321868
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,32,24,8,128,1,float16,fp8,0,0.014505599439144135
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,32,24,8,128,1,fp8,fp8,0,0.014449599385261535
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,32,24,24,128,1,float16,float16,0,0.01234079971909523
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,32,24,24,128,1,float16,fp8,0,0.012427199631929398
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,32,24,24,128,1,fp8,fp8,0,0.01043360009789467
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,32,24,1,128,1,float16,float16,0,0.010444799810647965
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,32,24,1,128,1,float16,fp8,0,0.010471999645233154
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,32,24,1,128,1,fp8,fp8,0,0.0104032002389431
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,32,24,2,128,1,float16,float16,0,0.01045600026845932
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,32,24,2,128,1,float16,fp8,0,0.010449600219726563
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,32,24,2,128,1,fp8,fp8,0,0.010487999767065048
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,32,24,4,128,1,float16,float16,0,0.010465600341558457
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,32,24,4,128,1,float16,fp8,0,0.01043360009789467
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,32,24,4,128,1,fp8,fp8,0,0.01045600026845932
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,32,24,8,128,1,float16,float16,0,0.010440000146627427
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,32,24,8,128,1,float16,fp8,0,0.011488000303506852
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,32,24,8,128,1,fp8,fp8,0,0.01043040007352829
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,32,24,24,128,1,float16,float16,0,0.010407999902963639
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,32,24,24,128,1,float16,fp8,0,0.011020799726247787
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,32,24,24,128,1,fp8,fp8,0,0.010543999820947647
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,32,24,1,128,1,float16,float16,0,0.010425599664449692
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,32,24,1,128,1,float16,fp8,0,0.010526400059461594
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,32,24,1,128,1,fp8,fp8,0,0.01037440001964569
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,32,24,2,128,1,float16,float16,0,0.010425599664449692
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,32,24,2,128,1,float16,fp8,0,0.010507199913263321
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,32,24,2,128,1,fp8,fp8,0,0.01043360009789467
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,32,24,4,128,1,float16,float16,0,0.01048479974269867
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,32,24,4,128,1,float16,fp8,0,0.010468800365924836
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,32,24,4,128,1,fp8,fp8,0,0.010364799946546554
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,32,24,8,128,1,float16,float16,0,0.01050880029797554
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,32,24,8,128,1,float16,fp8,0,0.01053759977221489
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,32,24,8,128,1,fp8,fp8,0,0.010444799810647965
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,32,24,24,128,1,float16,float16,0,0.010576000064611435
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,32,24,24,128,1,float16,fp8,0,0.01050880029797554
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,32,24,24,128,1,fp8,fp8,0,0.010412800312042236
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,32,24,1,128,1,float16,float16,0,0.010470400005578995
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,32,24,1,128,1,float16,fp8,0,0.010520000010728836
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,32,24,1,128,1,fp8,fp8,0,0.010371199995279311
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,32,24,2,128,1,float16,float16,0,0.010516799986362457
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,32,24,2,128,1,float16,fp8,0,0.0105103999376297
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,32,24,2,128,1,fp8,fp8,0,0.01040479987859726
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,32,24,4,128,1,float16,float16,0,0.010419200360774993
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,32,24,4,128,1,float16,fp8,0,0.010521599650382995
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,32,24,4,128,1,fp8,fp8,0,0.010393600165843963
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,32,24,8,128,1,float16,float16,0,0.01048159971833229
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,32,24,8,128,1,float16,fp8,0,0.01048479974269867
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,32,24,8,128,1,fp8,fp8,0,0.010417599976062775
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,16,24,1,128,1,float16,float16,0,0.127457594871521
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,16,24,1,128,1,float16,fp8,0,0.12321120500564575
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,16,24,1,128,1,fp8,fp8,0,0.1246351957321167
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,16,24,2,128,1,float16,float16,0,0.12741919755935668
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,16,24,2,128,1,float16,fp8,0,0.12323039770126343
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,16,24,2,128,1,fp8,fp8,0,0.1253775954246521
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,16,24,4,128,1,float16,float16,0,0.1295040011405945
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,16,24,4,128,1,float16,fp8,0,0.12334400415420532
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,16,24,4,128,1,fp8,fp8,0,0.12455999851226807
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,16,24,8,128,1,float16,float16,0,0.133406400680542
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,16,24,8,128,1,fp8,fp8,0,0.12514400482177734
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,16,24,8,128,1,float16,fp8,0,0.12311840057373047
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,16,24,24,128,1,float16,fp8,0,0.07126719951629638
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,16,24,24,128,1,float16,float16,0,0.07323840260505676
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,16,24,24,128,1,fp8,fp8,0,0.07007359862327575
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,16,24,1,128,1,float16,float16,0,0.06992800235748291
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,16,24,1,128,1,float16,fp8,0,0.06607999801635742
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,16,24,1,128,1,fp8,fp8,0,0.0658735990524292
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,16,24,2,128,1,float16,float16,0,0.0699455976486206
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,16,24,2,128,1,fp8,fp8,0,0.0658191978931427
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,16,24,2,128,1,float16,fp8,0,0.06586880087852479
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,16,24,4,128,1,float16,float16,0,0.07021120190620422
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,16,24,4,128,1,float16,fp8,0,0.06580479741096497
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,16,24,4,128,1,fp8,fp8,0,0.06578559875488281
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,16,24,8,128,1,float16,float16,0,0.07164959907531739
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,16,24,8,128,1,float16,fp8,0,0.06579520106315613
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,16,24,8,128,1,fp8,fp8,0,0.06581119894981384
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,16,24,24,128,1,float16,float16,0,0.0412447988986969
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,16,24,24,128,1,float16,fp8,0,0.04103519916534424
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,16,24,24,128,1,fp8,fp8,0,0.041064000129699706
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,16,24,1,128,1,float16,float16,0,0.03915359973907471
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,16,24,1,128,1,float16,fp8,0,0.03920640051364899
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,16,24,1,128,1,fp8,fp8,0,0.037467199563980105
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,16,24,2,128,1,float16,float16,0,0.03909280002117157
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,16,24,2,128,1,float16,fp8,0,0.039099198579788205
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,16,24,2,128,1,fp8,fp8,0,0.03847199976444245
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,16,24,4,128,1,float16,float16,0,0.03928160071372986
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,16,24,4,128,1,float16,fp8,0,0.03903360068798065
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,16,24,4,128,1,fp8,fp8,0,0.03859040141105652
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,16,24,8,128,1,float16,float16,0,0.04030880033969879
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,16,24,8,128,1,float16,fp8,0,0.03905119895935059
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,16,24,8,128,1,fp8,fp8,0,0.03911199867725372
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,16,24,24,128,1,float16,float16,0,0.025054401159286498
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,16,24,24,128,1,float16,fp8,0,0.024872000515460967
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,16,24,24,128,1,fp8,fp8,0,0.02479359954595566
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,16,24,1,128,1,float16,float16,0,0.024772800505161285
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,16,24,1,128,1,float16,fp8,0,0.02475520074367523
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,16,24,1,128,1,fp8,fp8,0,0.024822400510311128
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,16,24,2,128,1,float16,float16,0,0.024777600169181825
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,16,24,2,128,1,float16,fp8,0,0.024747200310230255
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,16,24,2,128,1,fp8,fp8,0,0.024758400022983552
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,16,24,4,128,1,float16,float16,0,0.024769599735736846
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,16,24,4,128,1,float16,fp8,0,0.02481440007686615
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,16,24,4,128,1,fp8,fp8,0,0.024728000164031982
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,16,24,8,128,1,float16,float16,0,0.024774399399757386
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,16,24,8,128,1,float16,fp8,0,0.024699200689792634
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,16,24,24,128,1,float16,float16,0,0.01661120057106018
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,16,24,8,128,1,fp8,fp8,0,0.024779200553894043
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,16,24,24,128,1,float16,fp8,0,0.01672160029411316
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,16,24,24,128,1,fp8,fp8,0,0.016547200083732606
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,16,24,1,128,1,float16,float16,0,0.016625599563121797
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,16,24,1,128,1,float16,fp8,0,0.016448000073432924
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,16,24,1,128,1,fp8,fp8,0,0.016648000478744505
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,16,24,2,128,1,float16,float16,0,0.01661760061979294
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,16,24,2,128,1,float16,fp8,0,0.01720159947872162
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,16,24,2,128,1,fp8,fp8,0,0.016606399416923524
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,16,24,4,128,1,float16,float16,0,0.016596800088882445
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,16,24,4,128,1,float16,fp8,0,0.01653439998626709
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,16,24,4,128,1,fp8,fp8,0,0.016579200327396394
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,16,24,8,128,1,float16,float16,0,0.016676799952983858
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,16,24,8,128,1,float16,fp8,0,0.016548800468444824
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,16,24,8,128,1,fp8,fp8,0,0.016438399255275727
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,16,24,24,128,1,float16,float16,0,0.014480000734329224
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,16,24,24,128,1,fp8,fp8,0,0.012993599474430084
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,16,24,24,128,1,float16,fp8,0,0.014452800154685974
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,16,24,1,128,1,float16,float16,0,0.012547199428081513
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,16,24,1,128,1,float16,fp8,0,0.01266240030527115
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,16,24,1,128,1,fp8,fp8,0,0.013753600418567657
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,16,24,2,128,1,float16,float16,0,0.01255359947681427
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,16,24,2,128,1,float16,fp8,0,0.012624000012874604
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,16,24,2,128,1,fp8,fp8,0,0.014470399916172027
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,16,24,4,128,1,float16,float16,0,0.01252799928188324
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,16,24,4,128,1,fp8,fp8,0,0.012695999443531036
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,16,24,4,128,1,float16,fp8,0,0.012718400359153748
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,16,24,8,128,1,float16,float16,0,0.014300799369812012
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,16,24,8,128,1,float16,fp8,0,0.01255040019750595
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,16,24,8,128,1,fp8,fp8,0,0.012689599394798278
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16,24,24,128,1,float16,float16,0,0.010460799932479859
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16,24,24,128,1,float16,fp8,0,0.010529600083827972
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16,24,1,128,1,float16,float16,0,0.010460799932479859
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16,24,24,128,1,fp8,fp8,0,0.010529600083827972
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16,24,1,128,1,float16,fp8,0,0.010527999699115753
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16,24,1,128,1,fp8,fp8,0,0.010462400317192078
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16,24,2,128,1,float16,float16,0,0.010598400235176086
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16,24,2,128,1,float16,fp8,0,0.010500799864530563
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16,24,4,128,1,float16,fp8,0,0.010449600219726563
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16,24,2,128,1,fp8,fp8,0,0.01056160032749176
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16,24,4,128,1,float16,float16,0,0.01058719977736473
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16,24,4,128,1,fp8,fp8,0,0.010471999645233154
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16,24,8,128,1,float16,float16,0,0.010447999835014344
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16,24,8,128,1,float16,fp8,0,0.010513599961996078
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16,24,8,128,1,fp8,fp8,0,0.010462400317192078
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16,24,24,128,1,float16,float16,0,0.010491199791431427
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16,24,24,128,1,float16,fp8,0,0.010470400005578995
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16,24,1,128,1,float16,float16,0,0.010417599976062775
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16,24,1,128,1,float16,fp8,0,0.010550399869680404
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16,24,24,128,1,fp8,fp8,0,0.01048159971833229
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16,24,1,128,1,fp8,fp8,0,0.010425599664449692
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16,24,2,128,1,float16,fp8,0,0.01035040020942688
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16,24,2,128,1,float16,float16,0,0.010531199723482132
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16,24,2,128,1,fp8,fp8,0,0.010416000336408614
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16,24,4,128,1,float16,float16,0,0.010328000038862228
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16,24,4,128,1,float16,fp8,0,0.011043199896812439
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16,24,4,128,1,fp8,fp8,0,0.010478399693965912
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16,24,8,128,1,float16,float16,0,0.01043360009789467
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16,24,8,128,1,float16,fp8,0,0.010424000024795533
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16,24,8,128,1,fp8,fp8,0,0.010444799810647965
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16,24,24,128,1,float16,float16,0,0.010598400235176086
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16,24,24,128,1,float16,fp8,0,0.010449600219726563
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16,24,24,128,1,fp8,fp8,0,0.010372799634933472
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16,24,1,128,1,float16,float16,0,0.010411199927330018
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16,24,1,128,1,float16,fp8,0,0.010417599976062775
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16,24,1,128,1,fp8,fp8,0,0.010440000146627427
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16,24,2,128,1,float16,float16,0,0.010424000024795533
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16,24,2,128,1,float16,fp8,0,0.010380800068378448
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16,24,2,128,1,fp8,fp8,0,0.010409600287675857
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16,24,4,128,1,float16,float16,0,0.0104032002389431
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16,24,4,128,1,float16,fp8,0,0.010440000146627427
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16,24,4,128,1,fp8,fp8,0,0.010444799810647965
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16,24,8,128,1,float16,float16,0,0.0105103999376297
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16,24,8,128,1,float16,fp8,0,0.010371199995279311
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16,24,8,128,1,fp8,fp8,0,0.010443200170993806
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,1,24,1,128,1,float16,float16,0,0.1275696039199829
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,1,24,1,128,1,float16,fp8,0,0.1171504020690918
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,1,24,1,128,1,fp8,fp8,0,0.11700479984283448
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,1,24,2,128,1,float16,float16,0,0.12726720571517944
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,1,24,2,128,1,float16,fp8,0,0.11697119474411011
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,1,24,2,128,1,fp8,fp8,0,0.11712479591369629
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,1,24,4,128,1,float16,float16,0,0.12783679962158204
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,1,24,4,128,1,float16,fp8,0,0.11697920560836791
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,1,24,4,128,1,fp8,fp8,0,0.11780639886856079
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,1,24,8,128,1,float16,float16,0,0.12756160497665406
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,1,24,8,128,1,float16,fp8,0,0.11739200353622437
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,1,24,8,128,1,fp8,fp8,0,0.11810400485992431
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,1,24,24,128,1,float16,float16,0,0.06989120244979859
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,1,24,24,128,1,float16,fp8,0,0.0637503981590271
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,1,24,24,128,1,fp8,fp8,0,0.06368640065193176
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,1,24,1,128,1,float16,float16,0,0.06867200136184692
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,1,24,1,128,1,float16,fp8,0,0.06371999979019165
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,1,24,1,128,1,fp8,fp8,0,0.06376799941062927
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,1,24,2,128,1,float16,float16,0,0.06863679885864257
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,1,24,2,128,1,float16,fp8,0,0.0637279987335205
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,1,24,2,128,1,fp8,fp8,0,0.06386399865150452
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,1,24,4,128,1,float16,float16,0,0.06807839870452881
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,1,24,4,128,1,float16,fp8,0,0.06377120018005371
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,1,24,4,128,1,fp8,fp8,0,0.06372320055961608
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,1,24,8,128,1,float16,float16,0,0.06802240014076233
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,1,24,8,128,1,float16,fp8,0,0.06376000046730042
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1,24,24,128,1,float16,float16,0,0.03928160071372986
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,1,24,8,128,1,fp8,fp8,0,0.06378239989280701
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1,24,24,128,1,float16,fp8,0,0.03720000088214874
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1,24,24,128,1,fp8,fp8,0,0.03712159991264343
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1,24,1,128,1,float16,float16,0,0.03916319906711578
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1,24,1,128,1,float16,fp8,0,0.03716799914836884
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1,24,1,128,1,fp8,fp8,0,0.037134400010108946
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1,24,2,128,1,float16,float16,0,0.03920480012893677
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1,24,2,128,1,float16,fp8,0,0.037092798948287965
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1,24,2,128,1,fp8,fp8,0,0.0370959997177124
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1,24,4,128,1,float16,float16,0,0.03914400041103363
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1,24,4,128,1,float16,fp8,0,0.037161600589752194
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1,24,4,128,1,fp8,fp8,0,0.03712320029735565
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1,24,8,128,1,float16,float16,0,0.03924640119075775
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1,24,8,128,1,float16,fp8,0,0.03721120059490204
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1,24,8,128,1,fp8,fp8,0,0.03720000088214874
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1,24,24,128,1,float16,float16,0,0.024907200038433074
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1,24,24,128,1,float16,fp8,0,0.024699200689792634
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1,24,1,128,1,float16,float16,0,0.02484479993581772
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1,24,24,128,1,fp8,fp8,0,0.024438400566577912
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1,24,1,128,1,fp8,fp8,0,0.023100799322128295
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1,24,1,128,1,float16,fp8,0,0.02284960001707077
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1,24,2,128,1,float16,float16,0,0.024900799989700316
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1,24,4,128,1,float16,float16,0,0.02496960014104843
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1,24,2,128,1,fp8,fp8,0,0.0227743998169899
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1,24,2,128,1,float16,fp8,0,0.023056000471115112
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1,24,4,128,1,float16,fp8,0,0.022833600640296936
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1,24,4,128,1,fp8,fp8,0,0.022806400060653688
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1,24,8,128,1,float16,float16,0,0.02499680072069168
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1,24,8,128,1,float16,fp8,0,0.022867199778556824
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1,24,8,128,1,fp8,fp8,0,0.022780799865722658
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1,24,24,128,1,float16,float16,0,0.017236800491809846
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1,24,24,128,1,float16,fp8,0,0.016531200706958772
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1,24,24,128,1,fp8,fp8,0,0.016521599888801575
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1,24,1,128,1,float16,fp8,0,0.016553600132465363
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1,24,1,128,1,float16,float16,0,0.016707199811935424
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1,24,1,128,1,fp8,fp8,0,0.0165583997964859
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1,24,2,128,1,float16,float16,0,0.016708800196647645
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1,24,2,128,1,float16,fp8,0,0.01655520051717758
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1,24,2,128,1,fp8,fp8,0,0.01661760061979294
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1,24,4,128,1,float16,float16,0,0.016764800250530242
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1,24,4,128,1,float16,fp8,0,0.015532800555229187
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1,24,4,128,1,fp8,fp8,0,0.016518400609493257
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1,24,8,128,1,float16,float16,0,0.01674239933490753
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1,24,8,128,1,float16,fp8,0,0.01656640022993088
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1,24,8,128,1,fp8,fp8,0,0.016612799465656282
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1,24,24,128,1,float16,float16,0,0.014427199959754944
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1,24,24,128,1,float16,fp8,0,0.01247519999742508
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1,24,24,128,1,fp8,fp8,0,0.012921600043773651
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1,24,1,128,1,float16,float16,0,0.013731199502944946
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1,24,1,128,1,float16,fp8,0,0.012478400021791458
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1,24,1,128,1,fp8,fp8,0,0.012537600100040435
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1,24,2,128,1,float16,float16,0,0.01443839967250824
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1,24,2,128,1,float16,fp8,0,0.012510399520397186
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1,24,4,128,1,float16,float16,0,0.014444799721240997
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1,24,2,128,1,fp8,fp8,0,0.013195200264453888
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1,24,4,128,1,float16,fp8,0,0.012455999851226807
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1,24,4,128,1,fp8,fp8,0,0.012615999579429627
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1,24,8,128,1,float16,float16,0,0.01305440068244934
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1,24,24,128,1,float16,float16,0,0.01111999973654747
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1,24,8,128,1,fp8,fp8,0,0.012571200728416443
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1,24,8,128,1,float16,fp8,0,0.012520000338554382
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1,24,24,128,1,float16,fp8,0,0.010523200035095215
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1,24,1,128,1,float16,float16,0,0.010401599854230881
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1,24,24,128,1,fp8,fp8,0,0.0105103999376297
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1,24,1,128,1,float16,fp8,0,0.010435199737548828
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1,24,1,128,1,fp8,fp8,0,0.010473600029945374
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1,24,2,128,1,float16,float16,0,0.01233920007944107
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1,24,2,128,1,float16,fp8,0,0.01048320010304451
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1,24,2,128,1,fp8,fp8,0,0.010416000336408614
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1,24,4,128,1,float16,float16,0,0.011860799789428712
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1,24,4,128,1,fp8,fp8,0,0.010505600273609162
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1,24,8,128,1,float16,float16,0,0.01048479974269867
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1,24,4,128,1,float16,fp8,0,0.010332799702882766
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1,24,8,128,1,float16,fp8,0,0.010425599664449692
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1,24,8,128,1,fp8,fp8,0,0.01053759977221489
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1,24,24,128,1,float16,float16,0,0.01048320010304451
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1,24,24,128,1,float16,fp8,0,0.010440000146627427
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1,24,24,128,1,fp8,fp8,0,0.010308799892663955
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1,24,1,128,1,float16,float16,0,0.010486400127410889
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1,24,1,128,1,float16,fp8,0,0.010358399897813796
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1,24,1,128,1,fp8,fp8,0,0.010366400331258773
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1,24,2,128,1,float16,float16,0,0.010515200346708298
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1,24,2,128,1,float16,fp8,0,0.01048159971833229
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1,24,2,128,1,fp8,fp8,0,0.010452800244092942
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1,24,4,128,1,float16,float16,0,0.010462400317192078
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1,24,4,128,1,float16,fp8,0,0.010441599786281586
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1,24,8,128,1,float16,float16,0,0.01096159964799881
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1,24,4,128,1,fp8,fp8,0,0.010494399815797806
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1,24,8,128,1,float16,fp8,0,0.010385599732398988
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1,24,8,128,1,fp8,fp8,0,0.010363200306892395
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1,24,24,128,1,float16,float16,0,0.010550399869680404
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1,24,24,128,1,float16,fp8,0,0.010480000078678131
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1,24,24,128,1,fp8,fp8,0,0.010419200360774993
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1,24,1,128,1,float16,float16,0,0.010550399869680404
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1,24,1,128,1,float16,fp8,0,0.010390400141477584
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1,24,2,128,1,float16,float16,0,0.010500799864530563
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1,24,1,128,1,fp8,fp8,0,0.010427200049161912
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1,24,2,128,1,float16,fp8,0,0.010391999781131745
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1,24,2,128,1,fp8,fp8,0,0.010334400087594986
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1,24,4,128,1,float16,float16,0,0.010416000336408614
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1,24,4,128,1,fp8,fp8,0,0.010320000350475311
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1,24,4,128,1,float16,fp8,0,0.010369600355625152
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1,24,8,128,1,float16,float16,0,0.010416000336408614
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1,24,8,128,1,float16,fp8,0,0.010447999835014344
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1,24,8,128,1,fp8,fp8,0,0.010401599854230881
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,16384,16,1,128,1,float16,fp8,0,5.421396636962891
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,16384,16,1,128,1,fp8,fp8,0,5.3546192169189455
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,16384,16,1,128,1,float16,float16,0,8.560460662841797
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,16384,16,2,128,1,fp8,fp8,0,5.326353454589844
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,16384,16,2,128,1,float16,fp8,0,5.686459350585937
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,16384,16,2,128,1,float16,float16,0,8.91204605102539
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,16384,16,4,128,1,float16,float16,0,8.703321838378907
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,16384,16,4,128,1,float16,fp8,0,5.3989215850830075
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,16384,16,4,128,1,fp8,fp8,0,5.668560028076172
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,16384,16,8,128,1,float16,fp8,0,5.684268951416016
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,16384,16,8,128,1,float16,float16,0,9.492171478271484
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,16384,16,8,128,1,fp8,fp8,0,5.518948745727539
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16384,16,1,128,1,float16,float16,0,4.325980758666992
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16384,16,16,128,1,float16,fp8,0,2.775265693664551
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16384,16,16,128,1,fp8,fp8,0,2.747230339050293
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16384,16,1,128,1,float16,fp8,0,2.8115583419799806
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16384,16,1,128,1,fp8,fp8,0,2.7534080505371095
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16384,16,2,128,1,float16,float16,0,4.033287811279297
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16384,16,2,128,1,float16,fp8,0,2.8127216339111327
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16384,16,2,128,1,fp8,fp8,0,2.7337696075439455
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16384,16,4,128,1,float16,fp8,0,2.7660192489624023
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16384,16,4,128,1,float16,float16,0,4.339939117431641
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16384,16,4,128,1,fp8,fp8,0,2.728507232666016
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16384,16,8,128,1,fp8,fp8,0,2.7601903915405273
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16384,16,8,128,1,float16,fp8,0,2.8459936141967774
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16384,16,8,128,1,float16,float16,0,4.412902450561523
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16384,16,16,128,1,float16,fp8,0,1.4599072456359863
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16384,16,16,128,1,fp8,fp8,0,1.5898159980773925
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16384,16,1,128,1,float16,float16,0,1.6158111572265625
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16384,16,1,128,1,float16,fp8,0,1.409921646118164
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16384,16,1,128,1,fp8,fp8,0,1.8122655868530273
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16384,16,2,128,1,float16,float16,0,1.568838405609131
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16384,16,2,128,1,float16,fp8,0,1.4324895858764648
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16384,16,2,128,1,fp8,fp8,0,1.5692527770996094
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16384,16,4,128,1,float16,fp8,0,1.4243151664733886
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16384,16,4,128,1,float16,float16,0,1.7873920440673827
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16384,16,4,128,1,fp8,fp8,0,1.4220128059387207
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16384,16,8,128,1,float16,float16,0,2.5247663497924804
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16384,16,8,128,1,float16,fp8,0,1.4256815910339355
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16384,16,8,128,1,fp8,fp8,0,1.7184976577758788
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16384,16,16,128,1,float16,fp8,0,0.8078800201416015
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16384,16,16,128,1,fp8,fp8,0,0.7947663784027099
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16384,16,1,128,1,float16,float16,0,0.8797856330871582
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16384,16,1,128,1,float16,fp8,0,0.7853839874267579
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16384,16,1,128,1,fp8,fp8,0,0.7821760177612305
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16384,16,2,128,1,float16,float16,0,0.8799471855163574
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16384,16,2,128,1,float16,fp8,0,0.8210288047790527
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16384,16,2,128,1,fp8,fp8,0,0.7796624183654786
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16384,16,4,128,1,float16,float16,0,0.8849087715148926
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16384,16,4,128,1,float16,fp8,0,1.0054736137390137
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16384,16,4,128,1,fp8,fp8,0,0.8277392387390137
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16384,16,8,128,1,float16,float16,0,0.8946479797363281
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16384,16,8,128,1,float16,fp8,0,0.8501456260681153
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16384,16,8,128,1,fp8,fp8,0,0.7771296024322509
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,12288,16,1,128,1,float16,float16,0,4.660446548461914
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,12288,16,1,128,1,float16,fp8,0,3.2253841400146483
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,12288,16,1,128,1,fp8,fp8,0,3.153376007080078
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,12288,16,2,128,1,float16,float16,0,5.019631958007812
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,12288,16,2,128,1,float16,fp8,0,3.3820960998535154
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,12288,16,2,128,1,fp8,fp8,0,3.3028095245361326
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,12288,16,4,128,1,float16,float16,0,5.584206390380859
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,12288,16,4,128,1,float16,fp8,0,3.466299057006836
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,12288,16,4,128,1,fp8,fp8,0,3.2315616607666016
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,12288,16,8,128,1,float16,float16,0,5.262510299682617
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,12288,16,8,128,1,float16,fp8,0,3.6320606231689454
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,12288,16,8,128,1,fp8,fp8,0,3.2953601837158204
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16384,16,16,128,1,float16,float16,0,3.3284046173095705
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,12288,16,16,128,1,float16,float16,0,1.924443244934082
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,12288,16,16,128,1,float16,fp8,0,1.7284767150878906
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,12288,16,16,128,1,fp8,fp8,0,1.978892707824707
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,12288,16,1,128,1,float16,float16,0,1.81951847076416
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,12288,16,1,128,1,float16,fp8,0,1.5921600341796875
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16384,16,16,128,1,float16,float16,0,1.6276432037353517
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,12288,16,2,128,1,float16,float16,0,1.8571344375610352
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,12288,16,2,128,1,float16,fp8,0,1.6152095794677734
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,12288,16,1,128,1,fp8,fp8,0,2.0841264724731445
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16384,16,16,128,1,float16,float16,0,0.8886079788208008
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,12288,16,4,128,1,float16,float16,0,1.8279727935791015
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,12288,16,2,128,1,fp8,fp8,0,1.8378511428833009
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,12288,16,4,128,1,float16,fp8,0,1.576475238800049
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,12288,16,4,128,1,fp8,fp8,0,1.603505516052246
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,12288,16,8,128,1,float16,fp8,0,1.6345104217529296
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,12288,16,8,128,1,fp8,fp8,0,1.6949359893798828
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,12288,16,16,128,1,float16,float16,0,0.9794560432434082
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,12288,16,8,128,1,float16,float16,0,2.576572799682617
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,12288,16,16,128,1,float16,fp8,0,0.9343567848205566
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,12288,16,16,128,1,fp8,fp8,0,0.9756527900695801
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,12288,16,1,128,1,float16,float16,0,0.99923677444458
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,12288,16,1,128,1,fp8,fp8,0,0.8550800323486328
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,12288,16,1,128,1,float16,fp8,0,1.0958144187927246
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,12288,16,2,128,1,float16,float16,0,0.9598912239074707
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,12288,16,2,128,1,float16,fp8,0,0.9902447700500489
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,12288,16,2,128,1,fp8,fp8,0,0.8534416198730469
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,12288,16,4,128,1,float16,float16,0,0.9522000312805176
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,12288,16,4,128,1,float16,fp8,0,0.8685935974121094
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,12288,16,4,128,1,fp8,fp8,0,0.8485648155212402
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,12288,16,8,128,1,float16,fp8,0,0.845740795135498
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,12288,16,8,128,1,float16,float16,0,0.9820048332214355
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,12288,16,8,128,1,fp8,fp8,0,1.1849375724792481
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,12288,16,16,128,1,float16,fp8,0,0.5083199977874756
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,12288,16,16,128,1,float16,float16,0,0.674283218383789
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,12288,16,16,128,1,fp8,fp8,0,0.49425439834594725
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,12288,16,1,128,1,float16,float16,0,0.5500783920288086
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,12288,16,1,128,1,float16,fp8,0,0.5602511882781982
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,12288,16,1,128,1,fp8,fp8,0,0.47766880989074706
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,12288,16,2,128,1,float16,float16,0,0.5424399852752686
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,12288,16,2,128,1,float16,fp8,0,0.476091194152832
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,12288,16,2,128,1,fp8,fp8,0,0.4765632152557373
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,12288,16,4,128,1,float16,float16,0,0.5436240196228027
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,12288,16,4,128,1,float16,fp8,0,0.47767839431762693
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,12288,16,4,128,1,fp8,fp8,0,0.47726879119873045
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,12288,16,8,128,1,float16,fp8,0,0.4783599853515625
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,12288,16,8,128,1,float16,float16,0,0.5599120140075684
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,12288,16,8,128,1,fp8,fp8,0,0.47777600288391114
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,10240,16,1,128,1,float16,fp8,0,2.2520816802978514
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,10240,16,1,128,1,fp8,fp8,0,2.179097557067871
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,10240,16,1,128,1,float16,float16,0,2.9318784713745116
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,10240,16,2,128,1,float16,fp8,0,2.356612777709961
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,10240,16,2,128,1,float16,float16,0,3.2433326721191404
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,10240,16,2,128,1,fp8,fp8,0,2.4357887268066407
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,10240,16,4,128,1,float16,float16,0,2.695489692687988
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,10240,16,4,128,1,float16,fp8,0,2.3615264892578125
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,10240,16,4,128,1,fp8,fp8,0,2.4951215744018556
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,10240,16,8,128,1,float16,fp8,0,2.1903247833251953
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,10240,16,8,128,1,float16,float16,0,2.9323503494262697
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,10240,16,16,128,1,float16,fp8,0,1.2006256103515625
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,10240,16,16,128,1,float16,float16,0,1.373020839691162
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,10240,16,8,128,1,fp8,fp8,0,2.3803583145141602
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,10240,16,16,128,1,fp8,fp8,0,1.417897605895996
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,10240,16,1,128,1,float16,float16,0,1.3161631584167481
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,10240,16,1,128,1,fp8,fp8,0,1.34814395904541
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,10240,16,1,128,1,float16,fp8,0,1.4989583969116211
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,10240,16,2,128,1,float16,float16,0,1.330019187927246
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,10240,16,2,128,1,float16,fp8,0,1.2973903656005858
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,10240,16,2,128,1,fp8,fp8,0,1.1512255668640137
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,10240,16,4,128,1,float16,float16,0,1.3077424049377442
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,10240,16,4,128,1,float16,fp8,0,1.340937614440918
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,10240,16,4,128,1,fp8,fp8,0,1.1411168098449707
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,10240,16,8,128,1,float16,float16,0,1.3071855545043944
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,10240,16,8,128,1,fp8,fp8,0,1.1520480155944823
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,10240,16,8,128,1,float16,fp8,0,1.7295215606689454
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,10240,16,16,128,1,float16,float16,0,0.7191792011260987
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,10240,16,16,128,1,float16,fp8,0,0.7269728183746338
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,10240,16,16,128,1,fp8,fp8,0,0.6461840152740479
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,10240,16,1,128,1,float16,fp8,0,0.6210447788238526
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,10240,16,1,128,1,float16,float16,0,0.7021615982055665
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,10240,16,1,128,1,fp8,fp8,0,0.9573599815368652
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,10240,16,2,128,1,float16,float16,0,0.6920671939849854
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,10240,16,2,128,1,fp8,fp8,0,0.6368607997894287
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,10240,16,2,128,1,float16,fp8,0,0.6247759819030761
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,10240,16,4,128,1,float16,float16,0,0.6980239868164062
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,10240,16,4,128,1,float16,fp8,0,0.625435209274292
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,10240,16,4,128,1,fp8,fp8,0,0.6309408187866211
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,10240,16,8,128,1,float16,float16,0,0.7010752201080322
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,10240,16,16,128,1,float16,float16,0,0.4201536178588867
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,10240,16,8,128,1,float16,fp8,0,0.6229407787322998
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,10240,16,8,128,1,fp8,fp8,0,0.6249536037445068
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,10240,16,16,128,1,float16,fp8,0,0.379911994934082
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,10240,16,16,128,1,fp8,fp8,0,0.3715728044509888
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,10240,16,1,128,1,float16,fp8,0,0.3544928073883057
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,10240,16,1,128,1,float16,float16,0,0.4087711811065674
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,10240,16,1,128,1,fp8,fp8,0,0.3854207992553711
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,10240,16,2,128,1,fp8,fp8,0,0.3571360111236572
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,10240,16,2,128,1,float16,float16,0,0.3957551956176758
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,10240,16,2,128,1,float16,fp8,0,0.35445919036865237
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,10240,16,4,128,1,float16,float16,0,0.4108704090118408
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,10240,16,4,128,1,float16,fp8,0,0.35301759243011477
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,10240,16,8,128,1,float16,float16,0,0.40241279602050783
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,10240,16,4,128,1,fp8,fp8,0,0.35500640869140626
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,10240,16,8,128,1,float16,fp8,0,0.3866559982299805
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,10240,16,8,128,1,fp8,fp8,0,0.3537359952926636
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,8192,16,1,128,1,float16,fp8,0,2.8894176483154297
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,8192,16,1,128,1,fp8,fp8,0,2.8762527465820313
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,8192,16,1,128,1,float16,float16,0,3.899235153198242
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,8192,16,2,128,1,float16,float16,0,4.3598480224609375
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,8192,16,2,128,1,fp8,fp8,0,2.8780927658081055
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,8192,16,2,128,1,float16,fp8,0,2.9455135345458983
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,8192,16,4,128,1,float16,fp8,0,3.407115173339844
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,8192,16,4,128,1,float16,float16,0,4.481742477416992
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,8192,16,4,128,1,fp8,fp8,0,2.8886064529418944
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,8192,16,8,128,1,float16,float16,0,4.442025756835937
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,8192,16,16,128,1,float16,float16,0,1.7279151916503905
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,8192,16,8,128,1,float16,fp8,0,2.9732656478881836
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,8192,16,8,128,1,fp8,fp8,0,2.9816911697387694
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,8192,16,16,128,1,float16,fp8,0,1.7055919647216797
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,8192,16,16,128,1,fp8,fp8,0,1.874964714050293
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,8192,16,1,128,1,float16,fp8,0,1.5096192359924316
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,8192,16,1,128,1,float16,float16,0,1.9184015274047852
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,8192,16,1,128,1,fp8,fp8,0,1.4784463882446288
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,8192,16,2,128,1,float16,float16,0,1.5968128204345704
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,8192,16,2,128,1,float16,fp8,0,1.470531177520752
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,8192,16,2,128,1,fp8,fp8,0,2.106950378417969
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,8192,16,4,128,1,float16,float16,0,1.649888038635254
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,8192,16,4,128,1,fp8,fp8,0,1.4728943824768066
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,8192,16,4,128,1,float16,fp8,0,1.967300796508789
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,8192,16,16,128,1,float16,float16,0,0.9097743988037109
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,8192,16,8,128,1,float16,float16,0,1.9146543502807618
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,8192,16,8,128,1,float16,fp8,0,1.9214431762695312
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,8192,16,8,128,1,fp8,fp8,0,1.4772944450378418
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,8192,16,16,128,1,fp8,fp8,0,0.8272496223449707
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,8192,16,16,128,1,float16,fp8,0,1.3521743774414063
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,8192,16,1,128,1,float16,float16,0,0.8595744132995605
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,8192,16,1,128,1,float16,fp8,0,0.7938447952270508
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,8192,16,2,128,1,float16,float16,0,0.911297607421875
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,8192,16,1,128,1,fp8,fp8,0,1.0505328178405762
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,8192,16,2,128,1,float16,fp8,0,0.7868256092071533
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,8192,16,2,128,1,fp8,fp8,0,1.072383975982666
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,8192,16,4,128,1,float16,float16,0,0.8654144287109375
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,8192,16,4,128,1,float16,fp8,0,0.7796256065368652
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,8192,16,4,128,1,fp8,fp8,0,0.7746352195739746
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,8192,16,8,128,1,float16,fp8,0,0.780467176437378
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,8192,16,8,128,1,float16,float16,0,1.1335087776184083
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,8192,16,16,128,1,float16,float16,0,0.5110720157623291
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,8192,16,8,128,1,fp8,fp8,0,0.7744128227233886
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,8192,16,16,128,1,float16,fp8,0,0.5861055850982666
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,8192,16,16,128,1,fp8,fp8,0,0.4514175891876221
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,8192,16,1,128,1,float16,float16,0,0.47342720031738283
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,8192,16,1,128,1,float16,fp8,0,0.5515039920806885
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,8192,16,1,128,1,fp8,fp8,0,0.42641282081604004
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,8192,16,2,128,1,float16,float16,0,0.48082399368286133
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,8192,16,2,128,1,float16,fp8,0,0.4683407783508301
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,8192,16,4,128,1,float16,float16,0,0.4699999809265137
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,8192,16,2,128,1,fp8,fp8,0,0.5286704063415527
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,8192,16,4,128,1,float16,fp8,0,0.4303408145904541
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,8192,16,4,128,1,fp8,fp8,0,0.44545760154724123
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,8192,16,8,128,1,float16,fp8,0,0.4269264221191406
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,8192,16,8,128,1,float16,float16,0,0.4833072185516357
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,8192,16,8,128,1,fp8,fp8,0,0.4511007785797119
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,8192,16,16,128,1,float16,float16,0,0.28982560634613036
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,8192,16,16,128,1,float16,fp8,0,0.25955519676208494
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,8192,16,16,128,1,fp8,fp8,0,0.25910561084747313
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,8192,16,1,128,1,float16,float16,0,0.2738591909408569
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,8192,16,1,128,1,float16,fp8,0,0.249947190284729
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,8192,16,1,128,1,fp8,fp8,0,0.24898879528045653
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,8192,16,2,128,1,float16,float16,0,0.2699376106262207
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,8192,16,2,128,1,float16,fp8,0,0.24971840381622315
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,8192,16,2,128,1,fp8,fp8,0,0.2494976043701172
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,8192,16,4,128,1,float16,float16,0,0.2741616010665894
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,8192,16,4,128,1,float16,fp8,0,0.24880640506744384
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,8192,16,4,128,1,fp8,fp8,0,0.2507728099822998
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,8192,16,8,128,1,float16,float16,0,0.2819103956222534
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,8192,16,8,128,1,float16,fp8,0,0.24852960109710692
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,8192,16,8,128,1,fp8,fp8,0,0.24864640235900878
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,6144,16,1,128,1,float16,fp8,0,1.7046735763549805
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,6144,16,1,128,1,fp8,fp8,0,1.6982879638671875
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,6144,16,1,128,1,float16,float16,0,1.998624038696289
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,6144,16,2,128,1,float16,float16,0,1.8036159515380858
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,6144,16,2,128,1,float16,fp8,0,1.7064847946166992
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,6144,16,2,128,1,fp8,fp8,0,1.7082704544067382
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,6144,16,4,128,1,float16,fp8,0,1.8629087448120116
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,6144,16,4,128,1,float16,float16,0,2.730641555786133
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,6144,16,4,128,1,fp8,fp8,0,1.7218448638916015
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,6144,16,8,128,1,float16,float16,0,1.9185199737548828
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,6144,16,8,128,1,float16,fp8,0,1.7331663131713868
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,6144,16,16,128,1,float16,float16,0,1.0683648109436035
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,6144,16,16,128,1,fp8,fp8,0,0.9381520271301269
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,6144,16,8,128,1,fp8,fp8,0,2.009390449523926
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,6144,16,16,128,1,float16,fp8,0,1.3760160446166991
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,6144,16,1,128,1,float16,float16,0,1.227660846710205
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,6144,16,1,128,1,fp8,fp8,0,0.8984959602355957
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,6144,16,1,128,1,float16,fp8,0,0.8913760185241699
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,6144,16,2,128,1,float16,float16,0,1.0584511756896973
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,6144,16,2,128,1,fp8,fp8,0,0.8919695854187012
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,6144,16,4,128,1,float16,float16,0,0.9606831550598145
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,6144,16,2,128,1,float16,fp8,0,1.219495964050293
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,6144,16,4,128,1,float16,fp8,0,1.3181967735290527
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,6144,16,4,128,1,fp8,fp8,0,0.8902064323425293
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,6144,16,8,128,1,float16,fp8,0,0.8835904121398925
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,6144,16,8,128,1,float16,float16,0,1.1365839958190918
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,6144,16,16,128,1,float16,float16,0,0.5738063812255859
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,6144,16,16,128,1,float16,fp8,0,0.5014880180358887
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,6144,16,8,128,1,fp8,fp8,0,0.909881591796875
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,6144,16,16,128,1,fp8,fp8,0,0.5862815856933594
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,6144,16,1,128,1,float16,float16,0,0.5278304100036622
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,6144,16,1,128,1,float16,fp8,0,0.47859840393066405
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,6144,16,1,128,1,fp8,fp8,0,0.48830242156982423
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,6144,16,2,128,1,float16,float16,0,0.695414400100708
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,6144,16,2,128,1,float16,fp8,0,0.571233606338501
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,6144,16,2,128,1,fp8,fp8,0,0.4773280143737793
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,6144,16,4,128,1,float16,float16,0,0.5280960083007813
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,6144,16,4,128,1,float16,fp8,0,0.5807295799255371
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,6144,16,4,128,1,fp8,fp8,0,0.5713488101959229
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,6144,16,8,128,1,float16,float16,0,0.5418496131896973
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,6144,16,8,128,1,float16,fp8,0,0.4750976085662842
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,6144,16,16,128,1,float16,fp8,0,0.28275361061096194
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,6144,16,16,128,1,float16,float16,0,0.31911840438842776
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,6144,16,16,128,1,fp8,fp8,0,0.2829024076461792
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,6144,16,8,128,1,fp8,fp8,0,0.4766848087310791
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,6144,16,1,128,1,float16,float16,0,0.2913088083267212
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,6144,16,1,128,1,fp8,fp8,0,0.26672160625457764
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,6144,16,1,128,1,float16,fp8,0,0.2691920042037964
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,6144,16,2,128,1,float16,float16,0,0.2893791913986206
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,6144,16,2,128,1,float16,fp8,0,0.2680624008178711
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,6144,16,2,128,1,fp8,fp8,0,0.26905601024627684
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,6144,16,4,128,1,float16,float16,0,0.2973504066467285
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,6144,16,4,128,1,float16,fp8,0,0.26583199501037597
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,6144,16,4,128,1,fp8,fp8,0,0.26831998825073244
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,6144,16,8,128,1,float16,float16,0,0.3026864051818848
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,6144,16,8,128,1,float16,fp8,0,0.26973280906677244
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,6144,16,16,128,1,float16,float16,0,0.18574880361557006
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,6144,16,16,128,1,float16,fp8,0,0.16666560173034667
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,6144,16,8,128,1,fp8,fp8,0,0.26927359104156495
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,6144,16,16,128,1,fp8,fp8,0,0.17023680210113526
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,6144,16,1,128,1,float16,float16,0,0.17098400592803956
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,6144,16,1,128,1,float16,fp8,0,0.1598080039024353
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,6144,16,1,128,1,fp8,fp8,0,0.15928800106048585
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,6144,16,2,128,1,float16,float16,0,0.17084640264511108
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,6144,16,2,128,1,float16,fp8,0,0.15926400423049927
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,6144,16,2,128,1,fp8,fp8,0,0.16078560352325438
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,6144,16,4,128,1,float16,float16,0,0.1738160014152527
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,6144,16,4,128,1,float16,fp8,0,0.15973119735717772
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,6144,16,4,128,1,fp8,fp8,0,0.15922399759292602
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,6144,16,8,128,1,float16,fp8,0,0.1597391963005066
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,6144,16,8,128,1,float16,float16,0,0.1785215973854065
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,6144,16,8,128,1,fp8,fp8,0,0.158624005317688
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,4096,16,1,128,1,float16,fp8,0,1.6454479217529296
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,4096,16,1,128,1,fp8,fp8,0,1.6379711151123046
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,4096,16,1,128,1,float16,float16,0,1.8730287551879883
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,4096,16,2,128,1,float16,fp8,0,1.6459632873535157
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,4096,16,2,128,1,float16,float16,0,1.9327823638916015
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,4096,16,2,128,1,fp8,fp8,0,1.7907215118408204
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,4096,16,4,128,1,float16,float16,0,1.7578912734985352
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,4096,16,4,128,1,fp8,fp8,0,1.637923240661621
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,4096,16,4,128,1,float16,fp8,0,1.797012710571289
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,4096,16,8,128,1,float16,float16,0,2.1429967880249023
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,4096,16,16,128,1,float16,float16,0,1.0324064254760743
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,4096,16,8,128,1,fp8,fp8,0,1.639441680908203
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,4096,16,16,128,1,float16,fp8,0,1.3103551864624023
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,4096,16,8,128,1,float16,fp8,0,2.287785530090332
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,4096,16,16,128,1,fp8,fp8,0,0.9943663597106933
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,4096,16,1,128,1,float16,float16,0,0.8838720321655273
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,4096,16,1,128,1,float16,fp8,0,0.9574624061584472
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,4096,16,1,128,1,fp8,fp8,0,0.8844688415527344
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,4096,16,2,128,1,float16,float16,0,0.9071328163146972
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,4096,16,2,128,1,float16,fp8,0,0.9019136428833008
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,4096,16,2,128,1,fp8,fp8,0,0.8472623825073242
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,4096,16,4,128,1,float16,float16,0,0.9101327896118164
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,4096,16,4,128,1,float16,fp8,0,1.197481632232666
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,4096,16,4,128,1,fp8,fp8,0,0.8573344230651856
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,4096,16,8,128,1,float16,float16,0,0.9448816299438476
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,4096,16,8,128,1,float16,fp8,0,0.8505951881408691
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,4096,16,16,128,1,float16,float16,0,0.5313407897949218
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,4096,16,16,128,1,float16,fp8,0,0.4860976219177246
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,4096,16,8,128,1,fp8,fp8,0,0.8506527900695801
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,4096,16,16,128,1,fp8,fp8,0,0.4868879795074463
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,4096,16,1,128,1,float16,float16,0,0.48288798332214355
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,4096,16,1,128,1,float16,fp8,0,0.4771103858947754
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,4096,16,1,128,1,fp8,fp8,0,0.4582352161407471
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,4096,16,2,128,1,float16,float16,0,0.4792975902557373
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,4096,16,2,128,1,float16,fp8,0,0.4831071853637695
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,4096,16,2,128,1,fp8,fp8,0,0.47641758918762206
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,4096,16,4,128,1,float16,float16,0,0.49309439659118653
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,4096,16,4,128,1,float16,fp8,0,0.464353609085083
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,4096,16,4,128,1,fp8,fp8,0,0.47485918998718263
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,4096,16,8,128,1,float16,float16,0,0.5046783924102783
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,4096,16,8,128,1,float16,fp8,0,0.4524528026580811
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,4096,16,8,128,1,fp8,fp8,0,0.48032798767089846
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,4096,16,16,128,1,float16,float16,0,0.2905328035354614
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,4096,16,16,128,1,float16,fp8,0,0.26618399620056155
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,4096,16,16,128,1,fp8,fp8,0,0.2700767993927002
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,4096,16,1,128,1,float16,float16,0,0.26905920505523684
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,4096,16,1,128,1,float16,fp8,0,0.24574880599975585
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,4096,16,1,128,1,fp8,fp8,0,0.2478559970855713
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,4096,16,2,128,1,float16,float16,0,0.26624159812927245
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,4096,16,2,128,1,float16,fp8,0,0.2602351903915405
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,4096,16,2,128,1,fp8,fp8,0,0.24744799137115478
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,4096,16,4,128,1,float16,float16,0,0.2688704013824463
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,4096,16,4,128,1,float16,fp8,0,0.25107359886169434
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,4096,16,4,128,1,fp8,fp8,0,0.24663839340209961
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,4096,16,8,128,1,float16,float16,0,0.2754688024520874
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,4096,16,16,128,1,float16,float16,0,0.16921279430389405
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,4096,16,8,128,1,float16,fp8,0,0.24707520008087158
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,4096,16,8,128,1,fp8,fp8,0,0.2483936071395874
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,4096,16,16,128,1,float16,fp8,0,0.15280319452285768
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,4096,16,16,128,1,fp8,fp8,0,0.1523519992828369
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,4096,16,1,128,1,float16,float16,0,0.15055040121078492
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,4096,16,1,128,1,float16,fp8,0,0.14301120042800902
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,4096,16,1,128,1,fp8,fp8,0,0.1436735987663269
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,4096,16,2,128,1,float16,float16,0,0.1493391990661621
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,4096,16,2,128,1,float16,fp8,0,0.14153920412063598
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,4096,16,2,128,1,fp8,fp8,0,0.1430191993713379
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,4096,16,4,128,1,float16,float16,0,0.15178240537643434
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,4096,16,4,128,1,float16,fp8,0,0.14211039543151854
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,4096,16,4,128,1,fp8,fp8,0,0.14369440078735352
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,4096,16,8,128,1,float16,float16,0,0.16131680011749266
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,4096,16,16,128,1,float16,float16,0,0.10281120538711548
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,4096,16,8,128,1,fp8,fp8,0,0.14207839965820312
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,4096,16,8,128,1,float16,fp8,0,0.14259519577026367
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,4096,16,16,128,1,float16,fp8,0,0.09447519779205323
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,4096,16,16,128,1,fp8,fp8,0,0.09430400133132935
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,4096,16,1,128,1,float16,fp8,0,0.09171199798583984
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,4096,16,1,128,1,float16,float16,0,0.09534239768981934
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,4096,16,1,128,1,fp8,fp8,0,0.09084960222244262
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,4096,16,2,128,1,float16,float16,0,0.09595839977264405
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,4096,16,2,128,1,fp8,fp8,0,0.09148960113525391
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,4096,16,2,128,1,float16,fp8,0,0.09112160205841065
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,4096,16,4,128,1,float16,float16,0,0.09682880043983459
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,4096,16,4,128,1,float16,fp8,0,0.09041759967803956
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,4096,16,4,128,1,fp8,fp8,0,0.09021120071411133
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,4096,16,8,128,1,float16,float16,0,0.10030720233917237
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,4096,16,8,128,1,float16,fp8,0,0.09042239785194398
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,4096,16,8,128,1,fp8,fp8,0,0.09013760089874268
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,3072,16,1,128,1,float16,float16,0,1.053656005859375
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,3072,16,1,128,1,fp8,fp8,0,1.0155183792114257
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,3072,16,1,128,1,float16,fp8,0,1.0149007797241212
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,3072,16,2,128,1,float16,fp8,0,1.0123184204101563
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,3072,16,2,128,1,fp8,fp8,0,1.0125311851501464
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,3072,16,2,128,1,float16,float16,0,1.2781583786010742
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,3072,16,4,128,1,float16,fp8,0,1.010311985015869
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,3072,16,4,128,1,float16,float16,0,1.0647647857666016
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,3072,16,4,128,1,fp8,fp8,0,1.2980976104736328
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,3072,16,8,128,1,float16,float16,0,1.1190719604492188
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,3072,16,8,128,1,float16,fp8,0,1.2560303688049317
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,3072,16,8,128,1,fp8,fp8,0,1.010921573638916
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,3072,16,16,128,1,float16,float16,0,0.6409520149230957
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,3072,16,16,128,1,float16,fp8,0,0.5940896034240722
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,3072,16,16,128,1,fp8,fp8,0,0.5805007934570312
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,3072,16,1,128,1,float16,float16,0,0.5414095878601074
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,3072,16,1,128,1,fp8,fp8,0,0.5355887889862061
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,3072,16,1,128,1,float16,fp8,0,0.7225584030151367
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,3072,16,2,128,1,float16,float16,0,0.55141282081604
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,3072,16,2,128,1,float16,fp8,0,0.5275152206420899
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,3072,16,2,128,1,fp8,fp8,0,0.5298960208892822
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,3072,16,4,128,1,float16,float16,0,0.6346704006195069
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,3072,16,4,128,1,float16,fp8,0,0.5654687881469727
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,3072,16,4,128,1,fp8,fp8,0,0.5259679794311524
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,3072,16,8,128,1,float16,float16,0,0.5859007835388184
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,3072,16,16,128,1,float16,float16,0,0.33879039287567136
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,3072,16,8,128,1,float16,fp8,0,0.525708818435669
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,3072,16,8,128,1,fp8,fp8,0,0.5280128002166748
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,3072,16,16,128,1,float16,fp8,0,0.3122368097305298
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,3072,16,16,128,1,fp8,fp8,0,0.30997118949890134
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,3072,16,1,128,1,float16,float16,0,0.2969935894012451
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,3072,16,1,128,1,float16,fp8,0,0.2880079984664917
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,3072,16,1,128,1,fp8,fp8,0,0.2870527982711792
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,3072,16,2,128,1,float16,float16,0,0.2935328006744385
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,3072,16,2,128,1,float16,fp8,0,0.28563361167907714
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,3072,16,2,128,1,fp8,fp8,0,0.2871984004974365
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,3072,16,4,128,1,float16,float16,0,0.30306079387664797
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,3072,16,4,128,1,float16,fp8,0,0.2832223892211914
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,3072,16,4,128,1,fp8,fp8,0,0.2845024108886719
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,3072,16,8,128,1,float16,float16,0,0.3188431978225708
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,3072,16,8,128,1,float16,fp8,0,0.28424479961395266
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,3072,16,16,128,1,float16,float16,0,0.18944799900054932
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,3072,16,16,128,1,float16,fp8,0,0.17290719747543334
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,3072,16,8,128,1,fp8,fp8,0,0.28367040157318113
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,3072,16,16,128,1,fp8,fp8,0,0.17317919731140136
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,3072,16,1,128,1,float16,float16,0,0.16511679887771608
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,3072,16,1,128,1,float16,fp8,0,0.15807520151138305
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,3072,16,1,128,1,fp8,fp8,0,0.15652639865875245
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,3072,16,2,128,1,float16,float16,0,0.16485439538955687
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,3072,16,2,128,1,float16,fp8,0,0.1568560004234314
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,3072,16,2,128,1,fp8,fp8,0,0.15711840391159057
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,3072,16,4,128,1,float16,fp8,0,0.1574735999107361
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,3072,16,4,128,1,float16,float16,0,0.17100000381469727
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,3072,16,4,128,1,fp8,fp8,0,0.1574079990386963
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,3072,16,8,128,1,float16,float16,0,0.17756799459457398
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,3072,16,8,128,1,float16,fp8,0,0.15837119817733764
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,3072,16,8,128,1,fp8,fp8,0,0.15865119695663452
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,3072,16,16,128,1,float16,float16,0,0.11044160127639771
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,3072,16,16,128,1,float16,fp8,0,0.10027999877929687
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,3072,16,16,128,1,fp8,fp8,0,0.10001920461654663
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,3072,16,1,128,1,float16,fp8,0,0.09357759952545167
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,3072,16,1,128,1,fp8,fp8,0,0.09315040111541747
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,3072,16,1,128,1,float16,float16,0,0.0968400001525879
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,3072,16,2,128,1,float16,float16,0,0.09619359970092774
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,3072,16,2,128,1,float16,fp8,0,0.09434880018234253
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,3072,16,2,128,1,fp8,fp8,0,0.09261760115623474
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,3072,16,4,128,1,float16,float16,0,0.09647039771080017
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,3072,16,4,128,1,float16,fp8,0,0.09239680171012879
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,3072,16,4,128,1,fp8,fp8,0,0.09343680143356323
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,3072,16,8,128,1,float16,float16,0,0.10260640382766724
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,3072,16,8,128,1,float16,fp8,0,0.09190080165863038
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,3072,16,8,128,1,fp8,fp8,0,0.09288640022277832
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,3072,16,16,128,1,float16,float16,0,0.06593440175056457
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,3072,16,16,128,1,float16,fp8,0,0.0636896014213562
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,3072,16,16,128,1,fp8,fp8,0,0.0633840024471283
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,3072,16,1,128,1,float16,float16,0,0.061787199974060056
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,3072,16,1,128,1,float16,fp8,0,0.05987840294837952
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,3072,16,1,128,1,fp8,fp8,0,0.060206401348114016
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,3072,16,2,128,1,float16,fp8,0,0.060313600301742556
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,3072,16,2,128,1,float16,float16,0,0.06180480122566223
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,3072,16,4,128,1,float16,float16,0,0.06405280232429504
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,3072,16,2,128,1,fp8,fp8,0,0.05988640189170837
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,3072,16,4,128,1,float16,fp8,0,0.059934401512146
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,3072,16,4,128,1,fp8,fp8,0,0.059646397829055786
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,3072,16,8,128,1,float16,float16,0,0.06536639928817749
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,3072,16,8,128,1,fp8,fp8,0,0.05963680148124695
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,3072,16,8,128,1,float16,fp8,0,0.059832000732421876
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,2048,16,1,128,1,float16,float16,0,1.0159104347229004
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,2048,16,1,128,1,float16,fp8,0,1.0321680068969727
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,2048,16,1,128,1,fp8,fp8,0,1.027988815307617
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,2048,16,2,128,1,float16,float16,0,1.0159839630126952
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,2048,16,2,128,1,float16,fp8,0,1.071735954284668
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,2048,16,2,128,1,fp8,fp8,0,1.0309743881225586
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,2048,16,4,128,1,float16,float16,0,1.05600004196167
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,2048,16,4,128,1,float16,fp8,0,1.0905311584472657
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,2048,16,4,128,1,fp8,fp8,0,1.0243184089660644
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,2048,16,8,128,1,float16,float16,0,1.190116786956787
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,2048,16,8,128,1,float16,fp8,0,1.1095696449279786
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,2048,16,16,128,1,float16,float16,0,0.7369520187377929
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,2048,16,16,128,1,float16,fp8,0,0.6003056049346924
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,2048,16,8,128,1,fp8,fp8,0,1.0234928131103516
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,2048,16,1,128,1,float16,float16,0,0.5315584182739258
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,2048,16,1,128,1,float16,fp8,0,0.5334847927093506
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,2048,16,16,128,1,fp8,fp8,0,0.6776895999908448
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,2048,16,1,128,1,fp8,fp8,0,0.5339151859283447
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,2048,16,2,128,1,float16,float16,0,0.5255263805389404
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,2048,16,2,128,1,float16,fp8,0,0.5755008220672607
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,2048,16,2,128,1,fp8,fp8,0,0.5335072040557861
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,2048,16,4,128,1,float16,float16,0,0.540015983581543
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,2048,16,4,128,1,float16,fp8,0,0.5318319797515869
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,2048,16,4,128,1,fp8,fp8,0,0.5314112186431885
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,2048,16,8,128,1,float16,float16,0,0.5966991901397705
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,2048,16,8,128,1,float16,fp8,0,0.5292096138000488
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,2048,16,16,128,1,float16,fp8,0,0.3170896053314209
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,2048,16,16,128,1,float16,float16,0,0.3947632074356079
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,2048,16,8,128,1,fp8,fp8,0,0.5291120052337647
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,2048,16,16,128,1,fp8,fp8,0,0.31738719940185545
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,2048,16,1,128,1,float16,float16,0,0.3318687915802002
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,2048,16,1,128,1,float16,fp8,0,0.28406078815460206
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,2048,16,1,128,1,fp8,fp8,0,0.28418879508972167
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,2048,16,2,128,1,float16,fp8,0,0.2827631950378418
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,2048,16,2,128,1,float16,float16,0,0.33219358921051023
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,2048,16,2,128,1,fp8,fp8,0,0.283568000793457
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,2048,16,4,128,1,float16,float16,0,0.29134559631347656
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,2048,16,4,128,1,float16,fp8,0,0.3384768009185791
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,2048,16,8,128,1,float16,float16,0,0.3106031894683838
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,2048,16,4,128,1,fp8,fp8,0,0.28382880687713624
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,2048,16,8,128,1,float16,fp8,0,0.28089439868927
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,2048,16,8,128,1,fp8,fp8,0,0.2791775941848755
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,2048,16,16,128,1,float16,float16,0,0.18918559551239014
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,2048,16,16,128,1,float16,fp8,0,0.17420159578323363
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,2048,16,16,128,1,fp8,fp8,0,0.17312639951705933
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,2048,16,1,128,1,float16,float16,0,0.1541440010070801
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,2048,16,1,128,1,float16,fp8,0,0.15443840026855468
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,2048,16,2,128,1,float16,float16,0,0.1551408052444458
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,2048,16,1,128,1,fp8,fp8,0,0.15206559896469116
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,2048,16,2,128,1,float16,fp8,0,0.15446399450302123
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,2048,16,2,128,1,fp8,fp8,0,0.15363199710845948
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,2048,16,4,128,1,fp8,fp8,0,0.1544816017150879
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,2048,16,4,128,1,float16,fp8,0,0.15375360250473022
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,2048,16,4,128,1,float16,float16,0,0.1606719970703125
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,2048,16,8,128,1,float16,float16,0,0.17086559534072876
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,2048,16,16,128,1,float16,float16,0,0.10533920526504517
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,2048,16,8,128,1,float16,fp8,0,0.15378719568252563
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,2048,16,8,128,1,fp8,fp8,0,0.15353280305862427
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,2048,16,16,128,1,float16,fp8,0,0.09783200025558472
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,2048,16,1,128,1,float16,float16,0,0.0869488000869751
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,2048,16,16,128,1,fp8,fp8,0,0.09726560115814209
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,2048,16,1,128,1,float16,fp8,0,0.08749920129776001
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,2048,16,1,128,1,fp8,fp8,0,0.08608160018920899
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,2048,16,2,128,1,float16,float16,0,0.08758400082588196
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,2048,16,2,128,1,float16,fp8,0,0.08707200288772583
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,2048,16,2,128,1,fp8,fp8,0,0.08755679726600647
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,2048,16,4,128,1,float16,float16,0,0.09120640158653259
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,2048,16,4,128,1,float16,fp8,0,0.08641600012779235
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,2048,16,4,128,1,fp8,fp8,0,0.08762720227241516
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,2048,16,8,128,1,float16,fp8,0,0.08852319717407227
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,2048,16,8,128,1,float16,float16,0,0.09678239822387695
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,2048,16,16,128,1,float16,float16,0,0.06344799995422364
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,2048,16,8,128,1,fp8,fp8,0,0.0873744010925293
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,2048,16,16,128,1,float16,fp8,0,0.060262399911880496
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,2048,16,16,128,1,fp8,fp8,0,0.059988802671432494
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,2048,16,1,128,1,float16,float16,0,0.05789120197296142
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,2048,16,1,128,1,float16,fp8,0,0.05659840106964111
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,2048,16,1,128,1,fp8,fp8,0,0.05626720190048218
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,2048,16,2,128,1,float16,float16,0,0.056380802392959596
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,2048,16,2,128,1,float16,fp8,0,0.055580800771713255
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,2048,16,2,128,1,fp8,fp8,0,0.056227201223373414
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,2048,16,4,128,1,float16,float16,0,0.05793439745903015
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,2048,16,4,128,1,float16,fp8,0,0.055529600381851195
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,2048,16,4,128,1,fp8,fp8,0,0.05564000010490418
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,2048,16,8,128,1,float16,fp8,0,0.05573279857635498
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,2048,16,8,128,1,float16,float16,0,0.05961920022964477
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,2048,16,8,128,1,fp8,fp8,0,0.055553597211837766
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,2048,16,16,128,1,float16,float16,0,0.03905439972877502
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,2048,16,16,128,1,float16,fp8,0,0.03911679983139038
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,2048,16,16,128,1,fp8,fp8,0,0.03747200071811676
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,2048,16,1,128,1,float16,float16,0,0.03651039898395538
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,2048,16,1,128,1,float16,fp8,0,0.035062399506568906
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,2048,16,1,128,1,fp8,fp8,0,0.03592160046100616
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,2048,16,2,128,1,float16,float16,0,0.03518239855766296
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,2048,16,2,128,1,float16,fp8,0,0.03600319921970367
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,2048,16,2,128,1,fp8,fp8,0,0.035227200388908385
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,2048,16,4,128,1,float16,float16,0,0.03716799914836884
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,2048,16,4,128,1,float16,fp8,0,0.0350383996963501
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,2048,16,4,128,1,fp8,fp8,0,0.035174399614334106
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,2048,16,8,128,1,float16,float16,0,0.037062400579452516
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,2048,16,8,128,1,float16,fp8,0,0.036929601430892946
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,2048,16,8,128,1,fp8,fp8,0,0.03500800132751465
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1536,16,1,128,1,float16,float16,0,0.646673583984375
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1536,16,1,128,1,fp8,fp8,0,0.6682112216949463
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1536,16,1,128,1,float16,fp8,0,0.665825605392456
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1536,16,2,128,1,float16,float16,0,0.6324944019317627
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1536,16,2,128,1,float16,fp8,0,0.6610655784606934
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1536,16,2,128,1,fp8,fp8,0,0.6643807888031006
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1536,16,4,128,1,float16,float16,0,0.6679743766784668
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1536,16,4,128,1,float16,fp8,0,0.6599904060363769
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1536,16,4,128,1,fp8,fp8,0,0.6626751899719239
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1536,16,8,128,1,float16,fp8,0,0.6942160129547119
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1536,16,8,128,1,float16,float16,0,0.7318687915802002
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1536,16,16,128,1,float16,float16,0,0.43624157905578614
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1536,16,16,128,1,float16,fp8,0,0.43642559051513674
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1536,16,8,128,1,fp8,fp8,0,0.6598112106323242
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1536,16,16,128,1,fp8,fp8,0,0.3982367992401123
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1536,16,1,128,1,float16,float16,0,0.35666399002075194
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1536,16,1,128,1,float16,fp8,0,0.3494672060012817
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1536,16,1,128,1,fp8,fp8,0,0.34707839488983155
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1536,16,2,128,1,float16,float16,0,0.35146241188049315
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1536,16,2,128,1,float16,fp8,0,0.3488095998764038
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1536,16,2,128,1,fp8,fp8,0,0.3472480058670044
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1536,16,4,128,1,float16,float16,0,0.3511199951171875
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1536,16,4,128,1,float16,fp8,0,0.3466048002243042
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1536,16,4,128,1,fp8,fp8,0,0.3459232091903687
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1536,16,8,128,1,float16,fp8,0,0.34444799423217776
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1536,16,8,128,1,float16,float16,0,0.3777856111526489
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1536,16,16,128,1,float16,fp8,0,0.2124255895614624
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1536,16,16,128,1,float16,float16,0,0.23000640869140626
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1536,16,8,128,1,fp8,fp8,0,0.34322400093078614
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1536,16,1,128,1,float16,float16,0,0.18238240480422974
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1536,16,16,128,1,fp8,fp8,0,0.21254398822784423
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1536,16,1,128,1,float16,fp8,0,0.18479360342025758
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1536,16,1,128,1,fp8,fp8,0,0.18631839752197266
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1536,16,2,128,1,float16,float16,0,0.18102400302886962
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1536,16,2,128,1,float16,fp8,0,0.18464159965515137
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1536,16,2,128,1,fp8,fp8,0,0.18618400096893312
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1536,16,4,128,1,float16,float16,0,0.19010080099105836
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1536,16,4,128,1,float16,fp8,0,0.1841231942176819
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1536,16,4,128,1,fp8,fp8,0,0.18523039817810058
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1536,16,8,128,1,float16,float16,0,0.20417280197143556
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1536,16,8,128,1,float16,fp8,0,0.1841599941253662
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1536,16,8,128,1,fp8,fp8,0,0.18549120426177979
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1536,16,16,128,1,float16,float16,0,0.12557439804077147
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1536,16,16,128,1,float16,fp8,0,0.1168239951133728
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1536,16,16,128,1,fp8,fp8,0,0.11680320501327515
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1536,16,1,128,1,float16,float16,0,0.10176960229873658
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1536,16,1,128,1,float16,fp8,0,0.10033919811248779
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1536,16,2,128,1,float16,float16,0,0.10215519666671753
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1536,16,1,128,1,fp8,fp8,0,0.10069760084152221
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1536,16,2,128,1,float16,fp8,0,0.10045759677886963
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1536,16,2,128,1,fp8,fp8,0,0.10135040283203126
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1536,16,4,128,1,float16,float16,0,0.10529919862747192
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1536,16,4,128,1,float16,fp8,0,0.10131200551986694
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1536,16,4,128,1,fp8,fp8,0,0.10125279426574707
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1536,16,8,128,1,fp8,fp8,0,0.10328480005264282
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1536,16,8,128,1,float16,float16,0,0.11327840089797973
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1536,16,8,128,1,float16,fp8,0,0.10299999713897705
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1536,16,16,128,1,float16,fp8,0,0.06794400215148926
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1536,16,16,128,1,fp8,fp8,0,0.06848800182342529
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1536,16,1,128,1,float16,float16,0,0.06128479838371277
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1536,16,16,128,1,float16,float16,0,0.0728384017944336
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1536,16,1,128,1,float16,fp8,0,0.06004480123519897
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1536,16,2,128,1,float16,float16,0,0.061083197593688965
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1536,16,2,128,1,float16,fp8,0,0.06103519797325134
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1536,16,1,128,1,fp8,fp8,0,0.06112160086631775
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1536,16,2,128,1,fp8,fp8,0,0.060313600301742556
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1536,16,4,128,1,float16,float16,0,0.06231200098991394
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1536,16,4,128,1,float16,fp8,0,0.05988159775733948
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1536,16,4,128,1,fp8,fp8,0,0.0611952006816864
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1536,16,8,128,1,float16,float16,0,0.06620960235595703
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1536,16,8,128,1,float16,fp8,0,0.06140000224113464
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1536,16,16,128,1,float16,float16,0,0.043356800079345705
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1536,16,8,128,1,fp8,fp8,0,0.060172802209854125
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1536,16,16,128,1,float16,fp8,0,0.04325920045375824
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1536,16,16,128,1,fp8,fp8,0,0.043279999494552614
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1536,16,1,128,1,float16,float16,0,0.04114400148391724
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1536,16,1,128,1,fp8,fp8,0,0.039211198687553406
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1536,16,2,128,1,float16,float16,0,0.04120000004768372
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1536,16,1,128,1,float16,fp8,0,0.04057759940624237
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1536,16,2,128,1,float16,fp8,0,0.03918879926204681
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1536,16,2,128,1,fp8,fp8,0,0.03918879926204681
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1536,16,4,128,1,float16,float16,0,0.04137600064277649
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1536,16,4,128,1,fp8,fp8,0,0.03925600051879883
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1536,16,4,128,1,float16,fp8,0,0.039540800452232364
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1536,16,8,128,1,float16,float16,0,0.04238399863243103
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1536,16,8,128,1,float16,fp8,0,0.03932160139083862
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1536,16,8,128,1,fp8,fp8,0,0.03908160030841827
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1536,16,16,128,1,float16,fp8,0,0.031044799089431762
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1536,16,16,128,1,float16,float16,0,0.031227201223373413
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1536,16,16,128,1,fp8,fp8,0,0.030959999561309813
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1536,16,1,128,1,float16,float16,0,0.028863999247550964
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1536,16,1,128,1,float16,fp8,0,0.028908801078796387
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1536,16,2,128,1,float16,float16,0,0.028992000222206115
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1536,16,2,128,1,float16,fp8,0,0.028863999247550964
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1536,16,1,128,1,fp8,fp8,0,0.02916960120201111
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1536,16,2,128,1,fp8,fp8,0,0.02882719933986664
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1536,16,4,128,1,float16,float16,0,0.02905279994010925
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1536,16,4,128,1,float16,fp8,0,0.028939199447631837
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1536,16,8,128,1,float16,float16,0,0.030883198976516722
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1536,16,4,128,1,fp8,fp8,0,0.029267200827598573
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1536,16,8,128,1,float16,fp8,0,0.028971201181411742
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1536,16,8,128,1,fp8,fp8,0,0.02887679934501648
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,1024,16,1,128,1,float16,float16,0,0.6624911785125732
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,1024,16,2,128,1,float16,float16,0,0.6640192031860351
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,1024,16,1,128,1,fp8,fp8,0,0.7230703830718994
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,1024,16,1,128,1,float16,fp8,0,0.7194831848144532
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,1024,16,2,128,1,float16,fp8,0,0.7152128219604492
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,1024,16,2,128,1,fp8,fp8,0,0.717412805557251
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,1024,16,4,128,1,float16,float16,0,0.6990287780761719
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,1024,16,4,128,1,float16,fp8,0,0.7144239902496338
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,1024,16,4,128,1,fp8,fp8,0,0.7137904167175293
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,1024,16,8,128,1,float16,float16,0,0.7793791770935059
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,1024,16,8,128,1,float16,fp8,0,0.7141183853149414
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,1024,16,8,128,1,fp8,fp8,0,0.7143599987030029
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1024,16,16,128,1,float16,float16,0,0.5276591777801514
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1024,16,16,128,1,float16,fp8,0,0.4409167766571045
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1024,16,16,128,1,fp8,fp8,0,0.44249281883239744
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1024,16,1,128,1,float16,float16,0,0.4049647808074951
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1024,16,1,128,1,float16,fp8,0,0.3733743906021118
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1024,16,1,128,1,fp8,fp8,0,0.37238240242004395
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1024,16,2,128,1,float16,float16,0,0.3908159971237183
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1024,16,2,128,1,float16,fp8,0,0.37205440998077394
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1024,16,2,128,1,fp8,fp8,0,0.37166399955749513
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1024,16,4,128,1,float16,float16,0,0.3621376037597656
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1024,16,4,128,1,float16,fp8,0,0.369321608543396
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1024,16,4,128,1,fp8,fp8,0,0.3721024036407471
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1024,16,8,128,1,float16,float16,0,0.4013040065765381
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1024,16,8,128,1,float16,fp8,0,0.36819360256195066
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1024,16,16,128,1,float16,float16,0,0.25023679733276366
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1024,16,8,128,1,fp8,fp8,0,0.36974880695343015
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1024,16,16,128,1,float16,fp8,0,0.23119680881500243
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1024,16,1,128,1,float16,float16,0,0.1854464054107666
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1024,16,16,128,1,fp8,fp8,0,0.23195359706878663
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1024,16,1,128,1,float16,fp8,0,0.19558559656143187
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1024,16,1,128,1,fp8,fp8,0,0.19752639532089233
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1024,16,2,128,1,float16,float16,0,0.1831712007522583
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1024,16,2,128,1,float16,fp8,0,0.1957551956176758
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1024,16,2,128,1,fp8,fp8,0,0.19617760181427002
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1024,16,4,128,1,float16,float16,0,0.19358240365982055
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1024,16,4,128,1,float16,fp8,0,0.1954383969306946
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1024,16,4,128,1,fp8,fp8,0,0.19575999975204467
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1024,16,8,128,1,float16,float16,0,0.2124016046524048
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1024,16,8,128,1,float16,fp8,0,0.19507839679718017
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1024,16,16,128,1,float16,float16,0,0.13471839427947999
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1024,16,8,128,1,fp8,fp8,0,0.19578720331192018
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1024,16,16,128,1,fp8,fp8,0,0.12522879838943482
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1024,16,16,128,1,float16,fp8,0,0.12632160186767577
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1024,16,1,128,1,float16,float16,0,0.10203520059585572
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1024,16,1,128,1,float16,fp8,0,0.10688960552215576
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1024,16,1,128,1,fp8,fp8,0,0.10549440383911132
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1024,16,2,128,1,float16,float16,0,0.1035423994064331
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1024,16,2,128,1,float16,fp8,0,0.10588639974594116
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1024,16,2,128,1,fp8,fp8,0,0.10772479772567749
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1024,16,4,128,1,float16,float16,0,0.10669120550155639
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1024,16,4,128,1,float16,fp8,0,0.10703519582748414
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1024,16,8,128,1,float16,float16,0,0.1176576018333435
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1024,16,4,128,1,fp8,fp8,0,0.10676319599151611
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1024,16,8,128,1,float16,fp8,0,0.10731199979782105
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1024,16,8,128,1,fp8,fp8,0,0.10771839618682862
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1024,16,16,128,1,float16,float16,0,0.07509920001029968
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1024,16,16,128,1,float16,fp8,0,0.07067840099334717
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1024,16,16,128,1,fp8,fp8,0,0.07153440117835999
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1024,16,1,128,1,float16,float16,0,0.058956801891326904
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1024,16,2,128,1,float16,float16,0,0.05743039846420288
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1024,16,1,128,1,float16,fp8,0,0.06030719876289368
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1024,16,1,128,1,fp8,fp8,0,0.05981760025024414
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1024,16,2,128,1,float16,fp8,0,0.06096159815788269
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1024,16,2,128,1,fp8,fp8,0,0.0601360023021698
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1024,16,4,128,1,float16,float16,0,0.06060960292816162
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1024,16,4,128,1,float16,fp8,0,0.05981919765472412
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1024,16,8,128,1,float16,float16,0,0.0659712016582489
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1024,16,4,128,1,fp8,fp8,0,0.06066399812698364
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1024,16,8,128,1,float16,fp8,0,0.059918397665023805
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1024,16,8,128,1,fp8,fp8,0,0.06044800281524658
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1024,16,16,128,1,float16,float16,0,0.04319039881229401
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1024,16,16,128,1,float16,fp8,0,0.043219199776649474
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1024,16,16,128,1,fp8,fp8,0,0.04299359917640686
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1024,16,1,128,1,float16,float16,0,0.03743999898433685
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1024,16,1,128,1,float16,fp8,0,0.03765760064125061
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1024,16,1,128,1,fp8,fp8,0,0.03911199867725372
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1024,16,2,128,1,float16,float16,0,0.03734399974346161
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1024,16,2,128,1,fp8,fp8,0,0.038910400867462155
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1024,16,2,128,1,float16,fp8,0,0.03903039991855621
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1024,16,4,128,1,float16,float16,0,0.03920480012893677
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1024,16,4,128,1,float16,fp8,0,0.038104000687599185
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1024,16,4,128,1,fp8,fp8,0,0.038899201154708865
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1024,16,8,128,1,float16,float16,0,0.041294398903846743
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1024,16,8,128,1,fp8,fp8,0,0.03891200125217438
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1024,16,16,128,1,float16,float16,0,0.026844799518585205
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1024,16,8,128,1,float16,fp8,0,0.03887040019035339
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1024,16,16,128,1,float16,fp8,0,0.026841598749160766
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1024,16,16,128,1,fp8,fp8,0,0.026918399333953857
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1024,16,1,128,1,float16,float16,0,0.024644799530506134
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1024,16,1,128,1,fp8,fp8,0,0.024833600223064422
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1024,16,1,128,1,float16,fp8,0,0.024769599735736846
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1024,16,2,128,1,float16,float16,0,0.02463199943304062
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1024,16,2,128,1,float16,fp8,0,0.024691200256347655
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1024,16,4,128,1,float16,float16,0,0.024775999784469604
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1024,16,2,128,1,fp8,fp8,0,0.024902400374412537
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1024,16,4,128,1,float16,fp8,0,0.02476159930229187
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1024,16,4,128,1,fp8,fp8,0,0.024766400456428528
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1024,16,8,128,1,float16,float16,0,0.025332799553871153
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1024,16,8,128,1,float16,fp8,0,0.02465759962797165
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1024,16,16,128,1,float16,float16,0,0.02292640060186386
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1024,16,8,128,1,fp8,fp8,0,0.024710400402545928
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1024,16,16,128,1,float16,fp8,0,0.024723200500011443
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1024,16,1,128,1,float16,float16,0,0.022651199996471406
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1024,16,16,128,1,fp8,fp8,0,0.024803200364112855
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1024,16,1,128,1,float16,fp8,0,0.02290560007095337
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1024,16,1,128,1,fp8,fp8,0,0.022705599665641785
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1024,16,2,128,1,float16,float16,0,0.022734400629997254
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1024,16,2,128,1,float16,fp8,0,0.02277279943227768
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1024,16,2,128,1,fp8,fp8,0,0.02271520048379898
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1024,16,4,128,1,float16,float16,0,0.022787199914455415
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1024,16,4,128,1,float16,fp8,0,0.022859199345111846
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1024,16,4,128,1,fp8,fp8,0,0.022756800055503845
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1024,16,8,128,1,float16,float16,0,0.022777600586414336
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1024,16,8,128,1,fp8,fp8,0,0.02279520034790039
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1024,16,8,128,1,float16,fp8,0,0.022752000391483305
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,512,16,1,128,1,float16,float16,0,0.4914463996887207
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,512,16,1,128,1,float16,fp8,0,0.5592735767364502
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,512,16,1,128,1,fp8,fp8,0,0.5650432109832764
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,512,16,2,128,1,float16,float16,0,0.4863471984863281
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,512,16,2,128,1,float16,fp8,0,0.5590591907501221
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,512,16,2,128,1,fp8,fp8,0,0.5630671977996826
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,512,16,4,128,1,float16,float16,0,0.5248591899871826
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,512,16,4,128,1,float16,fp8,0,0.556824016571045
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,512,16,4,128,1,fp8,fp8,0,0.5613024234771729
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,512,16,8,128,1,float16,float16,0,0.6009696006774903
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,512,16,8,128,1,float16,fp8,0,0.5551199913024902
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,512,16,8,128,1,fp8,fp8,0,0.5598464012145996
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,512,16,16,128,1,float16,float16,0,0.39009599685668944
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,512,16,16,128,1,float16,fp8,0,0.35848639011383054
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,512,16,16,128,1,fp8,fp8,0,0.36232800483703614
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,512,16,1,128,1,float16,float16,0,0.2552704095840454
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,512,16,1,128,1,float16,fp8,0,0.29083681106567383
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,512,16,1,128,1,fp8,fp8,0,0.293889594078064
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,512,16,2,128,1,float16,float16,0,0.252892804145813
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,512,16,2,128,1,float16,fp8,0,0.2898416042327881
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,512,16,2,128,1,fp8,fp8,0,0.2940448045730591
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,512,16,4,128,1,float16,float16,0,0.273636794090271
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,512,16,4,128,1,float16,fp8,0,0.28785440921783445
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,512,16,4,128,1,fp8,fp8,0,0.29190239906311033
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,512,16,8,128,1,float16,float16,0,0.31000959873199463
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,512,16,8,128,1,float16,fp8,0,0.28714399337768554
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,512,16,16,128,1,float16,float16,0,0.2023616075515747
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,512,16,8,128,1,fp8,fp8,0,0.2904144048690796
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,512,16,16,128,1,float16,fp8,0,0.18700640201568602
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,512,16,1,128,1,float16,float16,0,0.1361199975013733
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,512,16,16,128,1,fp8,fp8,0,0.18832000494003295
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,512,16,1,128,1,float16,fp8,0,0.15166560411453248
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,512,16,1,128,1,fp8,fp8,0,0.15338560342788696
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,512,16,2,128,1,float16,float16,0,0.1352895975112915
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,512,16,2,128,1,float16,fp8,0,0.15130720138549805
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,512,16,2,128,1,fp8,fp8,0,0.15263999700546266
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,512,16,4,128,1,float16,float16,0,0.1468000054359436
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,512,16,4,128,1,fp8,fp8,0,0.15236639976501465
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,512,16,4,128,1,float16,fp8,0,0.15156960487365723
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,512,16,8,128,1,float16,float16,0,0.16611039638519287
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,512,16,8,128,1,float16,fp8,0,0.15247679948806764
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,512,16,16,128,1,float16,fp8,0,0.10329920053482056
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,512,16,16,128,1,fp8,fp8,0,0.10071040391921997
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,512,16,8,128,1,fp8,fp8,0,0.15132960081100463
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,512,16,16,128,1,float16,float16,0,0.10840480327606201
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,512,16,1,128,1,float16,float16,0,0.07630720138549804
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,512,16,1,128,1,float16,fp8,0,0.08219839930534363
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,512,16,1,128,1,fp8,fp8,0,0.08192319869995117
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,512,16,2,128,1,float16,float16,0,0.07603840231895446
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,512,16,2,128,1,float16,fp8,0,0.08269439935684204
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,512,16,4,128,1,float16,float16,0,0.07889119982719421
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,512,16,2,128,1,fp8,fp8,0,0.08267040252685547
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,512,16,4,128,1,float16,fp8,0,0.08334879875183106
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,512,16,4,128,1,fp8,fp8,0,0.08245919942855835
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,512,16,8,128,1,float16,float16,0,0.08870720267295837
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,512,16,8,128,1,float16,fp8,0,0.08217440247535705
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,512,16,16,128,1,float16,float16,0,0.05945919752120972
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,512,16,16,128,1,float16,fp8,0,0.05596799850463867
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,512,16,8,128,1,fp8,fp8,0,0.08283039927482605
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,512,16,16,128,1,fp8,fp8,0,0.05721759796142578
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,512,16,1,128,1,float16,float16,0,0.043224000930786134
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,512,16,1,128,1,float16,fp8,0,0.04544639885425568
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,512,16,2,128,1,float16,float16,0,0.042089599370956424
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,512,16,1,128,1,fp8,fp8,0,0.04540480077266693
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,512,16,2,128,1,float16,fp8,0,0.0463344007730484
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,512,16,2,128,1,fp8,fp8,0,0.04563519954681396
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,512,16,4,128,1,float16,float16,0,0.045289599895477296
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,512,16,4,128,1,float16,fp8,0,0.04532960057258606
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,512,16,4,128,1,fp8,fp8,0,0.04666720032691955
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,512,16,8,128,1,float16,float16,0,0.051614397764205934
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,512,16,8,128,1,float16,fp8,0,0.04563199877738953
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,512,16,16,128,1,float16,float16,0,0.033718401193618776
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,512,16,8,128,1,fp8,fp8,0,0.045572799444198606
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,512,16,16,128,1,float16,fp8,0,0.032995200157165526
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,512,16,16,128,1,fp8,fp8,0,0.03314239978790283
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,512,16,1,128,1,float16,float16,0,0.02837119996547699
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,512,16,1,128,1,float16,fp8,0,0.029151999950408937
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,512,16,1,128,1,fp8,fp8,0,0.02885119915008545
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,512,16,2,128,1,float16,float16,0,0.0271232008934021
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,512,16,2,128,1,float16,fp8,0,0.029187199473381043
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,512,16,4,128,1,float16,float16,0,0.028836798667907716
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,512,16,2,128,1,fp8,fp8,0,0.029025599360466003
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,512,16,4,128,1,float16,fp8,0,0.02900159955024719
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,512,16,4,128,1,fp8,fp8,0,0.029585599899291992
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,512,16,8,128,1,float16,float16,0,0.030908799171447753
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,512,16,8,128,1,fp8,fp8,0,0.029028800129890443
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,512,16,8,128,1,float16,fp8,0,0.028881600499153136
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,512,16,16,128,1,float16,float16,0,0.02069759964942932
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,512,16,16,128,1,float16,fp8,0,0.020740799605846405
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,512,16,16,128,1,fp8,fp8,0,0.020660799741744996
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,512,16,1,128,1,float16,float16,0,0.018488000333309173
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,512,16,1,128,1,float16,fp8,0,0.01866399943828583
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,512,16,2,128,1,float16,float16,0,0.01849920004606247
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,512,16,1,128,1,fp8,fp8,0,0.01860000044107437
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,512,16,2,128,1,float16,fp8,0,0.018654400110244752
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,512,16,2,128,1,fp8,fp8,0,0.01868959963321686
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,512,16,4,128,1,float16,float16,0,0.019065600633621217
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,512,16,4,128,1,float16,fp8,0,0.018644799292087556
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,512,16,4,128,1,fp8,fp8,0,0.01870719939470291
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,512,16,8,128,1,float16,float16,0,0.018651199340820313
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,512,16,8,128,1,float16,fp8,0,0.018614399433135986
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,512,16,8,128,1,fp8,fp8,0,0.018795199692249298
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,512,16,16,128,1,float16,float16,0,0.01830720007419586
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,512,16,16,128,1,float16,fp8,0,0.0186271995306015
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,512,16,16,128,1,fp8,fp8,0,0.018751999735832213
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,512,16,1,128,1,float16,float16,0,0.01656000018119812
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,512,16,1,128,1,float16,fp8,0,0.01664319932460785
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,512,16,1,128,1,fp8,fp8,0,0.017235200107097625
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,512,16,2,128,1,float16,fp8,0,0.016582399606704712
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,512,16,2,128,1,float16,float16,0,0.016484799981117248
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,512,16,2,128,1,fp8,fp8,0,0.017632000148296356
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,512,16,4,128,1,float16,float16,0,0.016564799845218657
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,512,16,4,128,1,float16,fp8,0,0.016659200191497803
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,512,16,4,128,1,fp8,fp8,0,0.01780160069465637
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,512,16,8,128,1,float16,float16,0,0.016769599914550782
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,512,16,8,128,1,float16,fp8,0,0.016574400663375854
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,512,16,16,128,1,float16,float16,0,0.016575999557971954
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,512,16,8,128,1,fp8,fp8,0,0.018486399948596955
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,512,16,16,128,1,float16,fp8,0,0.016633599996566772
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,512,16,16,128,1,fp8,fp8,0,0.01658879965543747
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,512,16,1,128,1,float16,float16,0,0.016702400147914888
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,512,16,1,128,1,float16,fp8,0,0.01652639955282211
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,512,16,1,128,1,fp8,fp8,0,0.01663679927587509
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,512,16,2,128,1,float16,float16,0,0.016734400391578676
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,512,16,2,128,1,float16,fp8,0,0.016598400473594666
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,512,16,2,128,1,fp8,fp8,0,0.016616000235080718
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,512,16,4,128,1,float16,float16,0,0.016728000342845918
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,512,16,4,128,1,float16,fp8,0,0.016548800468444824
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,512,16,4,128,1,fp8,fp8,0,0.016598400473594666
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,512,16,8,128,1,float16,float16,0,0.016726399958133697
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,512,16,8,128,1,float16,fp8,0,0.016543999314308167
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,512,16,8,128,1,fp8,fp8,0,0.016616000235080718
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,256,16,1,128,1,float16,float16,0,0.20463199615478517
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,256,16,1,128,1,fp8,fp8,0,0.24380960464477539
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,256,16,1,128,1,float16,fp8,0,0.2459552049636841
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,256,16,2,128,1,float16,float16,0,0.2041408061981201
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,256,16,2,128,1,float16,fp8,0,0.24399518966674805
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,256,16,2,128,1,fp8,fp8,0,0.2452768087387085
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,256,16,4,128,1,float16,float16,0,0.22232000827789306
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,256,16,4,128,1,float16,fp8,0,0.24307680130004883
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,256,16,4,128,1,fp8,fp8,0,0.24446558952331543
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,256,16,8,128,1,float16,float16,0,0.2613408088684082
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,256,16,8,128,1,float16,fp8,0,0.2429487943649292
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,256,16,8,128,1,fp8,fp8,0,0.24219679832458496
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,256,16,16,128,1,float16,float16,0,0.1775871992111206
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,256,16,16,128,1,float16,fp8,0,0.1640895962715149
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,256,16,16,128,1,fp8,fp8,0,0.16411999464035035
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,256,16,1,128,1,float16,float16,0,0.1101040005683899
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,256,16,1,128,1,float16,fp8,0,0.12841440439224244
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,256,16,2,128,1,float16,float16,0,0.1104688048362732
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,256,16,1,128,1,fp8,fp8,0,0.1297152042388916
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,256,16,2,128,1,float16,fp8,0,0.12898080348968505
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,256,16,2,128,1,fp8,fp8,0,0.12945120334625243
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,256,16,4,128,1,float16,float16,0,0.1192896008491516
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,256,16,4,128,1,float16,fp8,0,0.12850719690322876
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,256,16,4,128,1,fp8,fp8,0,0.12925920486450196
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,256,16,8,128,1,float16,float16,0,0.1387503981590271
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,256,16,8,128,1,float16,fp8,0,0.12802239656448364
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,256,16,16,128,1,float16,float16,0,0.09479359984397888
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,256,16,8,128,1,fp8,fp8,0,0.1294160008430481
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,256,16,16,128,1,float16,fp8,0,0.08622879981994629
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,256,16,1,128,1,float16,float16,0,0.060070401430130003
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,256,16,16,128,1,fp8,fp8,0,0.08646559715270996
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,256,16,1,128,1,float16,fp8,0,0.06766240000724792
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,256,16,2,128,1,float16,float16,0,0.0602944016456604
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,256,16,1,128,1,fp8,fp8,0,0.06785439848899841
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,256,16,2,128,1,float16,fp8,0,0.0674448013305664
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,256,16,2,128,1,fp8,fp8,0,0.06782079935073852
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,256,16,4,128,1,float16,float16,0,0.06546080112457275
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,256,16,4,128,1,float16,fp8,0,0.06829439997673034
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,256,16,4,128,1,fp8,fp8,0,0.06805440187454223
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,256,16,8,128,1,float16,float16,0,0.07542080283164979
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,256,16,8,128,1,float16,fp8,0,0.06863200068473815
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,256,16,16,128,1,float16,float16,0,0.053660798072814944
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,256,16,16,128,1,float16,fp8,0,0.04955680072307587
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,256,16,8,128,1,fp8,fp8,0,0.06839200258255004
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,256,16,16,128,1,fp8,fp8,0,0.05136160254478454
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,256,16,1,128,1,float16,float16,0,0.034985598921775815
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,256,16,1,128,1,float16,fp8,0,0.03919360041618347
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,256,16,2,128,1,float16,float16,0,0.034980800747871396
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,256,16,1,128,1,fp8,fp8,0,0.040808001160621644
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,256,16,2,128,1,float16,fp8,0,0.03914879858493805
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,256,16,4,128,1,float16,float16,0,0.039043200016021726
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,256,16,2,128,1,fp8,fp8,0,0.03914879858493805
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,256,16,4,128,1,float16,fp8,0,0.039201599359512326
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,256,16,4,128,1,fp8,fp8,0,0.04107680022716522
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,256,16,8,128,1,float16,float16,0,0.04372960031032562
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,256,16,8,128,1,float16,fp8,0,0.0392879992723465
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,256,16,8,128,1,fp8,fp8,0,0.039233601093292235
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,256,16,16,128,1,float16,float16,0,0.028174400329589844
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,256,16,16,128,1,float16,fp8,0,0.028862398862838746
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,256,16,1,128,1,float16,float16,0,0.022710399329662324
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,256,16,1,128,1,float16,fp8,0,0.024723200500011443
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,256,16,16,128,1,fp8,fp8,0,0.029766398668289184
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,256,16,1,128,1,fp8,fp8,0,0.02515999972820282
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,256,16,2,128,1,float16,float16,0,0.022708800435066224
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,256,16,2,128,1,fp8,fp8,0,0.024774399399757386
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,256,16,2,128,1,float16,fp8,0,0.024937599897384644
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,256,16,4,128,1,float16,float16,0,0.022780799865722658
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,256,16,4,128,1,float16,fp8,0,0.024721600115299225
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,256,16,8,128,1,float16,float16,0,0.024843199551105498
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,256,16,4,128,1,fp8,fp8,0,0.02468640059232712
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,256,16,8,128,1,float16,fp8,0,0.024743999540805816
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,256,16,8,128,1,fp8,fp8,0,0.024740800261497498
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,256,16,16,128,1,float16,float16,0,0.01711200028657913
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,256,16,16,128,1,fp8,fp8,0,0.018620799481868743
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,256,16,16,128,1,float16,fp8,0,0.018651199340820313
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,256,16,1,128,1,float16,float16,0,0.014641599357128143
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,256,16,1,128,1,float16,fp8,0,0.01658719927072525
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,256,16,1,128,1,fp8,fp8,0,0.016579200327396394
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,256,16,2,128,1,float16,float16,0,0.014699199795722961
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,256,16,2,128,1,float16,fp8,0,0.016569599509239197
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,256,16,2,128,1,fp8,fp8,0,0.016527999937534333
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,256,16,4,128,1,float16,float16,0,0.016704000532627106
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,256,16,4,128,1,float16,fp8,0,0.01658879965543747
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,256,16,4,128,1,fp8,fp8,0,0.01646080017089844
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,256,16,8,128,1,float16,fp8,0,0.01659200042486191
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,256,16,8,128,1,float16,float16,0,0.016916799545288085
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,256,16,8,128,1,fp8,fp8,0,0.016553600132465363
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,256,16,16,128,1,float16,float16,0,0.014635199308395385
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,256,16,16,128,1,float16,fp8,0,0.016672000288963318
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,256,16,16,128,1,fp8,fp8,0,0.01592160016298294
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,256,16,1,128,1,float16,float16,0,0.014521600306034088
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,256,16,1,128,1,float16,fp8,0,0.014575999975204468
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,256,16,2,128,1,float16,float16,0,0.014508800208568573
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,256,16,1,128,1,fp8,fp8,0,0.014537599682807923
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,256,16,4,128,1,float16,float16,0,0.014529600739479065
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,256,16,2,128,1,float16,fp8,0,0.014838400483131408
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,256,16,4,128,1,float16,fp8,0,0.014494399726390838
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,256,16,2,128,1,fp8,fp8,0,0.014593599736690522
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,256,16,4,128,1,fp8,fp8,0,0.014814400672912597
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,256,16,8,128,1,float16,fp8,0,0.014532800018787383
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,256,16,8,128,1,float16,float16,0,0.01462240070104599
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,256,16,16,128,1,float16,float16,0,0.014529600739479065
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,256,16,8,128,1,fp8,fp8,0,0.014777599275112152
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,256,16,16,128,1,float16,fp8,0,0.014579200744628906
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,256,16,16,128,1,fp8,fp8,0,0.014524799585342408
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,256,16,1,128,1,float16,float16,0,0.014646400511264802
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,256,16,1,128,1,float16,fp8,0,0.014519999921321868
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,256,16,1,128,1,fp8,fp8,0,0.014267200231552124
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,256,16,2,128,1,float16,float16,0,0.014502400159835815
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,256,16,2,128,1,float16,fp8,0,0.014476799964904785
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,256,16,2,128,1,fp8,fp8,0,0.014467200636863709
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,256,16,4,128,1,float16,float16,0,0.01448799967765808
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,256,16,4,128,1,float16,fp8,0,0.01451839953660965
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,256,16,4,128,1,fp8,fp8,0,0.014552000164985656
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,256,16,8,128,1,float16,float16,0,0.014475199580192565
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,256,16,8,128,1,float16,fp8,0,0.014497600495815277
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,256,16,8,128,1,fp8,fp8,0,0.014564800262451171
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,256,16,16,128,1,float16,float16,0,0.014422400295734406
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,256,16,16,128,1,float16,fp8,0,0.014511999487876893
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,256,16,16,128,1,fp8,fp8,0,0.014526399970054626
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,256,16,1,128,1,float16,float16,0,0.014502400159835815
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,256,16,1,128,1,float16,fp8,0,0.01377439945936203
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,256,16,1,128,1,fp8,fp8,0,0.013831999897956849
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,256,16,2,128,1,float16,float16,0,0.01451839953660965
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,256,16,2,128,1,float16,fp8,0,0.01451680064201355
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,256,16,2,128,1,fp8,fp8,0,0.014524799585342408
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,256,16,4,128,1,float16,fp8,0,0.014451199769973755
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,256,16,4,128,1,float16,float16,0,0.014572800695896148
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,256,16,4,128,1,fp8,fp8,0,0.014526399970054626
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,256,16,8,128,1,float16,float16,0,0.012964800000190735
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,256,16,8,128,1,fp8,fp8,0,0.014537599682807923
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,256,16,8,128,1,float16,fp8,0,0.014507199823856353
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,128,16,1,128,1,float16,float16,0,0.12920479774475097
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,128,16,1,128,1,fp8,fp8,0,0.14758720397949218
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,128,16,1,128,1,float16,fp8,0,0.1479408025741577
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,128,16,2,128,1,float16,float16,0,0.12873120307922364
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,128,16,2,128,1,fp8,fp8,0,0.1476415991783142
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,128,16,2,128,1,float16,fp8,0,0.1474287986755371
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,128,16,4,128,1,float16,float16,0,0.13803679943084718
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,128,16,4,128,1,float16,fp8,0,0.1476639986038208
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,128,16,4,128,1,fp8,fp8,0,0.14702240228652955
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,128,16,8,128,1,float16,float16,0,0.15691039562225342
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,128,16,8,128,1,float16,fp8,0,0.14710400104522706
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,128,16,16,128,1,float16,float16,0,0.10036319494247437
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,128,16,8,128,1,fp8,fp8,0,0.14670079946517944
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,128,16,16,128,1,float16,fp8,0,0.09654080271720886
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,128,16,16,128,1,fp8,fp8,0,0.09756159782409668
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,128,16,1,128,1,float16,float16,0,0.07020959854125977
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,128,16,1,128,1,float16,fp8,0,0.07811520099639893
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,128,16,1,128,1,fp8,fp8,0,0.07874879837036133
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,128,16,2,128,1,float16,float16,0,0.07044960260391235
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,128,16,2,128,1,float16,fp8,0,0.07826240062713623
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,128,16,2,128,1,fp8,fp8,0,0.07883039712905884
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,128,16,4,128,1,float16,float16,0,0.07493759989738465
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,128,16,4,128,1,float16,fp8,0,0.0787551999092102
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,128,16,4,128,1,fp8,fp8,0,0.07888000011444092
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,128,16,8,128,1,float16,float16,0,0.08416479825973511
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,128,16,8,128,1,float16,fp8,0,0.07892959713935851
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,128,16,8,128,1,fp8,fp8,0,0.07875999808311462
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,128,16,16,128,1,float16,float16,0,0.053660798072814944
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,128,16,16,128,1,float16,fp8,0,0.05316320061683655
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,128,16,16,128,1,fp8,fp8,0,0.053116798400878906
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,128,16,1,128,1,float16,float16,0,0.03707680106163025
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,128,16,1,128,1,float16,fp8,0,0.042233601212501526
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,128,16,1,128,1,fp8,fp8,0,0.043244799971580504
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,128,16,2,128,1,float16,float16,0,0.03786399960517883
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,128,16,2,128,1,float16,fp8,0,0.04321439862251282
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,128,16,4,128,1,float16,float16,0,0.04113759994506836
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,128,16,4,128,1,float16,fp8,0,0.04322080016136169
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,128,16,2,128,1,fp8,fp8,0,0.043463999032974245
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,128,16,4,128,1,fp8,fp8,0,0.043166399002075195
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,128,16,8,128,1,float16,float16,0,0.04736959934234619
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,128,16,8,128,1,float16,fp8,0,0.043398401141166686
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,128,16,16,128,1,float16,float16,0,0.030929601192474364
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,128,16,8,128,1,fp8,fp8,0,0.04312640130519867
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,128,16,16,128,1,float16,fp8,0,0.03119199872016907
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,128,16,1,128,1,float16,float16,0,0.024803200364112855
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,128,16,16,128,1,fp8,fp8,0,0.03102239966392517
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,128,16,1,128,1,fp8,fp8,0,0.02677760124206543
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,128,16,1,128,1,float16,fp8,0,0.027051201462745665
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,128,16,2,128,1,float16,float16,0,0.024736000597476958
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,128,16,2,128,1,float16,fp8,0,0.026804798841476442
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,128,16,4,128,1,float16,float16,0,0.024747200310230255
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,128,16,2,128,1,fp8,fp8,0,0.027079999446868896
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,128,16,4,128,1,fp8,fp8,0,0.026820799708366393
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,128,16,4,128,1,float16,fp8,0,0.026820799708366393
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,128,16,8,128,1,float16,float16,0,0.029032000899314882
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,128,16,8,128,1,float16,fp8,0,0.02682720124721527
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,128,16,8,128,1,fp8,fp8,0,0.02675839960575104
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,128,16,16,128,1,float16,float16,0,0.01873439997434616
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,128,16,16,128,1,float16,fp8,0,0.020147199928760528
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,128,16,16,128,1,fp8,fp8,0,0.018911999464035035
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,128,16,1,128,1,float16,float16,0,0.01661919951438904
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,128,16,1,128,1,float16,fp8,0,0.016681599617004394
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,128,16,1,128,1,fp8,fp8,0,0.016652800142765045
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,128,16,2,128,1,float16,fp8,0,0.016601599752902985
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,128,16,2,128,1,float16,float16,0,0.016761599481105803
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,128,16,2,128,1,fp8,fp8,0,0.016654400527477263
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,128,16,4,128,1,float16,float16,0,0.016648000478744505
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,128,16,4,128,1,fp8,fp8,0,0.01661120057106018
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,128,16,4,128,1,float16,fp8,0,0.018723200261592864
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,128,16,8,128,1,float16,float16,0,0.018566399812698364
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,128,16,8,128,1,float16,fp8,0,0.016596800088882445
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,128,16,8,128,1,fp8,fp8,0,0.01870719939470291
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,128,16,16,128,1,float16,float16,0,0.012489599734544754
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,128,16,16,128,1,float16,fp8,0,0.012828800082206725
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,128,16,1,128,1,float16,float16,0,0.012478400021791458
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,128,16,1,128,1,float16,fp8,0,0.012572799623012543
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,128,16,16,128,1,fp8,fp8,0,0.014672000706195832
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,128,16,1,128,1,fp8,fp8,0,0.01249919980764389
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,128,16,2,128,1,float16,float16,0,0.012441600114107132
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,128,16,2,128,1,float16,fp8,0,0.01266240030527115
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,128,16,2,128,1,fp8,fp8,0,0.012521600723266602
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,128,16,4,128,1,float16,float16,0,0.012488000094890594
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,128,16,4,128,1,float16,fp8,0,0.012625600397586822
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,128,16,4,128,1,fp8,fp8,0,0.01250080019235611
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,128,16,8,128,1,float16,float16,0,0.012532800436019897
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,128,16,8,128,1,fp8,fp8,0,0.012537600100040435
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,128,16,8,128,1,float16,fp8,0,0.012611199915409089
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,128,16,16,128,1,float16,float16,0,0.012457600235939026
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,128,16,16,128,1,float16,fp8,0,0.01250080019235611
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,128,16,16,128,1,fp8,fp8,0,0.012583999335765839
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,128,16,1,128,1,float16,float16,0,0.010478399693965912
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,128,16,1,128,1,fp8,fp8,0,0.012451200187206269
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,128,16,1,128,1,float16,fp8,0,0.010478399693965912
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,128,16,2,128,1,float16,float16,0,0.010718400031328202
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,128,16,2,128,1,fp8,fp8,0,0.01058880016207695
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,128,16,2,128,1,float16,fp8,0,0.012476799637079239
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,128,16,4,128,1,float16,float16,0,0.010499200224876404
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,128,16,4,128,1,float16,fp8,0,0.012460800260305405
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,128,16,8,128,1,float16,float16,0,0.0108255997300148
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,128,16,4,128,1,fp8,fp8,0,0.012495999783277511
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,128,16,8,128,1,float16,fp8,0,0.012503999471664428
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,128,16,8,128,1,fp8,fp8,0,0.010625600069761276
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,128,16,16,128,1,float16,float16,0,0.011083199828863143
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,128,16,16,128,1,float16,fp8,0,0.011558400094509124
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,128,16,16,128,1,fp8,fp8,0,0.010446400195360184
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,128,16,1,128,1,float16,float16,0,0.010664000362157821
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,128,16,1,128,1,float16,fp8,0,0.010500799864530563
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,128,16,1,128,1,fp8,fp8,0,0.01051200032234192
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,128,16,2,128,1,float16,float16,0,0.010806400328874588
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,128,16,2,128,1,float16,fp8,0,0.010515200346708298
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,128,16,2,128,1,fp8,fp8,0,0.010502400249242783
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,128,16,4,128,1,float16,float16,0,0.010753600299358368
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,128,16,4,128,1,float16,fp8,0,0.010478399693965912
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,128,16,4,128,1,fp8,fp8,0,0.0105103999376297
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,128,16,8,128,1,float16,float16,0,0.011664000153541566
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,128,16,8,128,1,float16,fp8,0,0.010435199737548828
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,128,16,8,128,1,fp8,fp8,0,0.010444799810647965
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,128,16,16,128,1,float16,fp8,0,0.010447999835014344
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,128,16,16,128,1,float16,float16,0,0.012067200243473053
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,128,16,16,128,1,fp8,fp8,0,0.010566399991512298
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,128,16,1,128,1,float16,float16,0,0.01053600013256073
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,128,16,1,128,1,float16,fp8,0,0.010660800337791442
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,128,16,1,128,1,fp8,fp8,0,0.010527999699115753
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,128,16,2,128,1,float16,float16,0,0.010427200049161912
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,128,16,2,128,1,float16,fp8,0,0.010532800108194351
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,128,16,2,128,1,fp8,fp8,0,0.01051200032234192
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,128,16,4,128,1,float16,float16,0,0.010444799810647965
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,128,16,4,128,1,float16,fp8,0,0.010406400263309478
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,128,16,4,128,1,fp8,fp8,0,0.010447999835014344
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,128,16,8,128,1,float16,float16,0,0.010440000146627427
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,128,16,8,128,1,float16,fp8,0,0.010407999902963639
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,128,16,8,128,1,fp8,fp8,0,0.01055999994277954
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,64,16,1,128,1,float16,float16,0,0.10691679716110229
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,64,16,1,128,1,float16,fp8,0,0.11292959451675415
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,64,16,2,128,1,float16,float16,0,0.10676159858703613
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,64,16,1,128,1,fp8,fp8,0,0.11365280151367188
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,64,16,2,128,1,float16,fp8,0,0.11300640106201172
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,64,16,2,128,1,fp8,fp8,0,0.11309759616851807
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,64,16,4,128,1,float16,float16,0,0.11125760078430176
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,64,16,4,128,1,float16,fp8,0,0.11314079761505128
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,64,16,4,128,1,fp8,fp8,0,0.11307359933853149
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,64,16,8,128,1,float16,float16,0,0.1206976056098938
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,64,16,8,128,1,float16,fp8,0,0.11314719915390015
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,64,16,8,128,1,fp8,fp8,0,0.11298880577087403
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,64,16,16,128,1,float16,float16,0,0.0720255970954895
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,64,16,16,128,1,float16,fp8,0,0.07033439874649047
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,64,16,16,128,1,fp8,fp8,0,0.07094879746437073
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,64,16,1,128,1,float16,float16,0,0.05768640041351318
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,64,16,1,128,1,float16,fp8,0,0.06034719944000244
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,64,16,1,128,1,fp8,fp8,0,0.061603200435638425
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,64,16,2,128,1,float16,float16,0,0.05659040212631226
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,64,16,2,128,1,float16,fp8,0,0.06090720295906067
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,64,16,2,128,1,fp8,fp8,0,0.061635202169418334
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,64,16,4,128,1,float16,float16,0,0.0602512001991272
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,64,16,4,128,1,float16,fp8,0,0.06007999777793884
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,64,16,4,128,1,fp8,fp8,0,0.061647999286651614
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,64,16,8,128,1,float16,float16,0,0.06560959815979003
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,64,16,8,128,1,float16,fp8,0,0.061540800333023074
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,64,16,8,128,1,fp8,fp8,0,0.06160640120506287
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,64,16,16,128,1,float16,float16,0,0.03922879993915558
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,64,16,16,128,1,float16,fp8,0,0.03911199867725372
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,64,16,16,128,1,fp8,fp8,0,0.039129599928855896
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,64,16,1,128,1,float16,float16,0,0.03314880132675171
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,64,16,1,128,1,float16,fp8,0,0.03503359854221344
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,64,16,1,128,1,fp8,fp8,0,0.03495680093765259
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,64,16,2,128,1,float16,float16,0,0.03299840092658997
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,64,16,2,128,1,float16,fp8,0,0.03507519960403442
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,64,16,2,128,1,fp8,fp8,0,0.03504480123519897
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,64,16,4,128,1,float16,float16,0,0.03309119939804077
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,64,16,4,128,1,float16,fp8,0,0.034959998726844785
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,64,16,4,128,1,fp8,fp8,0,0.03490720093250275
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,64,16,8,128,1,float16,fp8,0,0.03497759997844696
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,64,16,8,128,1,float16,float16,0,0.03529599905014038
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,64,16,8,128,1,fp8,fp8,0,0.03359839916229248
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,64,16,16,128,1,float16,float16,0,0.022780799865722658
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,64,16,16,128,1,float16,fp8,0,0.02473759949207306
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,64,16,16,128,1,fp8,fp8,0,0.024854399263858795
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,64,16,1,128,1,float16,fp8,0,0.022726400196552275
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,64,16,1,128,1,fp8,fp8,0,0.022646400332450866
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,64,16,1,128,1,float16,float16,0,0.020660799741744996
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,64,16,2,128,1,float16,float16,0,0.020947200059890748
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,64,16,2,128,1,fp8,fp8,0,0.022644799947738648
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,64,16,2,128,1,float16,fp8,0,0.022732800245285033
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,64,16,4,128,1,float16,float16,0,0.022617599368095397
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,64,16,4,128,1,float16,fp8,0,0.02273920029401779
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,64,16,4,128,1,fp8,fp8,0,0.022819200158119203
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,64,16,8,128,1,float16,float16,0,0.022969600558280946
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,64,16,8,128,1,float16,fp8,0,0.02268799990415573
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,64,16,8,128,1,fp8,fp8,0,0.022686399519443512
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,64,16,16,128,1,float16,fp8,0,0.016550399363040924
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,64,16,16,128,1,float16,float16,0,0.016892799735069276
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,64,16,16,128,1,fp8,fp8,0,0.016574400663375854
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,64,16,1,128,1,float16,float16,0,0.014496000111103058
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,64,16,1,128,1,fp8,fp8,0,0.014609600603580474
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,64,16,1,128,1,float16,fp8,0,0.014875200390815736
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,64,16,2,128,1,float16,float16,0,0.014574399590492249
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,64,16,2,128,1,float16,fp8,0,0.01456640064716339
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,64,16,4,128,1,float16,float16,0,0.014564800262451171
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,64,16,2,128,1,fp8,fp8,0,0.014839999377727509
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,64,16,4,128,1,float16,fp8,0,0.014591999351978302
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,64,16,4,128,1,fp8,fp8,0,0.014523200690746307
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,64,16,8,128,1,float16,float16,0,0.01664479970932007
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,64,16,8,128,1,float16,fp8,0,0.014521600306034088
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,64,16,8,128,1,fp8,fp8,0,0.01454080045223236
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,64,16,16,128,1,float16,float16,0,0.012590399384498597
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,64,16,16,128,1,float16,fp8,0,0.012593600153923034
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,64,16,16,128,1,fp8,fp8,0,0.012520000338554382
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,64,16,1,128,1,float16,float16,0,0.012409599870443344
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,64,16,1,128,1,float16,fp8,0,0.010524799674749374
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,64,16,1,128,1,fp8,fp8,0,0.012388800084590913
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,64,16,2,128,1,float16,float16,0,0.0124208003282547
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,64,16,2,128,1,float16,fp8,0,0.010491199791431427
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,64,16,2,128,1,fp8,fp8,0,0.012404800206422806
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,64,16,4,128,1,float16,float16,0,0.01242400035262108
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,64,16,4,128,1,float16,fp8,0,0.01043360009789467
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,64,16,4,128,1,fp8,fp8,0,0.012308800220489502
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,64,16,8,128,1,float16,float16,0,0.012412799894809723
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,64,16,8,128,1,float16,fp8,0,0.010438399761915207
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,64,16,8,128,1,fp8,fp8,0,0.010420800000429154
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,64,16,16,128,1,float16,float16,0,0.01199359968304634
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,64,16,16,128,1,float16,fp8,0,0.010545600205659866
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,64,16,16,128,1,fp8,fp8,0,0.010425599664449692
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,64,16,1,128,1,float16,fp8,0,0.010460799932479859
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,64,16,1,128,1,float16,float16,0,0.010491199791431427
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,64,16,2,128,1,float16,float16,0,0.010497599840164185
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,64,16,1,128,1,fp8,fp8,0,0.010422399640083313
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,64,16,2,128,1,float16,fp8,0,0.010622400045394897
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,64,16,2,128,1,fp8,fp8,0,0.010436800122261048
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,64,16,4,128,1,float16,float16,0,0.010463999956846238
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,64,16,4,128,1,float16,fp8,0,0.010526400059461594
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,64,16,8,128,1,float16,float16,0,0.010492800176143647
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,64,16,4,128,1,fp8,fp8,0,0.010744000226259232
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,64,16,8,128,1,float16,fp8,0,0.010409600287675857
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,64,16,8,128,1,fp8,fp8,0,0.010603199899196624
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,64,16,16,128,1,float16,fp8,0,0.010463999956846238
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,64,16,16,128,1,float16,float16,0,0.010751999914646149
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,64,16,16,128,1,fp8,fp8,0,0.010526400059461594
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,64,16,1,128,1,float16,float16,0,0.010406400263309478
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,64,16,1,128,1,float16,fp8,0,0.010564800351858139
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,64,16,1,128,1,fp8,fp8,0,0.010718400031328202
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,64,16,2,128,1,float16,float16,0,0.010375999659299851
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,64,16,2,128,1,float16,fp8,0,0.010529600083827972
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,64,16,4,128,1,float16,float16,0,0.010427200049161912
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,64,16,2,128,1,fp8,fp8,0,0.010790400207042694
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,64,16,4,128,1,float16,fp8,0,0.010494399815797806
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,64,16,4,128,1,fp8,fp8,0,0.01037919968366623
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,64,16,8,128,1,float16,fp8,0,0.01064319983124733
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,64,16,8,128,1,float16,float16,0,0.010694400221109391
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,64,16,8,128,1,fp8,fp8,0,0.010438399761915207
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,64,16,16,128,1,float16,float16,0,0.010516799986362457
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,64,16,16,128,1,float16,fp8,0,0.01063840016722679
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,64,16,16,128,1,fp8,fp8,0,0.010436800122261048
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,64,16,1,128,1,float16,float16,0,0.01045600026845932
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,64,16,1,128,1,float16,fp8,0,0.010704000294208527
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,64,16,1,128,1,fp8,fp8,0,0.010424000024795533
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,64,16,2,128,1,float16,float16,0,0.010414399951696397
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,64,16,2,128,1,fp8,fp8,0,0.010446400195360184
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,64,16,4,128,1,float16,float16,0,0.010470400005578995
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,64,16,2,128,1,float16,fp8,0,0.010678400099277497
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,64,16,4,128,1,float16,fp8,0,0.0104592002928257
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,64,16,4,128,1,fp8,fp8,0,0.010526400059461594
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,64,16,8,128,1,float16,float16,0,0.010593599826097488
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,64,16,8,128,1,float16,fp8,0,0.010417599976062775
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,64,16,8,128,1,fp8,fp8,0,0.010503999888896942
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,32,16,1,128,1,float16,float16,0,0.09292320013046265
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,32,16,1,128,1,float16,fp8,0,0.09492319822311401
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,32,16,1,128,1,fp8,fp8,0,0.09484320282936096
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,32,16,2,128,1,float16,float16,0,0.09299520254135132
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,32,16,2,128,1,float16,fp8,0,0.09450719952583313
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,32,16,2,128,1,fp8,fp8,0,0.09477279782295227
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,32,16,4,128,1,float16,float16,0,0.09708319902420044
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,32,16,4,128,1,float16,fp8,0,0.09479680061340331
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,32,16,4,128,1,fp8,fp8,0,0.09449759721755982
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,32,16,8,128,1,float16,float16,0,0.10183199644088745
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,32,16,8,128,1,float16,fp8,0,0.09454079866409301
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,32,16,8,128,1,fp8,fp8,0,0.09468799829483032
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,32,16,16,128,1,float16,float16,0,0.05780799984931946
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,32,16,16,128,1,float16,fp8,0,0.05558879971504212
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,32,16,16,128,1,fp8,fp8,0,0.05555840134620667
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,32,16,1,128,1,float16,float16,0,0.05168319940567016
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,32,16,1,128,1,float16,fp8,0,0.05140159726142883
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,32,16,1,128,1,fp8,fp8,0,0.05151680111885071
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,32,16,2,128,1,float16,float16,0,0.051583999395370485
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,32,16,2,128,1,float16,fp8,0,0.051412802934646604
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,32,16,2,128,1,fp8,fp8,0,0.05151680111885071
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,32,16,4,128,1,float16,float16,0,0.052331197261810306
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,32,16,4,128,1,float16,fp8,0,0.051472002267837526
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,32,16,4,128,1,fp8,fp8,0,0.0514735996723175
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,32,16,8,128,1,float16,fp8,0,0.051475197076797485
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,32,16,8,128,1,fp8,fp8,0,0.051611202955245974
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,32,16,8,128,1,float16,float16,0,0.05454720258712768
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,32,16,16,128,1,float16,float16,0,0.03298720121383667
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,32,16,16,128,1,float16,fp8,0,0.03289920091629028
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,32,16,16,128,1,fp8,fp8,0,0.03304159939289093
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,32,16,1,128,1,float16,float16,0,0.03081600069999695
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,32,16,1,128,1,float16,fp8,0,0.030928000807762146
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,32,16,1,128,1,fp8,fp8,0,0.030956798791885377
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,32,16,2,128,1,float16,float16,0,0.03078399896621704
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,32,16,2,128,1,float16,fp8,0,0.0308896005153656
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,32,16,2,128,1,fp8,fp8,0,0.03097760081291199
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,32,16,4,128,1,float16,float16,0,0.03094879984855652
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,32,16,4,128,1,float16,fp8,0,0.03087199926376343
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,32,16,4,128,1,fp8,fp8,0,0.03096640110015869
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,32,16,8,128,1,float16,float16,0,0.031297600269317626
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,32,16,8,128,1,float16,fp8,0,0.030846399068832398
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,32,16,16,128,1,float16,float16,0,0.020652799308300017
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,32,16,8,128,1,fp8,fp8,0,0.03097119927406311
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,32,16,16,128,1,float16,fp8,0,0.020737600326538087
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,32,16,16,128,1,fp8,fp8,0,0.02070080041885376
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,32,16,1,128,1,float16,float16,0,0.020633600652217865
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,32,16,1,128,1,float16,fp8,0,0.020603199303150178
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,32,16,2,128,1,float16,float16,0,0.020684799551963805
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,32,16,2,128,1,float16,fp8,0,0.020552000403404234
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,32,16,1,128,1,fp8,fp8,0,0.020380799472332
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,32,16,2,128,1,fp8,fp8,0,0.020691199600696562
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,32,16,4,128,1,float16,float16,0,0.020588800311088562
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,32,16,4,128,1,float16,fp8,0,0.020641599595546723
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,32,16,4,128,1,fp8,fp8,0,0.020665599405765532
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,32,16,8,128,1,float16,float16,0,0.020716799795627593
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,32,16,8,128,1,float16,fp8,0,0.02067680060863495
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,32,16,8,128,1,fp8,fp8,0,0.02064319998025894
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,32,16,16,128,1,float16,float16,0,0.015118399262428283
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,32,16,16,128,1,float16,fp8,0,0.014494399726390838
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,32,16,16,128,1,fp8,fp8,0,0.014691199362277984
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,32,16,1,128,1,float16,float16,0,0.014569599926471711
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,32,16,1,128,1,float16,fp8,0,0.014497600495815277
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,32,16,1,128,1,fp8,fp8,0,0.014470399916172027
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,32,16,2,128,1,float16,float16,0,0.014392000436782838
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,32,16,2,128,1,float16,fp8,0,0.014504000544548035
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,32,16,2,128,1,fp8,fp8,0,0.01449279934167862
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,32,16,4,128,1,float16,float16,0,0.014523200690746307
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,32,16,4,128,1,float16,fp8,0,0.014539200067520141
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,32,16,4,128,1,fp8,fp8,0,0.014510400593280792
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,32,16,8,128,1,float16,float16,0,0.01454080045223236
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,32,16,8,128,1,float16,fp8,0,0.014531199634075165
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,32,16,8,128,1,fp8,fp8,0,0.014572800695896148
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,32,16,16,128,1,float16,float16,0,0.010462400317192078
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,32,16,16,128,1,float16,fp8,0,0.012406399846076966
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,32,16,16,128,1,fp8,fp8,0,0.010440000146627427
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,32,16,1,128,1,float16,fp8,0,0.010529600083827972
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,32,16,1,128,1,fp8,fp8,0,0.010422399640083313
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,32,16,1,128,1,float16,float16,0,0.010796800255775452
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,32,16,2,128,1,float16,float16,0,0.010494399815797806
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,32,16,2,128,1,float16,fp8,0,0.010438399761915207
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,32,16,2,128,1,fp8,fp8,0,0.010665600001811982
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,32,16,4,128,1,float16,float16,0,0.010620799660682679
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,32,16,4,128,1,float16,fp8,0,0.010526400059461594
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,32,16,8,128,1,float16,float16,0,0.01045600026845932
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,32,16,4,128,1,fp8,fp8,0,0.010673599690198899
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,32,16,8,128,1,float16,fp8,0,0.01045759990811348
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,32,16,8,128,1,fp8,fp8,0,0.010515200346708298
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,32,16,16,128,1,float16,fp8,0,0.010419200360774993
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,32,16,16,128,1,float16,float16,0,0.010785599797964096
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,32,16,1,128,1,float16,fp8,0,0.010440000146627427
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,32,16,16,128,1,fp8,fp8,0,0.010515200346708298
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,32,16,1,128,1,float16,float16,0,0.010412800312042236
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,32,16,1,128,1,fp8,fp8,0,0.010440000146627427
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,32,16,2,128,1,float16,float16,0,0.010553599894046783
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,32,16,2,128,1,float16,fp8,0,0.01043040007352829
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,32,16,2,128,1,fp8,fp8,0,0.010531199723482132
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,32,16,4,128,1,float16,float16,0,0.01050880029797554
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,32,16,4,128,1,float16,fp8,0,0.010596799850463866
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,32,16,4,128,1,fp8,fp8,0,0.010475199669599533
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,32,16,8,128,1,float16,float16,0,0.010540799796581268
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,32,16,8,128,1,float16,fp8,0,0.010452800244092942
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,32,16,16,128,1,float16,float16,0,0.010555200278759003
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,32,16,8,128,1,fp8,fp8,0,0.010468800365924836
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,32,16,16,128,1,float16,fp8,0,0.010454399883747101
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,32,16,16,128,1,fp8,fp8,0,0.010500799864530563
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,32,16,1,128,1,float16,fp8,0,0.010419200360774993
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,32,16,1,128,1,float16,float16,0,0.01058880016207695
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,32,16,1,128,1,fp8,fp8,0,0.010492800176143647
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,32,16,2,128,1,float16,float16,0,0.01043360009789467
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,32,16,2,128,1,float16,fp8,0,0.01061279997229576
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,32,16,2,128,1,fp8,fp8,0,0.010547199845314026
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,32,16,4,128,1,float16,float16,0,0.010583999752998351
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,32,16,4,128,1,fp8,fp8,0,0.010503999888896942
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,32,16,8,128,1,float16,float16,0,0.010443200170993806
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,32,16,4,128,1,float16,fp8,0,0.010627199709415436
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,32,16,8,128,1,float16,fp8,0,0.010351999849081039
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,32,16,8,128,1,fp8,fp8,0,0.01043360009789467
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,32,16,16,128,1,float16,fp8,0,0.01029760017991066
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,32,16,16,128,1,float16,float16,0,0.010580799728631973
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,32,16,16,128,1,fp8,fp8,0,0.010447999835014344
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,32,16,1,128,1,float16,float16,0,0.010364799946546554
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,32,16,1,128,1,fp8,fp8,0,0.010447999835014344
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,32,16,1,128,1,float16,fp8,0,0.010502400249242783
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,32,16,2,128,1,float16,float16,0,0.01048320010304451
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,32,16,2,128,1,float16,fp8,0,0.010467199981212616
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,32,16,2,128,1,fp8,fp8,0,0.010571199655532836
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,32,16,4,128,1,float16,float16,0,0.010435199737548828
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,32,16,4,128,1,float16,fp8,0,0.010344000160694122
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,32,16,4,128,1,fp8,fp8,0,0.010369600355625152
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,32,16,8,128,1,float16,float16,0,0.010467199981212616
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,32,16,8,128,1,float16,fp8,0,0.010492800176143647
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,32,16,8,128,1,fp8,fp8,0,0.010440000146627427
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,16,16,1,128,1,float16,float16,0,0.08996319770812988
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,16,16,1,128,1,fp8,fp8,0,0.08626719713211059
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,16,16,1,128,1,float16,fp8,0,0.08628479838371277
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,16,16,2,128,1,float16,float16,0,0.08969119787216187
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,16,16,2,128,1,float16,fp8,0,0.08666399717330933
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,16,16,2,128,1,fp8,fp8,0,0.08654720187187195
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,16,16,4,128,1,float16,float16,0,0.09057279825210571
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,16,16,4,128,1,fp8,fp8,0,0.0862879991531372
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,16,16,4,128,1,float16,fp8,0,0.08679360151290894
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,16,16,8,128,1,float16,float16,0,0.09404799938201905
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,16,16,8,128,1,float16,fp8,0,0.08642079830169677
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,16,16,8,128,1,fp8,fp8,0,0.08655840158462524
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,16,16,16,128,1,float16,float16,0,0.05226399898529053
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,16,16,16,128,1,float16,fp8,0,0.04984639883041382
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,16,16,16,128,1,fp8,fp8,0,0.051627200841903684
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,16,16,1,128,1,float16,float16,0,0.04965119957923889
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,16,16,1,128,1,float16,fp8,0,0.04733439981937408
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,16,16,1,128,1,fp8,fp8,0,0.04890719950199127
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,16,16,2,128,1,float16,float16,0,0.04954560101032257
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,16,16,2,128,1,float16,fp8,0,0.04741599857807159
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,16,16,4,128,1,float16,float16,0,0.05125759840011597
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,16,16,2,128,1,fp8,fp8,0,0.04885919988155365
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,16,16,4,128,1,float16,fp8,0,0.048809599876403806
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,16,16,4,128,1,fp8,fp8,0,0.049235200881958006
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,16,16,8,128,1,float16,float16,0,0.05139679908752441
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,16,16,8,128,1,float16,fp8,0,0.04907999932765961
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,16,16,16,128,1,float16,float16,0,0.030983999371528625
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,16,16,8,128,1,fp8,fp8,0,0.049327999353408813
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,16,16,16,128,1,fp8,fp8,0,0.028942400217056276
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,16,16,16,128,1,float16,fp8,0,0.02897599935531616
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,16,16,1,128,1,float16,float16,0,0.02892639935016632
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,16,16,1,128,1,float16,fp8,0,0.02900159955024719
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,16,16,1,128,1,fp8,fp8,0,0.02882080078125
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,16,16,2,128,1,float16,fp8,0,0.028828799724578857
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,16,16,2,128,1,float16,float16,0,0.029078400135040282
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,16,16,4,128,1,float16,float16,0,0.028883200883865357
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,16,16,2,128,1,fp8,fp8,0,0.028942400217056276
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,16,16,4,128,1,float16,fp8,0,0.02889919877052307
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,16,16,4,128,1,fp8,fp8,0,0.028987199068069458
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,16,16,8,128,1,float16,float16,0,0.03089759945869446
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,16,16,8,128,1,float16,fp8,0,0.028880000114440918
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,16,16,16,128,1,float16,float16,0,0.020721599459648132
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,16,16,8,128,1,fp8,fp8,0,0.029080000519752503
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,16,16,16,128,1,float16,fp8,0,0.020558400452136992
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,16,16,16,128,1,fp8,fp8,0,0.020640000700950623
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,16,16,1,128,1,float16,float16,0,0.018963199853897095
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,16,16,1,128,1,float16,fp8,0,0.01871040016412735
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,16,16,1,128,1,fp8,fp8,0,0.018612800538539885
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,16,16,2,128,1,float16,float16,0,0.01894879937171936
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,16,16,2,128,1,float16,fp8,0,0.018743999302387238
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,16,16,2,128,1,fp8,fp8,0,0.01866399943828583
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,16,16,4,128,1,float16,float16,0,0.0190528005361557
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,16,16,4,128,1,float16,fp8,0,0.018695999681949616
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,16,16,4,128,1,fp8,fp8,0,0.018636800348758698
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,16,16,8,128,1,float16,fp8,0,0.018724800646305086
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,16,16,8,128,1,float16,float16,0,0.020755200088024138
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,16,16,8,128,1,fp8,fp8,0,0.0192671999335289
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,16,16,16,128,1,float16,float16,0,0.014448000490665436
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,16,16,16,128,1,fp8,fp8,0,0.01448799967765808
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,16,16,16,128,1,float16,fp8,0,0.014483200013637542
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,16,16,1,128,1,float16,float16,0,0.014430400729179383
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,16,16,1,128,1,float16,fp8,0,0.012574400007724761
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,16,16,1,128,1,fp8,fp8,0,0.014508800208568573
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,16,16,2,128,1,float16,float16,0,0.014428800344467163
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,16,16,2,128,1,float16,fp8,0,0.012464000284671784
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,16,16,2,128,1,fp8,fp8,0,0.014510400593280792
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,16,16,4,128,1,float16,float16,0,0.01443839967250824
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,16,16,4,128,1,float16,fp8,0,0.012540799379348756
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,16,16,4,128,1,fp8,fp8,0,0.014424000680446625
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,16,16,8,128,1,float16,float16,0,0.0144896000623703
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,16,16,8,128,1,float16,fp8,0,0.014531199634075165
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,16,16,8,128,1,fp8,fp8,0,0.014424000680446625
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,16,16,16,128,1,float16,float16,0,0.011591999977827071
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,16,16,16,128,1,fp8,fp8,0,0.010552000254392624
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,16,16,16,128,1,float16,fp8,0,0.010419200360774993
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,16,16,1,128,1,float16,float16,0,0.010547199845314026
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,16,16,2,128,1,float16,float16,0,0.01051200032234192
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,16,16,1,128,1,float16,fp8,0,0.010604800283908844
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,16,16,1,128,1,fp8,fp8,0,0.010486400127410889
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,16,16,2,128,1,float16,fp8,0,0.010473600029945374
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,16,16,2,128,1,fp8,fp8,0,0.010548800230026245
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,16,16,4,128,1,float16,float16,0,0.010470400005578995
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,16,16,4,128,1,float16,fp8,0,0.010540799796581268
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,16,16,8,128,1,float16,float16,0,0.010425599664449692
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,16,16,4,128,1,fp8,fp8,0,0.010513599961996078
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,16,16,8,128,1,float16,fp8,0,0.010576000064611435
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16,16,16,128,1,float16,float16,0,0.010608000308275222
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,16,16,8,128,1,fp8,fp8,0,0.010395199805498124
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16,16,16,128,1,fp8,fp8,0,0.010444799810647965
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16,16,1,128,1,float16,float16,0,0.01045759990811348
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16,16,16,128,1,float16,fp8,0,0.010513599961996078
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16,16,1,128,1,float16,fp8,0,0.01045759990811348
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16,16,1,128,1,fp8,fp8,0,0.010387200117111205
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16,16,2,128,1,float16,float16,0,0.010412800312042236
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16,16,2,128,1,float16,fp8,0,0.010632000118494033
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16,16,2,128,1,fp8,fp8,0,0.010440000146627427
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16,16,4,128,1,float16,float16,0,0.010465600341558457
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16,16,4,128,1,float16,fp8,0,0.010531199723482132
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16,16,4,128,1,fp8,fp8,0,0.010451199859380722
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16,16,8,128,1,float16,float16,0,0.010361599922180175
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16,16,8,128,1,float16,fp8,0,0.010507199913263321
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16,16,8,128,1,fp8,fp8,0,0.010364799946546554
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16,16,16,128,1,float16,float16,0,0.01053759977221489
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16,16,16,128,1,float16,fp8,0,0.010468800365924836
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16,16,16,128,1,fp8,fp8,0,0.01040479987859726
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16,16,1,128,1,float16,float16,0,0.010366400331258773
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16,16,1,128,1,float16,fp8,0,0.0104032002389431
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16,16,1,128,1,fp8,fp8,0,0.010409600287675857
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16,16,2,128,1,float16,float16,0,0.010363200306892395
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16,16,2,128,1,float16,fp8,0,0.010302399843931198
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16,16,2,128,1,fp8,fp8,0,0.010444799810647965
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16,16,4,128,1,float16,float16,0,0.010520000010728836
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16,16,4,128,1,float16,fp8,0,0.010452800244092942
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16,16,4,128,1,fp8,fp8,0,0.010384000092744827
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16,16,8,128,1,float16,float16,0,0.01037919968366623
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16,16,8,128,1,float16,fp8,0,0.01034879982471466
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16,16,8,128,1,fp8,fp8,0,0.0104032002389431
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16,16,16,128,1,float16,float16,0,0.010572800040245056
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16,16,16,128,1,float16,fp8,0,0.01032319962978363
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16,16,1,128,1,float16,float16,0,0.010299199819564819
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16,16,16,128,1,fp8,fp8,0,0.010515200346708298
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16,16,1,128,1,float16,fp8,0,0.01037919968366623
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16,16,1,128,1,fp8,fp8,0,0.01045759990811348
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16,16,2,128,1,float16,float16,0,0.0105103999376297
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16,16,2,128,1,fp8,fp8,0,0.010500799864530563
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16,16,2,128,1,float16,fp8,0,0.010364799946546554
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16,16,4,128,1,float16,fp8,0,0.010331200063228607
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16,16,4,128,1,float16,float16,0,0.010553599894046783
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16,16,4,128,1,fp8,fp8,0,0.01029599979519844
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16,16,8,128,1,float16,float16,0,0.010487999767065048
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16,16,8,128,1,float16,fp8,0,0.010475199669599533
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16,16,8,128,1,fp8,fp8,0,0.010503999888896942
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,1,16,1,128,1,float16,fp8,0,0.08214880228042602
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,1,16,1,128,1,float16,float16,0,0.08844799995422363
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,1,16,1,128,1,fp8,fp8,0,0.08220000267028808
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,1,16,2,128,1,float16,float16,0,0.08850560188293458
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,1,16,2,128,1,fp8,fp8,0,0.08223199844360352
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,1,16,2,128,1,float16,fp8,0,0.08222079873085023
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,1,16,4,128,1,float16,float16,0,0.0883903980255127
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,1,16,4,128,1,float16,fp8,0,0.08220800161361694
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,1,16,4,128,1,fp8,fp8,0,0.08224639892578126
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,1,16,8,128,1,float16,float16,0,0.08838080167770386
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,1,16,8,128,1,float16,fp8,0,0.08224639892578126
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,1,16,8,128,1,fp8,fp8,0,0.08218880295753479
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,1,16,16,128,1,float16,float16,0,0.04948959946632385
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,1,16,16,128,1,float16,fp8,0,0.04668320119380951
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,1,16,1,128,1,float16,float16,0,0.04945760071277618
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,1,16,16,128,1,fp8,fp8,0,0.04648480117321015
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,1,16,1,128,1,float16,fp8,0,0.047147199511528015
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,1,16,1,128,1,fp8,fp8,0,0.04642719924449921
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,1,16,2,128,1,float16,fp8,0,0.04610080122947693
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,1,16,2,128,1,float16,float16,0,0.04968000054359436
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,1,16,2,128,1,fp8,fp8,0,0.04547840058803558
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,1,16,4,128,1,float16,float16,0,0.04942240118980408
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,1,16,4,128,1,float16,fp8,0,0.04557119905948639
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,1,16,4,128,1,fp8,fp8,0,0.0454255998134613
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,1,16,8,128,1,float16,float16,0,0.04938080012798309
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,1,16,8,128,1,fp8,fp8,0,0.04572640061378479
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,1,16,8,128,1,float16,fp8,0,0.04566240012645721
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1,16,16,128,1,float16,float16,0,0.028881600499153136
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1,16,16,128,1,float16,fp8,0,0.026910400390625
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1,16,1,128,1,float16,float16,0,0.028968000411987306
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1,16,16,128,1,fp8,fp8,0,0.027006399631500245
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1,16,1,128,1,float16,fp8,0,0.02696320116519928
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1,16,2,128,1,float16,float16,0,0.029158401489257812
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1,16,1,128,1,fp8,fp8,0,0.02691679894924164
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1,16,2,128,1,float16,fp8,0,0.027091199159622194
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1,16,2,128,1,fp8,fp8,0,0.026998400688171387
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1,16,4,128,1,float16,float16,0,0.028839999437332155
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1,16,4,128,1,float16,fp8,0,0.026948800683021544
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1,16,8,128,1,float16,float16,0,0.028921601176261903
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1,16,4,128,1,fp8,fp8,0,0.026940798759460448
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1,16,8,128,1,float16,fp8,0,0.02688480019569397
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1,16,8,128,1,fp8,fp8,0,0.026811200380325317
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1,16,16,128,1,float16,float16,0,0.018798400461673737
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1,16,16,128,1,float16,fp8,0,0.01876640021800995
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1,16,16,128,1,fp8,fp8,0,0.01857600063085556
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1,16,1,128,1,float16,fp8,0,0.018753600120544434
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1,16,1,128,1,float16,float16,0,0.01884160041809082
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1,16,1,128,1,fp8,fp8,0,0.018612800538539885
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1,16,2,128,1,float16,float16,0,0.018718400597572328
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1,16,2,128,1,float16,fp8,0,0.018595199286937713
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1,16,2,128,1,fp8,fp8,0,0.01860000044107437
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1,16,4,128,1,float16,float16,0,0.01923519968986511
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1,16,4,128,1,fp8,fp8,0,0.018644799292087556
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1,16,4,128,1,float16,fp8,0,0.018644799292087556
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1,16,8,128,1,float16,float16,0,0.019415999948978423
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1,16,8,128,1,float16,fp8,0,0.018603199720382692
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1,16,8,128,1,fp8,fp8,0,0.01863519996404648
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1,16,16,128,1,float16,float16,0,0.014572800695896148
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1,16,16,128,1,float16,fp8,0,0.01257600039243698
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1,16,16,128,1,fp8,fp8,0,0.014511999487876893
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1,16,1,128,1,float16,float16,0,0.01451680064201355
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1,16,1,128,1,float16,fp8,0,0.012544000148773193
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1,16,1,128,1,fp8,fp8,0,0.01266240030527115
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1,16,2,128,1,float16,float16,0,0.014619199931621552
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1,16,2,128,1,float16,fp8,0,0.012559999525547028
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1,16,2,128,1,fp8,fp8,0,0.012569600343704223
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1,16,4,128,1,float16,float16,0,0.014521600306034088
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1,16,4,128,1,float16,fp8,0,0.012545600533485413
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1,16,4,128,1,fp8,fp8,0,0.012468799948692322
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1,16,8,128,1,float16,float16,0,0.014694400131702423
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1,16,8,128,1,float16,fp8,0,0.01250240057706833
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1,16,16,128,1,float16,float16,0,0.012052799761295318
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1,16,8,128,1,fp8,fp8,0,0.012558400630950928
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1,16,16,128,1,float16,fp8,0,0.010420800000429154
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1,16,1,128,1,float16,float16,0,0.010440000146627427
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1,16,16,128,1,fp8,fp8,0,0.010471999645233154
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1,16,1,128,1,float16,fp8,0,0.010447999835014344
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1,16,1,128,1,fp8,fp8,0,0.010384000092744827
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1,16,2,128,1,float16,float16,0,0.012361600250005721
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1,16,2,128,1,float16,fp8,0,0.010460799932479859
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1,16,2,128,1,fp8,fp8,0,0.010425599664449692
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1,16,4,128,1,float16,float16,0,0.012392000108957291
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1,16,4,128,1,float16,fp8,0,0.01043360009789467
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1,16,4,128,1,fp8,fp8,0,0.010393600165843963
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1,16,8,128,1,float16,float16,0,0.012332800030708312
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1,16,8,128,1,float16,fp8,0,0.010407999902963639
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1,16,8,128,1,fp8,fp8,0,0.010407999902963639
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1,16,16,128,1,float16,fp8,0,0.010335999727249145
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1,16,16,128,1,float16,float16,0,0.010491199791431427
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1,16,16,128,1,fp8,fp8,0,0.01037440001964569
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1,16,1,128,1,float16,float16,0,0.010398399829864503
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1,16,1,128,1,float16,fp8,0,0.010401599854230881
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1,16,1,128,1,fp8,fp8,0,0.01040479987859726
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1,16,2,128,1,float16,float16,0,0.01037919968366623
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1,16,2,128,1,float16,fp8,0,0.010313600301742554
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1,16,2,128,1,fp8,fp8,0,0.01045759990811348
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1,16,4,128,1,float16,float16,0,0.010447999835014344
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1,16,4,128,1,float16,fp8,0,0.01045759990811348
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1,16,4,128,1,fp8,fp8,0,0.010364799946546554
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1,16,8,128,1,float16,float16,0,0.010417599976062775
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1,16,8,128,1,float16,fp8,0,0.010447999835014344
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1,16,8,128,1,fp8,fp8,0,0.0104032002389431
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1,16,16,128,1,float16,float16,0,0.010524799674749374
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1,16,16,128,1,fp8,fp8,0,0.010451199859380722
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1,16,1,128,1,float16,float16,0,0.010332799702882766
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1,16,16,128,1,float16,fp8,0,0.010543999820947647
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1,16,1,128,1,float16,fp8,0,0.010364799946546554
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1,16,1,128,1,fp8,fp8,0,0.010358399897813796
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1,16,2,128,1,float16,float16,0,0.010718400031328202
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1,16,2,128,1,float16,fp8,0,0.010447999835014344
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1,16,2,128,1,fp8,fp8,0,0.010382399708032609
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1,16,4,128,1,float16,float16,0,0.010764800012111664
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1,16,4,128,1,float16,fp8,0,0.010411199927330018
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1,16,4,128,1,fp8,fp8,0,0.010412800312042236
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1,16,8,128,1,float16,float16,0,0.011240000277757645
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1,16,8,128,1,float16,fp8,0,0.010401599854230881
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1,16,8,128,1,fp8,fp8,0,0.010398399829864503
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1,16,16,128,1,float16,fp8,0,0.01037440001964569
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1,16,16,128,1,fp8,fp8,0,0.010427200049161912
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1,16,16,128,1,float16,float16,0,0.010649599879980088
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1,16,1,128,1,float16,float16,0,0.010377600044012069
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1,16,1,128,1,float16,fp8,0,0.010465600341558457
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1,16,1,128,1,fp8,fp8,0,0.010526400059461594
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1,16,2,128,1,float16,float16,0,0.010527999699115753
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1,16,2,128,1,float16,fp8,0,0.010363200306892395
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1,16,2,128,1,fp8,fp8,0,0.010569600015878677
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1,16,4,128,1,float16,float16,0,0.010449600219726563
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1,16,4,128,1,float16,fp8,0,0.010344000160694122
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1,16,8,128,1,float16,float16,0,0.01043360009789467
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1,16,4,128,1,fp8,fp8,0,0.01045759990811348
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1,16,8,128,1,float16,fp8,0,0.01048159971833229
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1,16,8,128,1,fp8,fp8,0,0.01043040007352829
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,16384,12,1,128,1,float16,fp8,0,4.109088134765625
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,16384,12,1,128,1,fp8,fp8,0,4.137468719482422
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,16384,12,1,128,1,float16,float16,0,6.388159942626953
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,16384,12,2,128,1,float16,fp8,0,4.0438190460205075
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,16384,12,2,128,1,fp8,fp8,0,4.070971298217773
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,16384,12,2,128,1,float16,float16,0,6.641763305664062
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,16384,12,4,128,1,float16,fp8,0,4.189923095703125
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,16384,12,4,128,1,float16,float16,0,6.947428894042969
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,16384,12,4,128,1,fp8,fp8,0,4.078870391845703
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16384,12,1,128,1,float16,float16,0,2.7250688552856444
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16384,12,12,128,1,float16,fp8,0,2.1527759552001955
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16384,12,12,128,1,fp8,fp8,0,2.1433135986328127
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16384,12,1,128,1,float16,fp8,0,2.446455955505371
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16384,12,1,128,1,fp8,fp8,0,2.0925151824951174
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16384,12,2,128,1,float16,fp8,0,2.1012496948242188
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16384,12,2,128,1,float16,float16,0,2.702475166320801
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16384,12,2,128,1,fp8,fp8,0,2.1005552291870115
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16384,12,4,128,1,float16,float16,0,2.412473678588867
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16384,12,4,128,1,float16,fp8,0,2.3509567260742186
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16384,12,4,128,1,fp8,fp8,0,2.1037519454956053
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16384,12,12,128,1,float16,fp8,0,1.1554400444030761
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16384,12,12,128,1,fp8,fp8,0,1.1577520370483398
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16384,12,1,128,1,float16,float16,0,1.2304911613464355
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16384,12,1,128,1,float16,fp8,0,1.112502384185791
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16384,12,1,128,1,fp8,fp8,0,1.1994704246520995
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16384,12,2,128,1,float16,fp8,0,1.137822437286377
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16384,12,2,128,1,float16,float16,0,1.233839988708496
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16384,12,2,128,1,fp8,fp8,0,1.1050751686096192
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16384,12,4,128,1,float16,float16,0,1.3392928123474122
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16384,12,4,128,1,float16,fp8,0,1.1042223930358888
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16384,12,4,128,1,fp8,fp8,0,1.2934255599975586
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16384,12,12,128,1,float16,fp8,0,0.6930624008178711
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16384,12,12,128,1,fp8,fp8,0,0.6416480064392089
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16384,12,1,128,1,float16,float16,0,0.6920671939849854
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16384,12,1,128,1,float16,fp8,0,0.6270736217498779
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16384,12,1,128,1,fp8,fp8,0,0.6226496219635009
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16384,12,2,128,1,float16,float16,0,0.6939184188842773
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16384,12,2,128,1,float16,fp8,0,0.6260672092437745
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16384,12,2,128,1,fp8,fp8,0,0.6271215915679932
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16384,12,4,128,1,float16,float16,0,0.6986048221588135
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16384,12,4,128,1,float16,fp8,0,0.6268256187438965
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16384,12,4,128,1,fp8,fp8,0,0.6238016128540039
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,12288,12,1,128,1,float16,float16,0,3.337985610961914
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,12288,12,1,128,1,float16,fp8,0,2.551691246032715
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,12288,12,1,128,1,fp8,fp8,0,2.457475280761719
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,12288,12,2,128,1,float16,float16,0,2.8729375839233398
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,12288,12,2,128,1,float16,fp8,0,2.6735759735107423
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,12288,12,2,128,1,fp8,fp8,0,2.4629423141479494
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,12288,12,4,128,1,float16,float16,0,3.016584014892578
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,12288,12,4,128,1,float16,fp8,0,2.4843215942382812
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,12288,12,4,128,1,fp8,fp8,0,2.4422895431518556
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16384,12,12,128,1,float16,float16,0,2.59957275390625
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,12288,12,12,128,1,float16,fp8,0,1.4684752464294433
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,12288,12,12,128,1,fp8,fp8,0,1.299448013305664
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16384,12,12,128,1,float16,float16,0,1.258788776397705
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,12288,12,1,128,1,float16,fp8,0,1.253667163848877
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,12288,12,1,128,1,float16,float16,0,1.465772819519043
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,12288,12,1,128,1,fp8,fp8,0,1.2463199615478515
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,12288,12,2,128,1,float16,float16,0,1.7600416183471679
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16384,12,12,128,1,float16,float16,0,0.7179279804229737
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,12288,12,2,128,1,float16,fp8,0,1.2465007781982422
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,12288,12,4,128,1,float16,float16,0,1.3874879837036134
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,12288,12,2,128,1,fp8,fp8,0,1.646339225769043
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,12288,12,4,128,1,float16,fp8,0,1.24203519821167
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,12288,12,12,128,1,float16,float16,0,0.7792768001556396
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,12288,12,4,128,1,fp8,fp8,0,1.255179214477539
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,12288,12,12,128,1,float16,fp8,0,0.7045055866241455
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,12288,12,12,128,1,fp8,fp8,0,0.709603214263916
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,12288,12,1,128,1,float16,float16,0,0.7461040019989014
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,12288,12,12,128,1,float16,float16,0,1.430076789855957
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,12288,12,1,128,1,float16,fp8,0,0.683519983291626
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,12288,12,1,128,1,fp8,fp8,0,0.7216320037841797
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,12288,12,2,128,1,float16,float16,0,0.7497024059295654
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,12288,12,2,128,1,float16,fp8,0,0.684876823425293
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,12288,12,2,128,1,fp8,fp8,0,0.6782127857208252
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,12288,12,4,128,1,float16,fp8,0,0.6842576026916504
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,12288,12,4,128,1,float16,float16,0,0.7658959865570069
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,12288,12,12,128,1,float16,float16,0,0.4533872127532959
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,12288,12,12,128,1,float16,fp8,0,0.40667200088500977
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,12288,12,4,128,1,fp8,fp8,0,0.6769968032836914
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,12288,12,12,128,1,fp8,fp8,0,0.4094831943511963
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,12288,12,1,128,1,float16,fp8,0,0.392630410194397
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,12288,12,1,128,1,float16,float16,0,0.4303936004638672
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,12288,12,1,128,1,fp8,fp8,0,0.3958368062973022
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,12288,12,2,128,1,float16,fp8,0,0.3923583984375
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,12288,12,2,128,1,float16,float16,0,0.42724318504333497
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,12288,12,2,128,1,fp8,fp8,0,0.3923520088195801
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,12288,12,4,128,1,float16,float16,0,0.44120159149169924
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,12288,12,4,128,1,float16,fp8,0,0.3915055990219116
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,12288,12,4,128,1,fp8,fp8,0,0.38878560066223145
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,10240,12,1,128,1,float16,fp8,0,1.7369600296020509
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,10240,12,1,128,1,fp8,fp8,0,1.721892738342285
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,10240,12,2,128,1,float16,float16,0,1.842416000366211
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,10240,12,1,128,1,float16,float16,0,2.097447967529297
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,10240,12,2,128,1,float16,fp8,0,1.7322912216186523
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,10240,12,2,128,1,fp8,fp8,0,1.7247743606567383
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,10240,12,4,128,1,float16,fp8,0,2.0386735916137697
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,10240,12,4,128,1,float16,float16,0,2.5652175903320313
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,10240,12,12,128,1,float16,float16,0,1.0487839698791503
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,10240,12,12,128,1,float16,fp8,0,0.9474063873291015
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,10240,12,4,128,1,fp8,fp8,0,1.7313152313232423
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,10240,12,12,128,1,fp8,fp8,0,1.094279956817627
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,10240,12,1,128,1,float16,float16,0,1.0803888320922852
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,10240,12,1,128,1,fp8,fp8,0,0.9249520301818848
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,10240,12,1,128,1,float16,fp8,0,0.9090031623840332
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,10240,12,2,128,1,float16,float16,0,0.9912176132202148
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,10240,12,2,128,1,float16,fp8,0,1.2116304397583009
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,10240,12,2,128,1,fp8,fp8,0,0.9159952163696289
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,10240,12,4,128,1,float16,float16,0,1.015454387664795
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,10240,12,4,128,1,float16,fp8,0,0.9244015693664551
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,10240,12,12,128,1,float16,float16,0,0.5876495838165283
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,10240,12,12,128,1,float16,fp8,0,0.5558752059936524
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,10240,12,4,128,1,fp8,fp8,0,0.9074000358581543
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,10240,12,12,128,1,fp8,fp8,0,0.5246319770812988
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,10240,12,1,128,1,float16,float16,0,0.5596240043640137
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,10240,12,1,128,1,float16,fp8,0,0.5296048164367676
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,10240,12,1,128,1,fp8,fp8,0,0.5050735950469971
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,10240,12,2,128,1,float16,float16,0,0.5494592189788818
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,10240,12,2,128,1,float16,fp8,0,0.5101168155670166
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,10240,12,2,128,1,fp8,fp8,0,0.500439977645874
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,10240,12,4,128,1,float16,float16,0,0.5604320049285889
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,10240,12,4,128,1,float16,fp8,0,0.5034656047821044
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,10240,12,12,128,1,float16,float16,0,0.3369472026824951
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,10240,12,12,128,1,float16,fp8,0,0.30600159168243407
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,10240,12,4,128,1,fp8,fp8,0,0.5065008163452148
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,10240,12,12,128,1,fp8,fp8,0,0.3072304010391235
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,10240,12,1,128,1,float16,float16,0,0.3195456027984619
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,10240,12,1,128,1,float16,fp8,0,0.29800639152526853
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,10240,12,1,128,1,fp8,fp8,0,0.2981935977935791
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,10240,12,2,128,1,float16,float16,0,0.3198863983154297
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,10240,12,2,128,1,float16,fp8,0,0.29435999393463136
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,10240,12,2,128,1,fp8,fp8,0,0.2960655927658081
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,10240,12,4,128,1,float16,float16,0,0.3288719892501831
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,10240,12,4,128,1,float16,fp8,0,0.29611198902130126
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,10240,12,4,128,1,fp8,fp8,0,0.29518558979034426
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,8192,12,1,128,1,float16,float16,0,2.4591455459594727
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,8192,12,1,128,1,float16,fp8,0,2.268110466003418
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,8192,12,1,128,1,fp8,fp8,0,2.2545856475830077
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,8192,12,2,128,1,float16,float16,0,2.9854848861694334
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,8192,12,2,128,1,fp8,fp8,0,2.276644706726074
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,8192,12,2,128,1,float16,fp8,0,2.29616641998291
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,8192,12,4,128,1,float16,float16,0,2.820852851867676
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,8192,12,4,128,1,float16,fp8,0,2.7360191345214844
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,8192,12,12,128,1,float16,float16,0,1.3452943801879882
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,8192,12,12,128,1,float16,fp8,0,1.6994592666625976
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,8192,12,4,128,1,fp8,fp8,0,2.2701887130737304
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,8192,12,12,128,1,fp8,fp8,0,1.2765583992004395
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,8192,12,1,128,1,float16,float16,0,1.5044063568115233
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,8192,12,1,128,1,float16,fp8,0,1.163865566253662
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,8192,12,1,128,1,fp8,fp8,0,1.206827163696289
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,8192,12,2,128,1,float16,float16,0,1.2526576042175293
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,8192,12,2,128,1,float16,fp8,0,1.537052822113037
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,8192,12,2,128,1,fp8,fp8,0,1.1695792198181152
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,8192,12,4,128,1,float16,float16,0,1.2902432441711427
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,8192,12,12,128,1,float16,float16,0,0.7155792236328125
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,8192,12,12,128,1,float16,fp8,0,0.7494095802307129
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,8192,12,4,128,1,float16,fp8,0,1.8150304794311523
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,8192,12,4,128,1,fp8,fp8,0,1.162275218963623
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,8192,12,12,128,1,fp8,fp8,0,0.6586080074310303
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,8192,12,1,128,1,float16,float16,0,0.690393590927124
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,8192,12,1,128,1,float16,fp8,0,0.663640022277832
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,8192,12,1,128,1,fp8,fp8,0,0.6290703773498535
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,8192,12,2,128,1,float16,float16,0,0.6734992027282715
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,8192,12,2,128,1,float16,fp8,0,0.6347263813018799
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,8192,12,2,128,1,fp8,fp8,0,0.6490528106689453
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,8192,12,4,128,1,float16,float16,0,0.6815552234649658
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,8192,12,4,128,1,float16,fp8,0,0.6283455848693847
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,8192,12,4,128,1,fp8,fp8,0,0.6214000225067139
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,8192,12,12,128,1,float16,float16,0,0.4004848003387451
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,8192,12,12,128,1,float16,fp8,0,0.36993119716644285
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,8192,12,12,128,1,fp8,fp8,0,0.37026081085205076
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,8192,12,1,128,1,float16,float16,0,0.38010239601135254
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,8192,12,1,128,1,float16,fp8,0,0.35079519748687743
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,8192,12,1,128,1,fp8,fp8,0,0.3504688024520874
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,8192,12,2,128,1,float16,fp8,0,0.3511312007904053
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,8192,12,2,128,1,float16,float16,0,0.3796880006790161
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,8192,12,2,128,1,fp8,fp8,0,0.3505151987075806
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,8192,12,4,128,1,float16,float16,0,0.38917601108551025
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,8192,12,4,128,1,float16,fp8,0,0.3505759954452515
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,8192,12,4,128,1,fp8,fp8,0,0.35225439071655273
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,8192,12,12,128,1,float16,fp8,0,0.21859679222106934
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,8192,12,12,128,1,float16,float16,0,0.2380687952041626
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,8192,12,12,128,1,fp8,fp8,0,0.22204959392547607
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,8192,12,1,128,1,float16,float16,0,0.22465920448303223
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,8192,12,1,128,1,float16,fp8,0,0.20856161117553712
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,8192,12,1,128,1,fp8,fp8,0,0.2092832088470459
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,8192,12,2,128,1,float16,float16,0,0.2244335889816284
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,8192,12,2,128,1,float16,fp8,0,0.20886399745941162
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,8192,12,2,128,1,fp8,fp8,0,0.20682239532470703
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,8192,12,4,128,1,float16,fp8,0,0.20975520610809326
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,8192,12,4,128,1,float16,float16,0,0.22619519233703614
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,8192,12,4,128,1,fp8,fp8,0,0.2069391965866089
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,6144,12,1,128,1,float16,fp8,0,1.361665630340576
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,6144,12,1,128,1,fp8,fp8,0,1.3590304374694824
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,6144,12,1,128,1,float16,float16,0,1.5565263748168945
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,6144,12,2,128,1,float16,fp8,0,1.354740810394287
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,6144,12,2,128,1,float16,float16,0,1.4592080116271973
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,6144,12,2,128,1,fp8,fp8,0,1.5769519805908203
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,6144,12,4,128,1,float16,float16,0,1.4529151916503906
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,6144,12,12,128,1,float16,float16,0,0.8370559692382813
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,6144,12,4,128,1,float16,fp8,0,1.3565584182739259
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,6144,12,12,128,1,float16,fp8,0,0.7651679992675782
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,6144,12,4,128,1,fp8,fp8,0,1.5880928039550781
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,6144,12,12,128,1,fp8,fp8,0,0.7963488101959229
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,6144,12,1,128,1,float16,float16,0,0.7599487781524659
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,6144,12,1,128,1,fp8,fp8,0,0.7183983802795411
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,6144,12,1,128,1,float16,fp8,0,0.9342096328735352
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,6144,12,2,128,1,float16,float16,0,0.770359992980957
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,6144,12,2,128,1,float16,fp8,0,0.7148831844329834
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,6144,12,2,128,1,fp8,fp8,0,0.7181824207305908
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,6144,12,4,128,1,float16,float16,0,0.7743120193481445
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,6144,12,4,128,1,fp8,fp8,0,0.7213744163513184
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,6144,12,4,128,1,float16,fp8,0,0.8383855819702148
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,6144,12,12,128,1,float16,float16,0,0.44764318466186526
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,6144,12,12,128,1,float16,fp8,0,0.516107177734375
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,6144,12,12,128,1,fp8,fp8,0,0.41683201789855956
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,6144,12,1,128,1,float16,fp8,0,0.39231200218200685
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,6144,12,1,128,1,float16,float16,0,0.46762881278991697
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,6144,12,1,128,1,fp8,fp8,0,0.4872704029083252
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,6144,12,2,128,1,float16,float16,0,0.41748800277709963
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,6144,12,2,128,1,float16,fp8,0,0.3921695947647095
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,6144,12,2,128,1,fp8,fp8,0,0.39556159973144533
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,6144,12,4,128,1,float16,float16,0,0.42608962059020994
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,6144,12,12,128,1,float16,float16,0,0.26002399921417235
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,6144,12,4,128,1,float16,fp8,0,0.3886240005493164
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,6144,12,4,128,1,fp8,fp8,0,0.3887792110443115
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,6144,12,12,128,1,float16,fp8,0,0.23823840618133546
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,6144,12,12,128,1,fp8,fp8,0,0.2398751974105835
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,6144,12,1,128,1,float16,float16,0,0.2369472026824951
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,6144,12,1,128,1,float16,fp8,0,0.22283520698547363
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,6144,12,1,128,1,fp8,fp8,0,0.2243504047393799
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,6144,12,2,128,1,float16,float16,0,0.24010560512542725
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,6144,12,2,128,1,float16,fp8,0,0.22454559803009033
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,6144,12,2,128,1,fp8,fp8,0,0.22299840450286865
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,6144,12,4,128,1,float16,float16,0,0.2430720090866089
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,6144,12,12,128,1,float16,float16,0,0.157259202003479
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,6144,12,4,128,1,float16,fp8,0,0.22515199184417725
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,6144,12,4,128,1,fp8,fp8,0,0.22232799530029296
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,6144,12,12,128,1,float16,fp8,0,0.14711999893188477
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,6144,12,12,128,1,fp8,fp8,0,0.14539200067520142
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,6144,12,1,128,1,float16,float16,0,0.1492176055908203
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,6144,12,1,128,1,float16,fp8,0,0.14176160097122192
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,6144,12,2,128,1,float16,float16,0,0.14833279848098754
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,6144,12,2,128,1,float16,fp8,0,0.13965120315551757
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,6144,12,2,128,1,fp8,fp8,0,0.14103039503097534
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,6144,12,1,128,1,fp8,fp8,0,0.14043840169906616
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,6144,12,4,128,1,float16,float16,0,0.15016959905624389
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,6144,12,4,128,1,float16,fp8,0,0.14071199893951417
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,6144,12,4,128,1,fp8,fp8,0,0.13973920345306395
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,4096,12,1,128,1,float16,float16,0,1.348310375213623
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,4096,12,1,128,1,float16,fp8,0,1.3345184326171875
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,4096,12,1,128,1,fp8,fp8,0,1.3394816398620606
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,4096,12,2,128,1,float16,float16,0,1.3486576080322266
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,4096,12,2,128,1,fp8,fp8,0,1.332096004486084
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,4096,12,2,128,1,float16,fp8,0,1.5654543876647948
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,4096,12,4,128,1,float16,fp8,0,1.3488032341003418
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,4096,12,4,128,1,float16,float16,0,1.4184176445007324
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,4096,12,12,128,1,float16,float16,0,0.8002736091613769
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,4096,12,12,128,1,float16,fp8,0,0.7632463932037353
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,4096,12,12,128,1,fp8,fp8,0,0.9226271629333496
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,4096,12,4,128,1,fp8,fp8,0,1.3367903709411622
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,4096,12,1,128,1,float16,fp8,0,0.6950672149658204
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,4096,12,1,128,1,float16,float16,0,0.8168751716613769
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,4096,12,1,128,1,fp8,fp8,0,0.6994336128234864
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,4096,12,2,128,1,float16,float16,0,0.7023488044738769
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,4096,12,2,128,1,fp8,fp8,0,0.6984816074371338
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,4096,12,2,128,1,float16,fp8,0,0.9319071769714355
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,4096,12,4,128,1,float16,float16,0,0.7206272125244141
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,4096,12,4,128,1,float16,fp8,0,0.6957759857177734
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,4096,12,12,128,1,float16,float16,0,0.45659518241882324
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,4096,12,4,128,1,fp8,fp8,0,0.9455760002136231
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,4096,12,12,128,1,float16,fp8,0,0.40931038856506347
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,4096,12,12,128,1,fp8,fp8,0,0.45075039863586425
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,4096,12,1,128,1,float16,float16,0,0.38566079139709475
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,4096,12,1,128,1,float16,fp8,0,0.37645280361175537
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,4096,12,1,128,1,fp8,fp8,0,0.41200480461120603
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,4096,12,2,128,1,float16,float16,0,0.3856096029281616
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,4096,12,2,128,1,float16,fp8,0,0.3752959966659546
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,4096,12,2,128,1,fp8,fp8,0,0.37093119621276854
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,4096,12,4,128,1,float16,float16,0,0.3935247898101807
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,4096,12,4,128,1,float16,fp8,0,0.37363519668579104
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,4096,12,4,128,1,fp8,fp8,0,0.37065279483795166
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,4096,12,12,128,1,float16,float16,0,0.24009280204772948
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,4096,12,12,128,1,float16,fp8,0,0.22775840759277344
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,4096,12,12,128,1,fp8,fp8,0,0.2262160062789917
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,4096,12,1,128,1,float16,float16,0,0.21621758937835694
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,4096,12,1,128,1,float16,fp8,0,0.20718560218811036
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,4096,12,1,128,1,fp8,fp8,0,0.2064448118209839
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,4096,12,2,128,1,float16,fp8,0,0.20760159492492675
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,4096,12,2,128,1,float16,float16,0,0.2162559986114502
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,4096,12,2,128,1,fp8,fp8,0,0.20622560977935792
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,4096,12,4,128,1,float16,float16,0,0.22156960964202882
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,4096,12,4,128,1,float16,fp8,0,0.2088144063949585
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,4096,12,4,128,1,fp8,fp8,0,0.20575520992279053
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,4096,12,12,128,1,float16,float16,0,0.13948800563812255
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,4096,12,12,128,1,float16,fp8,0,0.13158559799194336
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,4096,12,12,128,1,fp8,fp8,0,0.13065600395202637
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,4096,12,1,128,1,float16,fp8,0,0.12265440225601196
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,4096,12,1,128,1,float16,float16,0,0.12676960229873657
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,4096,12,1,128,1,fp8,fp8,0,0.12115199565887451
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,4096,12,2,128,1,float16,fp8,0,0.12283999919891357
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,4096,12,2,128,1,float16,float16,0,0.12521599531173705
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,4096,12,2,128,1,fp8,fp8,0,0.12464640140533448
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,4096,12,4,128,1,float16,float16,0,0.12959519624710084
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,4096,12,4,128,1,fp8,fp8,0,0.12297600507736206
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,4096,12,4,128,1,float16,fp8,0,0.121943998336792
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,4096,12,12,128,1,float16,float16,0,0.08473280072212219
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,4096,12,12,128,1,float16,fp8,0,0.08262079954147339
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,4096,12,12,128,1,fp8,fp8,0,0.08326079845428466
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,4096,12,1,128,1,float16,float16,0,0.08203520178794861
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,4096,12,1,128,1,float16,fp8,0,0.0783456027507782
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,4096,12,1,128,1,fp8,fp8,0,0.0785152018070221
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,4096,12,2,128,1,float16,float16,0,0.08126400113105774
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,4096,12,2,128,1,float16,fp8,0,0.07824479937553405
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,4096,12,2,128,1,fp8,fp8,0,0.07874559760093688
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,4096,12,4,128,1,float16,fp8,0,0.07829599976539611
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,4096,12,4,128,1,fp8,fp8,0,0.07818719744682312
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,4096,12,4,128,1,float16,float16,0,0.08267520070075988
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,3072,12,1,128,1,float16,float16,0,0.8352128028869629
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,3072,12,1,128,1,float16,fp8,0,0.8427951812744141
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,3072,12,1,128,1,fp8,fp8,0,0.8339263916015625
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,3072,12,2,128,1,float16,float16,0,0.828335952758789
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,3072,12,2,128,1,float16,fp8,0,0.8386528015136718
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,3072,12,2,128,1,fp8,fp8,0,0.8388544082641601
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,3072,12,4,128,1,float16,float16,0,0.8452624320983887
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,3072,12,4,128,1,float16,fp8,0,0.8338656425476074
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,3072,12,12,128,1,float16,float16,0,0.512220811843872
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,3072,12,12,128,1,float16,fp8,0,0.4909520149230957
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,3072,12,12,128,1,fp8,fp8,0,0.4933472156524658
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,3072,12,4,128,1,fp8,fp8,0,0.9962464332580566
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,3072,12,1,128,1,float16,float16,0,0.44513759613037107
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,3072,12,1,128,1,float16,fp8,0,0.5020048141479492
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,3072,12,1,128,1,fp8,fp8,0,0.44387521743774416
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,3072,12,2,128,1,float16,float16,0,0.4415152072906494
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,3072,12,2,128,1,float16,fp8,0,0.44236159324645996
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,3072,12,2,128,1,fp8,fp8,0,0.5377696037292481
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,3072,12,4,128,1,float16,float16,0,0.45557918548583987
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,3072,12,4,128,1,float16,fp8,0,0.4401343822479248
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,3072,12,12,128,1,float16,float16,0,0.2724128007888794
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,3072,12,4,128,1,fp8,fp8,0,0.4411344051361084
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,3072,12,12,128,1,float16,fp8,0,0.2666064023971558
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,3072,12,12,128,1,fp8,fp8,0,0.26721758842468263
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,3072,12,1,128,1,float16,float16,0,0.23917601108551026
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,3072,12,1,128,1,float16,fp8,0,0.24082400798797607
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,3072,12,1,128,1,fp8,fp8,0,0.24170880317687987
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,3072,12,2,128,1,float16,float16,0,0.24143359661102295
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,3072,12,2,128,1,float16,fp8,0,0.23936159610748292
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,3072,12,2,128,1,fp8,fp8,0,0.24269120693206786
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,3072,12,4,128,1,float16,float16,0,0.24863040447235107
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,3072,12,4,128,1,float16,fp8,0,0.2396752119064331
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,3072,12,4,128,1,fp8,fp8,0,0.23699359893798827
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,3072,12,12,128,1,float16,float16,0,0.15580639839172364
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,3072,12,12,128,1,float16,fp8,0,0.14960960149765015
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,3072,12,12,128,1,fp8,fp8,0,0.15001280307769777
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,3072,12,1,128,1,float16,float16,0,0.13616160154342652
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,3072,12,1,128,1,float16,fp8,0,0.1354159951210022
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,3072,12,1,128,1,fp8,fp8,0,0.1351359963417053
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,3072,12,2,128,1,float16,float16,0,0.1387760043144226
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,3072,12,2,128,1,float16,fp8,0,0.13480960130691527
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,3072,12,2,128,1,fp8,fp8,0,0.1349087953567505
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,3072,12,12,128,1,float16,float16,0,0.09512959718704224
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,3072,12,4,128,1,float16,float16,0,0.14296480417251586
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,3072,12,4,128,1,fp8,fp8,0,0.1360111951828003
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,3072,12,4,128,1,float16,fp8,0,0.1342512011528015
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,3072,12,12,128,1,float16,fp8,0,0.09097440242767334
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,3072,12,12,128,1,fp8,fp8,0,0.09051520228385926
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,3072,12,1,128,1,float16,float16,0,0.08653600215911865
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,3072,12,1,128,1,float16,fp8,0,0.08393279910087585
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,3072,12,1,128,1,fp8,fp8,0,0.08567360043525696
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,3072,12,2,128,1,float16,float16,0,0.08506399989128113
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,3072,12,2,128,1,float16,fp8,0,0.08411679863929748
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,3072,12,2,128,1,fp8,fp8,0,0.08342239856719971
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,3072,12,4,128,1,float16,float16,0,0.08683680295944214
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,3072,12,4,128,1,float16,fp8,0,0.08409439921379089
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,3072,12,4,128,1,fp8,fp8,0,0.08379039764404297
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,3072,12,12,128,1,float16,float16,0,0.05363199710845947
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,3072,12,12,128,1,float16,fp8,0,0.05352640151977539
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,3072,12,12,128,1,fp8,fp8,0,0.053539198637008664
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,3072,12,1,128,1,float16,float16,0,0.05150560140609741
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,3072,12,1,128,1,float16,fp8,0,0.05084959864616394
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,3072,12,1,128,1,fp8,fp8,0,0.050075197219848634
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,3072,12,2,128,1,float16,float16,0,0.05159839987754822
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,3072,12,2,128,1,float16,fp8,0,0.05103520154953003
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,3072,12,2,128,1,fp8,fp8,0,0.05089759826660156
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,3072,12,4,128,1,float16,float16,0,0.051523202657699586
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,3072,12,4,128,1,float16,fp8,0,0.05048480033874512
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,3072,12,4,128,1,fp8,fp8,0,0.050607997179031375
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,2048,12,1,128,1,float16,float16,0,0.8281200408935547
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,2048,12,1,128,1,float16,fp8,0,0.8729488372802734
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,2048,12,1,128,1,fp8,fp8,0,0.8693408012390137
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,2048,12,2,128,1,float16,float16,0,0.8198111534118653
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,2048,12,2,128,1,float16,fp8,0,0.869819164276123
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,2048,12,2,128,1,fp8,fp8,0,0.8686304092407227
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,2048,12,4,128,1,float16,float16,0,0.856822395324707
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,2048,12,4,128,1,float16,fp8,0,0.869222354888916
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,2048,12,4,128,1,fp8,fp8,0,0.8676400184631348
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,2048,12,12,128,1,float16,float16,0,0.5283775806427002
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,2048,12,12,128,1,float16,fp8,0,0.5241471767425537
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,2048,12,12,128,1,fp8,fp8,0,0.521127986907959
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,2048,12,1,128,1,float16,float16,0,0.487827205657959
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,2048,12,1,128,1,float16,fp8,0,0.4539360046386719
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,2048,12,1,128,1,fp8,fp8,0,0.452513599395752
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,2048,12,2,128,1,float16,float16,0,0.48531041145324705
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,2048,12,2,128,1,fp8,fp8,0,0.4526944160461426
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,2048,12,2,128,1,float16,fp8,0,0.4515279769897461
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,2048,12,4,128,1,float16,float16,0,0.4474160194396973
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,2048,12,12,128,1,float16,fp8,0,0.27614560127258303
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,2048,12,12,128,1,float16,float16,0,0.2785856008529663
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,2048,12,4,128,1,float16,fp8,0,0.5006608009338379
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,2048,12,4,128,1,fp8,fp8,0,0.45331358909606934
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,2048,12,1,128,1,float16,float16,0,0.23262240886688232
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,2048,12,12,128,1,fp8,fp8,0,0.27647039890289304
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,2048,12,1,128,1,float16,fp8,0,0.2431040048599243
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,2048,12,1,128,1,fp8,fp8,0,0.24423999786376954
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,2048,12,2,128,1,float16,float16,0,0.23272640705108644
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,2048,12,2,128,1,float16,fp8,0,0.2430272102355957
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,2048,12,2,128,1,fp8,fp8,0,0.24083359241485597
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,2048,12,4,128,1,float16,float16,0,0.24371039867401123
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,2048,12,4,128,1,float16,fp8,0,0.2409264087677002
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,2048,12,4,128,1,fp8,fp8,0,0.24079360961914062
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,2048,12,12,128,1,float16,float16,0,0.1539471983909607
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,2048,12,12,128,1,float16,fp8,0,0.15302560329437256
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,2048,12,12,128,1,fp8,fp8,0,0.1513759970664978
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,2048,12,1,128,1,float16,float16,0,0.13029760122299194
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,2048,12,1,128,1,float16,fp8,0,0.1318992018699646
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,2048,12,1,128,1,fp8,fp8,0,0.1340000033378601
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,2048,12,2,128,1,float16,float16,0,0.1341264009475708
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,2048,12,2,128,1,float16,fp8,0,0.13336960077285767
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,2048,12,4,128,1,float16,float16,0,0.13619840145111084
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,2048,12,2,128,1,fp8,fp8,0,0.13188960552215576
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,2048,12,12,128,1,float16,float16,0,0.09064159989356994
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,2048,12,12,128,1,float16,fp8,0,0.08788319826126098
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,2048,12,4,128,1,float16,fp8,0,0.13423839807510377
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,2048,12,4,128,1,fp8,fp8,0,0.13230079412460327
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,2048,12,12,128,1,fp8,fp8,0,0.08860639929771423
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,2048,12,1,128,1,float16,float16,0,0.0771503984928131
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,2048,12,1,128,1,float16,fp8,0,0.07959840297698975
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,2048,12,1,128,1,fp8,fp8,0,0.07938560247421264
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,2048,12,2,128,1,float16,float16,0,0.07775359749794006
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,2048,12,2,128,1,float16,fp8,0,0.0790287971496582
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,2048,12,2,128,1,fp8,fp8,0,0.07917280197143554
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,2048,12,4,128,1,float16,float16,0,0.07977759838104248
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,2048,12,4,128,1,float16,fp8,0,0.07927359938621521
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,2048,12,4,128,1,fp8,fp8,0,0.07941920161247254
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,2048,12,12,128,1,float16,float16,0,0.05526080131530762
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,2048,12,12,128,1,float16,fp8,0,0.05638719797134399
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,2048,12,12,128,1,fp8,fp8,0,0.05763999819755554
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,2048,12,1,128,1,float16,float16,0,0.051475197076797485
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,2048,12,1,128,1,fp8,fp8,0,0.051918399333953855
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,2048,12,1,128,1,float16,fp8,0,0.05284479856491089
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,2048,12,2,128,1,float16,float16,0,0.05228319764137268
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,2048,12,2,128,1,float16,fp8,0,0.051712000370025636
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,2048,12,2,128,1,fp8,fp8,0,0.05189599990844727
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,2048,12,4,128,1,float16,float16,0,0.05320960283279419
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,2048,12,4,128,1,float16,fp8,0,0.05277119874954224
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,2048,12,12,128,1,float16,float16,0,0.039087998867034915
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,2048,12,12,128,1,float16,fp8,0,0.03930880129337311
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,2048,12,4,128,1,fp8,fp8,0,0.05189759731292724
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,2048,12,12,128,1,fp8,fp8,0,0.03930239975452423
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,2048,12,1,128,1,float16,float16,0,0.037057599425315856
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,2048,12,1,128,1,float16,fp8,0,0.0370959997177124
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,2048,12,1,128,1,fp8,fp8,0,0.03705439865589142
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,2048,12,2,128,1,float16,float16,0,0.037212800979614255
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,2048,12,2,128,1,float16,fp8,0,0.037028801441192624
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,2048,12,2,128,1,fp8,fp8,0,0.03718239963054657
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,2048,12,4,128,1,float16,float16,0,0.03888800144195557
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,2048,12,4,128,1,float16,fp8,0,0.03702079951763153
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,2048,12,4,128,1,fp8,fp8,0,0.03704479932785034
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1536,12,1,128,1,float16,float16,0,0.5332911968231201
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1536,12,1,128,1,float16,fp8,0,0.570742416381836
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1536,12,1,128,1,fp8,fp8,0,0.5729423999786377
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1536,12,2,128,1,float16,float16,0,0.5273568153381347
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1536,12,2,128,1,float16,fp8,0,0.5697616100311279
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1536,12,2,128,1,fp8,fp8,0,0.5707551956176757
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1536,12,4,128,1,float16,float16,0,0.5553696155548096
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1536,12,4,128,1,float16,fp8,0,0.5684480190277099
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1536,12,4,128,1,fp8,fp8,0,0.5693344116210938
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1536,12,12,128,1,float16,float16,0,0.3509887933731079
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1536,12,12,128,1,float16,fp8,0,0.35187199115753176
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1536,12,12,128,1,fp8,fp8,0,0.3548352003097534
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1536,12,1,128,1,float16,float16,0,0.2801264047622681
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1536,12,1,128,1,float16,fp8,0,0.300164794921875
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1536,12,1,128,1,fp8,fp8,0,0.30121760368347167
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1536,12,2,128,1,float16,float16,0,0.28040640354156493
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1536,12,2,128,1,float16,fp8,0,0.2998032093048096
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1536,12,2,128,1,fp8,fp8,0,0.3023616075515747
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1536,12,4,128,1,float16,float16,0,0.292902398109436
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1536,12,4,128,1,float16,fp8,0,0.2986560106277466
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1536,12,4,128,1,fp8,fp8,0,0.3009952068328857
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1536,12,12,128,1,float16,float16,0,0.18894239664077758
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1536,12,12,128,1,float16,fp8,0,0.18860479593276977
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1536,12,1,128,1,float16,float16,0,0.15294719934463502
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1536,12,12,128,1,fp8,fp8,0,0.1908720016479492
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1536,12,1,128,1,float16,fp8,0,0.16043200492858886
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1536,12,1,128,1,fp8,fp8,0,0.1627679944038391
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1536,12,2,128,1,float16,float16,0,0.1524127960205078
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1536,12,2,128,1,float16,fp8,0,0.16099679470062256
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1536,12,2,128,1,fp8,fp8,0,0.16250879764556886
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1536,12,4,128,1,float16,float16,0,0.1594912052154541
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1536,12,12,128,1,float16,float16,0,0.1044160008430481
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1536,12,4,128,1,float16,fp8,0,0.16048480272293092
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1536,12,4,128,1,fp8,fp8,0,0.16291199922561644
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1536,12,1,128,1,float16,float16,0,0.08592960238456726
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1536,12,12,128,1,fp8,fp8,0,0.10483200550079345
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1536,12,12,128,1,float16,fp8,0,0.10647519826889038
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1536,12,1,128,1,float16,fp8,0,0.09131360054016113
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1536,12,1,128,1,fp8,fp8,0,0.0895904004573822
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1536,12,2,128,1,float16,float16,0,0.08613119721412658
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1536,12,2,128,1,float16,fp8,0,0.09075040221214295
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1536,12,2,128,1,fp8,fp8,0,0.08967679738998413
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1536,12,4,128,1,float16,float16,0,0.09077439904212951
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1536,12,12,128,1,float16,float16,0,0.062115198373794554
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1536,12,4,128,1,float16,fp8,0,0.09119679927825927
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1536,12,4,128,1,fp8,fp8,0,0.08996319770812988
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1536,12,12,128,1,float16,fp8,0,0.06184800267219544
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1536,12,1,128,1,float16,float16,0,0.053255999088287355
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1536,12,12,128,1,fp8,fp8,0,0.06210079789161682
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1536,12,1,128,1,float16,fp8,0,0.05555840134620667
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1536,12,1,128,1,fp8,fp8,0,0.05515360236167908
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1536,12,2,128,1,float16,float16,0,0.053609597682952884
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1536,12,2,128,1,float16,fp8,0,0.05547040104866028
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1536,12,2,128,1,fp8,fp8,0,0.055415999889373777
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1536,12,4,128,1,float16,float16,0,0.05572479963302612
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1536,12,12,128,1,float16,float16,0,0.03560320138931274
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1536,12,4,128,1,float16,fp8,0,0.055318397283554074
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1536,12,4,128,1,fp8,fp8,0,0.05560160279273987
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1536,12,12,128,1,float16,fp8,0,0.03706879913806915
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1536,12,12,128,1,fp8,fp8,0,0.03705120086669922
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1536,12,1,128,1,float16,fp8,0,0.032969599962234496
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1536,12,1,128,1,float16,float16,0,0.03309760093688965
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1536,12,1,128,1,fp8,fp8,0,0.033078399300575254
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1536,12,2,128,1,float16,float16,0,0.03294079899787903
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1536,12,2,128,1,float16,fp8,0,0.033267199993133545
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1536,12,2,128,1,fp8,fp8,0,0.03371520042419433
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1536,12,4,128,1,float16,float16,0,0.03302719891071319
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1536,12,4,128,1,float16,fp8,0,0.03315680027008057
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1536,12,4,128,1,fp8,fp8,0,0.03298560082912445
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1536,12,12,128,1,float16,float16,0,0.03293919861316681
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1536,12,12,128,1,float16,fp8,0,0.032971200346946714
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1536,12,12,128,1,fp8,fp8,0,0.03297280073165894
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1536,12,1,128,1,float16,float16,0,0.03096640110015869
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1536,12,1,128,1,float16,fp8,0,0.030969598889350893
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1536,12,1,128,1,fp8,fp8,0,0.030943998694419862
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1536,12,2,128,1,float16,float16,0,0.03091999888420105
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1536,12,2,128,1,float16,fp8,0,0.030921599268913268
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1536,12,2,128,1,fp8,fp8,0,0.030964800715446474
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1536,12,4,128,1,float16,float16,0,0.03097440004348755
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1536,12,4,128,1,float16,fp8,0,0.030980798602104186
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1536,12,4,128,1,fp8,fp8,0,0.03091520071029663
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,1024,12,1,128,1,float16,float16,0,0.5591760158538819
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,1024,12,2,128,1,float16,float16,0,0.5493087768554688
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,1024,12,1,128,1,fp8,fp8,0,0.6394239902496338
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,1024,12,1,128,1,float16,fp8,0,0.6352255821228028
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,1024,12,2,128,1,float16,fp8,0,0.634065580368042
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,1024,12,2,128,1,fp8,fp8,0,0.6326543807983398
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,1024,12,4,128,1,float16,float16,0,0.5954207897186279
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,1024,12,4,128,1,float16,fp8,0,0.6324960231781006
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1024,12,12,128,1,float16,float16,0,0.3884912014007568
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1024,12,12,128,1,float16,fp8,0,0.4040527820587158
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1024,12,12,128,1,fp8,fp8,0,0.4008927822113037
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,1024,12,4,128,1,fp8,fp8,0,0.6320816040039062
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1024,12,1,128,1,float16,float16,0,0.2912463903427124
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1024,12,1,128,1,float16,fp8,0,0.3336800098419189
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1024,12,1,128,1,fp8,fp8,0,0.3310015916824341
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1024,12,2,128,1,float16,float16,0,0.29065120220184326
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1024,12,2,128,1,float16,fp8,0,0.3318608045578003
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1024,12,2,128,1,fp8,fp8,0,0.32975039482116697
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1024,12,4,128,1,float16,float16,0,0.31101438999176023
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1024,12,12,128,1,float16,float16,0,0.20521600246429444
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1024,12,4,128,1,float16,fp8,0,0.3319616079330444
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1024,12,4,128,1,fp8,fp8,0,0.3284703969955444
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1024,12,12,128,1,float16,fp8,0,0.21195681095123292
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1024,12,12,128,1,fp8,fp8,0,0.2120431900024414
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1024,12,1,128,1,float16,float16,0,0.1583888053894043
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1024,12,1,128,1,float16,fp8,0,0.17790720462799073
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1024,12,2,128,1,float16,float16,0,0.15813920497894288
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1024,12,1,128,1,fp8,fp8,0,0.176528000831604
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1024,12,2,128,1,float16,fp8,0,0.17682720422744752
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1024,12,2,128,1,fp8,fp8,0,0.17582720518112183
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1024,12,4,128,1,float16,float16,0,0.1682752013206482
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1024,12,12,128,1,float16,float16,0,0.1131775975227356
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1024,12,4,128,1,float16,fp8,0,0.17625440359115602
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1024,12,4,128,1,fp8,fp8,0,0.1751695990562439
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1024,12,12,128,1,float16,fp8,0,0.11616480350494385
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1024,12,1,128,1,float16,float16,0,0.08809120059013367
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1024,12,12,128,1,fp8,fp8,0,0.11567360162734985
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1024,12,1,128,1,float16,fp8,0,0.09531520009040832
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1024,12,1,128,1,fp8,fp8,0,0.09520480036735535
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1024,12,2,128,1,float16,float16,0,0.08893600106239319
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1024,12,2,128,1,float16,fp8,0,0.09582880139350891
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1024,12,2,128,1,fp8,fp8,0,0.09485440254211426
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1024,12,4,128,1,float16,float16,0,0.09326239824295043
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1024,12,4,128,1,float16,fp8,0,0.09623519778251648
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1024,12,12,128,1,float16,float16,0,0.0648751974105835
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1024,12,4,128,1,fp8,fp8,0,0.0956063985824585
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1024,12,12,128,1,float16,fp8,0,0.065583997964859
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1024,12,12,128,1,fp8,fp8,0,0.06554880142211914
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1024,12,1,128,1,float16,float16,0,0.05289760231971741
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1024,12,1,128,1,float16,fp8,0,0.05710240006446839
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1024,12,1,128,1,fp8,fp8,0,0.05619199872016907
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1024,12,2,128,1,float16,float16,0,0.05291200280189514
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1024,12,2,128,1,float16,fp8,0,0.057169598340988156
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1024,12,2,128,1,fp8,fp8,0,0.05600159764289856
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1024,12,4,128,1,float16,float16,0,0.05504639744758606
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1024,12,4,128,1,float16,fp8,0,0.05667359828948974
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1024,12,4,128,1,fp8,fp8,0,0.056176000833511354
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1024,12,12,128,1,float16,float16,0,0.039129599928855896
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1024,12,12,128,1,float16,fp8,0,0.04252159893512726
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1024,12,1,128,1,float16,float16,0,0.03627200126647949
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1024,12,12,128,1,fp8,fp8,0,0.04143519997596741
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1024,12,1,128,1,float16,fp8,0,0.03749760091304779
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1024,12,1,128,1,fp8,fp8,0,0.03707840144634247
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1024,12,2,128,1,float16,float16,0,0.03552800118923187
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1024,12,2,128,1,float16,fp8,0,0.03782239854335785
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1024,12,4,128,1,float16,float16,0,0.03705439865589142
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1024,12,4,128,1,float16,fp8,0,0.0372655987739563
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1024,12,2,128,1,fp8,fp8,0,0.03723039925098419
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1024,12,12,128,1,float16,float16,0,0.027049601078033447
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1024,12,4,128,1,fp8,fp8,0,0.03702239990234375
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1024,12,12,128,1,float16,fp8,0,0.02885279953479767
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1024,12,1,128,1,float16,float16,0,0.024855999648571013
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1024,12,12,128,1,fp8,fp8,0,0.02892960011959076
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1024,12,1,128,1,float16,fp8,0,0.026743999123573302
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1024,12,1,128,1,fp8,fp8,0,0.026841598749160766
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1024,12,2,128,1,float16,float16,0,0.024915200471878052
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1024,12,2,128,1,float16,fp8,0,0.026822400093078614
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1024,12,2,128,1,fp8,fp8,0,0.02680160105228424
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1024,12,4,128,1,float16,float16,0,0.026704001426696777
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1024,12,4,128,1,float16,fp8,0,0.026807999610900878
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1024,12,4,128,1,fp8,fp8,0,0.026824000477790832
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1024,12,12,128,1,float16,float16,0,0.02678079903125763
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1024,12,12,128,1,float16,fp8,0,0.026345598697662353
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1024,12,12,128,1,fp8,fp8,0,0.026800000667572023
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1024,12,1,128,1,float16,float16,0,0.02481279969215393
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1024,12,1,128,1,float16,fp8,0,0.0247871994972229
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1024,12,2,128,1,float16,float16,0,0.0248879998922348
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1024,12,1,128,1,fp8,fp8,0,0.02493920028209686
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1024,12,2,128,1,float16,fp8,0,0.024792000651359558
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1024,12,2,128,1,fp8,fp8,0,0.024823999404907225
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1024,12,4,128,1,float16,float16,0,0.024854399263858795
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1024,12,4,128,1,float16,fp8,0,0.02479359954595566
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1024,12,4,128,1,fp8,fp8,0,0.024804799258708952
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,512,12,1,128,1,float16,float16,0,0.4232175827026367
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,512,12,1,128,1,float16,fp8,0,0.5154143810272217
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,512,12,1,128,1,fp8,fp8,0,0.5179135799407959
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,512,12,2,128,1,float16,float16,0,0.42026557922363283
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,512,12,2,128,1,float16,fp8,0,0.5134367942810059
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,512,12,2,128,1,fp8,fp8,0,0.5160831928253173
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,512,12,4,128,1,float16,float16,0,0.45899038314819335
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,512,12,12,128,1,float16,float16,0,0.3197472095489502
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,512,12,4,128,1,float16,fp8,0,0.5122159957885742
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,512,12,4,128,1,fp8,fp8,0,0.515008020401001
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,512,12,12,128,1,float16,fp8,0,0.33817119598388673
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,512,12,12,128,1,fp8,fp8,0,0.3397135972976685
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,512,12,1,128,1,float16,float16,0,0.22272000312805176
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,512,12,1,128,1,float16,fp8,0,0.26836318969726564
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,512,12,1,128,1,fp8,fp8,0,0.2693984031677246
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,512,12,2,128,1,float16,float16,0,0.22161600589752198
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,512,12,2,128,1,float16,fp8,0,0.2677344083786011
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,512,12,4,128,1,float16,float16,0,0.23963360786437987
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,512,12,2,128,1,fp8,fp8,0,0.2678879976272583
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,512,12,4,128,1,float16,fp8,0,0.26821279525756836
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,512,12,4,128,1,fp8,fp8,0,0.2657520055770874
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,512,12,12,128,1,float16,float16,0,0.16711039543151857
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,512,12,12,128,1,float16,fp8,0,0.17807040214538575
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,512,12,12,128,1,fp8,fp8,0,0.17700320482254028
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,512,12,1,128,1,float16,float16,0,0.12011679410934448
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,512,12,1,128,1,float16,fp8,0,0.14147520065307617
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,512,12,2,128,1,float16,float16,0,0.11965919733047485
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,512,12,1,128,1,fp8,fp8,0,0.14206399917602539
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,512,12,2,128,1,float16,fp8,0,0.14080959558486938
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,512,12,2,128,1,fp8,fp8,0,0.14135199785232544
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,512,12,4,128,1,float16,float16,0,0.12999039888381958
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,512,12,4,128,1,float16,fp8,0,0.14099680185317992
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,512,12,12,128,1,float16,float16,0,0.09068160057067871
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,512,12,4,128,1,fp8,fp8,0,0.14125920534133912
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,512,12,12,128,1,float16,fp8,0,0.09543520212173462
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,512,12,1,128,1,float16,float16,0,0.06660799980163574
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,512,12,12,128,1,fp8,fp8,0,0.09594560265541077
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,512,12,1,128,1,float16,fp8,0,0.07589920163154602
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,512,12,1,128,1,fp8,fp8,0,0.07618880271911621
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,512,12,2,128,1,float16,float16,0,0.06758400201797485
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,512,12,2,128,1,float16,fp8,0,0.07602880001068116
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,512,12,2,128,1,fp8,fp8,0,0.07636319994926452
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,512,12,4,128,1,float16,float16,0,0.07271040081977845
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,512,12,4,128,1,float16,fp8,0,0.07647519707679748
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,512,12,12,128,1,float16,float16,0,0.052108800411224364
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,512,12,4,128,1,fp8,fp8,0,0.07637599706649781
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,512,12,12,128,1,float16,fp8,0,0.05491679906845093
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,512,12,12,128,1,fp8,fp8,0,0.055238401889801024
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,512,12,1,128,1,float16,float16,0,0.040540799498558044
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,512,12,1,128,1,float16,fp8,0,0.04527519941329956
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,512,12,1,128,1,fp8,fp8,0,0.0455375999212265
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,512,12,2,128,1,float16,float16,0,0.040324801206588747
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,512,12,2,128,1,float16,fp8,0,0.04577440023422241
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,512,12,2,128,1,fp8,fp8,0,0.045281600952148435
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,512,12,4,128,1,float16,float16,0,0.04324159920215607
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,512,12,4,128,1,fp8,fp8,0,0.045793598890304564
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,512,12,4,128,1,float16,fp8,0,0.0453792005777359
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,512,12,12,128,1,float16,float16,0,0.03210560083389282
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,512,12,12,128,1,float16,fp8,0,0.03497920036315918
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,512,12,12,128,1,fp8,fp8,0,0.035036799311637876
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,512,12,1,128,1,float16,float16,0,0.02886880040168762
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,512,12,1,128,1,float16,fp8,0,0.030895999073982237
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,512,12,2,128,1,float16,float16,0,0.02882240116596222
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,512,12,1,128,1,fp8,fp8,0,0.03091199994087219
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,512,12,2,128,1,float16,fp8,0,0.030963200330734252
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,512,12,2,128,1,fp8,fp8,0,0.031043198704719544
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,512,12,4,128,1,float16,float16,0,0.02892960011959076
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,512,12,4,128,1,float16,fp8,0,0.030899199843406677
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,512,12,4,128,1,fp8,fp8,0,0.03094559907913208
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,512,12,12,128,1,float16,float16,0,0.020657600462436677
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,512,12,12,128,1,float16,fp8,0,0.022711999714374542
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,512,12,12,128,1,fp8,fp8,0,0.022784000635147093
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,512,12,1,128,1,float16,float16,0,0.01900160014629364
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,512,12,1,128,1,float16,fp8,0,0.02066880017518997
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,512,12,1,128,1,fp8,fp8,0,0.020695999264717102
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,512,12,2,128,1,float16,float16,0,0.018697600066661834
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,512,12,2,128,1,float16,fp8,0,0.020692799985408784
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,512,12,2,128,1,fp8,fp8,0,0.02078240066766739
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,512,12,4,128,1,float16,float16,0,0.020608000457286835
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,512,12,4,128,1,float16,fp8,0,0.020708799362182617
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,512,12,12,128,1,float16,float16,0,0.018918399512767792
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,512,12,4,128,1,fp8,fp8,0,0.020854400098323823
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,512,12,12,128,1,float16,fp8,0,0.020664000511169435
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,512,12,1,128,1,float16,float16,0,0.018705600500106813
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,512,12,12,128,1,fp8,fp8,0,0.020657600462436677
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,512,12,1,128,1,float16,fp8,0,0.01870400011539459
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,512,12,1,128,1,fp8,fp8,0,0.01873600035905838
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,512,12,2,128,1,float16,float16,0,0.018641600012779237
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,512,12,2,128,1,float16,fp8,0,0.018636800348758698
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,512,12,2,128,1,fp8,fp8,0,0.019721600413322448
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,512,12,4,128,1,float16,float16,0,0.018639999628067016
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,512,12,4,128,1,float16,fp8,0,0.01870719939470291
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,512,12,4,128,1,fp8,fp8,0,0.020576000213623047
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,512,12,12,128,1,float16,float16,0,0.01860159933567047
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,512,12,12,128,1,fp8,fp8,0,0.018718400597572328
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,512,12,12,128,1,float16,fp8,0,0.018675200641155243
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,512,12,1,128,1,float16,float16,0,0.018588800728321076
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,512,12,1,128,1,float16,fp8,0,0.01871200054883957
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,512,12,1,128,1,fp8,fp8,0,0.01863359957933426
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,512,12,2,128,1,float16,float16,0,0.01860959976911545
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,512,12,2,128,1,float16,fp8,0,0.018671999871730804
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,512,12,2,128,1,fp8,fp8,0,0.01852799952030182
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,512,12,4,128,1,float16,float16,0,0.01908479928970337
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,512,12,4,128,1,fp8,fp8,0,0.018615999817848207
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,512,12,4,128,1,float16,fp8,0,0.018670399487018586
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,256,12,1,128,1,float16,float16,0,0.18434239625930787
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,256,12,1,128,1,float16,fp8,0,0.2349071979522705
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,256,12,1,128,1,fp8,fp8,0,0.234932804107666
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,256,12,2,128,1,float16,float16,0,0.18387360572814943
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,256,12,2,128,1,float16,fp8,0,0.23380959033966064
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,256,12,2,128,1,fp8,fp8,0,0.2341455936431885
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,256,12,4,128,1,float16,float16,0,0.20154240131378173
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,256,12,4,128,1,fp8,fp8,0,0.23302240371704103
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,256,12,4,128,1,float16,fp8,0,0.23292160034179688
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,256,12,12,128,1,float16,float16,0,0.14772160053253175
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,256,12,1,128,1,float16,float16,0,0.0998528003692627
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,256,12,12,128,1,float16,fp8,0,0.16011040210723876
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,256,12,12,128,1,fp8,fp8,0,0.16030880212783813
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,256,12,2,128,1,float16,float16,0,0.10125600099563599
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,256,12,1,128,1,float16,fp8,0,0.1246351957321167
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,256,12,1,128,1,fp8,fp8,0,0.125110399723053
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,256,12,2,128,1,float16,fp8,0,0.12480159997940063
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,256,12,2,128,1,fp8,fp8,0,0.12476960420608521
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,256,12,4,128,1,float16,float16,0,0.11081919670104981
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,256,12,4,128,1,float16,fp8,0,0.12519359588623047
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,256,12,12,128,1,float16,float16,0,0.08241919875144958
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,256,12,4,128,1,fp8,fp8,0,0.12553280591964722
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,256,12,12,128,1,float16,fp8,0,0.08830559849739075
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,256,12,1,128,1,float16,float16,0,0.05853279829025269
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,256,12,12,128,1,fp8,fp8,0,0.08828480243682861
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,256,12,1,128,1,float16,fp8,0,0.06823840141296386
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,256,12,1,128,1,fp8,fp8,0,0.06868799924850463
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,256,12,2,128,1,float16,float16,0,0.05866079926490784
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,256,12,2,128,1,float16,fp8,0,0.0688048005104065
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,256,12,4,128,1,float16,float16,0,0.0639024019241333
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,256,12,2,128,1,fp8,fp8,0,0.06875680088996887
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,256,12,4,128,1,float16,fp8,0,0.0695583999156952
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,256,12,4,128,1,fp8,fp8,0,0.06958879828453064
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,256,12,12,128,1,float16,fp8,0,0.04901440143585205
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,256,12,12,128,1,float16,float16,0,0.04655520021915436
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,256,12,12,128,1,fp8,fp8,0,0.049239999055862425
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,256,12,1,128,1,float16,float16,0,0.03308480083942413
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,256,12,1,128,1,float16,fp8,0,0.039776000380516055
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,256,12,2,128,1,float16,float16,0,0.03369599878787995
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,256,12,1,128,1,fp8,fp8,0,0.03907679915428162
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,256,12,2,128,1,float16,fp8,0,0.03936319947242737
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,256,12,4,128,1,float16,float16,0,0.035780799388885495
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,256,12,2,128,1,fp8,fp8,0,0.0392192006111145
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,256,12,4,128,1,float16,fp8,0,0.03942559957504273
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,256,12,4,128,1,fp8,fp8,0,0.039155200123786926
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,256,12,12,128,1,float16,float16,0,0.02818560004234314
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,256,12,12,128,1,float16,fp8,0,0.03301439881324768
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,256,12,12,128,1,fp8,fp8,0,0.03301759958267212
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,256,12,1,128,1,float16,float16,0,0.02492000013589859
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,256,12,1,128,1,float16,fp8,0,0.028763198852539064
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,256,12,1,128,1,fp8,fp8,0,0.028043198585510253
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,256,12,2,128,1,float16,float16,0,0.024820800125598907
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,256,12,2,128,1,float16,fp8,0,0.02873600125312805
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,256,12,4,128,1,float16,float16,0,0.026545599102973938
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,256,12,2,128,1,fp8,fp8,0,0.026907199621200563
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,256,12,4,128,1,float16,fp8,0,0.02882240116596222
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,256,12,4,128,1,fp8,fp8,0,0.028443199396133424
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,256,12,12,128,1,float16,float16,0,0.01863519996404648
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,256,12,12,128,1,fp8,fp8,0,0.020715199410915375
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,256,12,12,128,1,float16,fp8,0,0.020790399610996248
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,256,12,1,128,1,float16,float16,0,0.01658080071210861
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,256,12,1,128,1,float16,fp8,0,0.018607999384403228
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,256,12,2,128,1,float16,float16,0,0.01660960018634796
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,256,12,1,128,1,fp8,fp8,0,0.01849920004606247
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,256,12,2,128,1,float16,fp8,0,0.018542400002479552
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,256,12,2,128,1,fp8,fp8,0,0.018628799915313722
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,256,12,4,128,1,float16,float16,0,0.0166703999042511
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,256,12,4,128,1,float16,fp8,0,0.01866080015897751
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,256,12,12,128,1,float16,float16,0,0.016771200299263
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,256,12,4,128,1,fp8,fp8,0,0.01860959976911545
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,256,12,12,128,1,float16,fp8,0,0.018462400138378143
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,256,12,1,128,1,float16,float16,0,0.016463999450206757
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,256,12,12,128,1,fp8,fp8,0,0.018513600528240203
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,256,12,1,128,1,float16,fp8,0,0.016680000722408293
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,256,12,1,128,1,fp8,fp8,0,0.016574400663375854
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,256,12,2,128,1,float16,float16,0,0.016513599455356597
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,256,12,2,128,1,float16,fp8,0,0.016683200001716615
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,256,12,2,128,1,fp8,fp8,0,0.01661919951438904
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,256,12,4,128,1,float16,float16,0,0.016683200001716615
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,256,12,4,128,1,float16,fp8,0,0.016769599914550782
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,256,12,4,128,1,fp8,fp8,0,0.016577599942684172
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,256,12,12,128,1,float16,float16,0,0.016627199947834015
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,256,12,12,128,1,float16,fp8,0,0.016625599563121797
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,256,12,12,128,1,fp8,fp8,0,0.016728000342845918
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,256,12,1,128,1,float16,float16,0,0.016497600078582763
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,256,12,1,128,1,float16,fp8,0,0.016492800414562227
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,256,12,1,128,1,fp8,fp8,0,0.01658879965543747
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,256,12,2,128,1,float16,float16,0,0.016417600214481354
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,256,12,2,128,1,float16,fp8,0,0.016484799981117248
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,256,12,2,128,1,fp8,fp8,0,0.01656000018119812
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,256,12,4,128,1,float16,float16,0,0.0165583997964859
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,256,12,4,128,1,float16,fp8,0,0.01647839993238449
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,256,12,4,128,1,fp8,fp8,0,0.016513599455356597
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,256,12,12,128,1,float16,float16,0,0.01652639955282211
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,256,12,12,128,1,float16,fp8,0,0.016574400663375854
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,256,12,12,128,1,fp8,fp8,0,0.01659200042486191
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,256,12,1,128,1,float16,float16,0,0.01648000031709671
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,256,12,1,128,1,float16,fp8,0,0.01643040031194687
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,256,12,1,128,1,fp8,fp8,0,0.015009599924087524
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,256,12,2,128,1,float16,float16,0,0.01653439998626709
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,256,12,2,128,1,float16,fp8,0,0.016513599455356597
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,256,12,4,128,1,float16,fp8,0,0.0165583997964859
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,256,12,4,128,1,float16,float16,0,0.016548800468444824
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,256,12,2,128,1,fp8,fp8,0,0.01661120057106018
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,256,12,4,128,1,fp8,fp8,0,0.01652639955282211
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,128,12,1,128,1,float16,fp8,0,0.13759679794311525
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,128,12,1,128,1,float16,float16,0,0.11360000371932984
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,128,12,1,128,1,fp8,fp8,0,0.13745440244674684
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,128,12,2,128,1,float16,float16,0,0.11353280544281005
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,128,12,2,128,1,float16,fp8,0,0.1376144051551819
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,128,12,2,128,1,fp8,fp8,0,0.13774559497833253
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,128,12,4,128,1,float16,float16,0,0.12298879623413086
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,128,12,4,128,1,fp8,fp8,0,0.13727519512176514
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,128,12,4,128,1,float16,fp8,0,0.1377776026725769
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,128,12,12,128,1,float16,float16,0,0.08442559838294983
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,128,12,12,128,1,float16,fp8,0,0.09262239933013916
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,128,12,12,128,1,fp8,fp8,0,0.09293599724769593
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,128,12,1,128,1,float16,float16,0,0.06326559782028199
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,128,12,1,128,1,float16,fp8,0,0.07393440008163452
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,128,12,1,128,1,fp8,fp8,0,0.07397760152816772
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,128,12,2,128,1,float16,float16,0,0.06383519768714904
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,128,12,2,128,1,float16,fp8,0,0.07401279807090759
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,128,12,2,128,1,fp8,fp8,0,0.07387199997901917
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,128,12,4,128,1,float16,float16,0,0.06796320080757141
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,128,12,4,128,1,float16,fp8,0,0.07451040148735047
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,128,12,4,128,1,fp8,fp8,0,0.0744047999382019
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,128,12,12,128,1,float16,float16,0,0.048403200507164
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,128,12,12,128,1,float16,fp8,0,0.05142880082130432
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,128,12,12,128,1,fp8,fp8,0,0.05145760178565979
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,128,12,1,128,1,float16,float16,0,0.03710559904575348
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,128,12,1,128,1,float16,fp8,0,0.04296959936618805
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,128,12,1,128,1,fp8,fp8,0,0.042233601212501526
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,128,12,2,128,1,float16,float16,0,0.036340799927711484
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,128,12,2,128,1,float16,fp8,0,0.043003201484680176
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,128,12,2,128,1,fp8,fp8,0,0.04290400147438049
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,128,12,4,128,1,float16,float16,0,0.039201599359512326
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,128,12,4,128,1,float16,fp8,0,0.04321120083332062
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,128,12,4,128,1,fp8,fp8,0,0.04271839857101441
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,128,12,12,128,1,float16,float16,0,0.025553598999977112
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,128,12,12,128,1,fp8,fp8,0,0.030865600705146788
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,128,12,1,128,1,float16,float16,0,0.02281759977340698
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,128,12,12,128,1,float16,fp8,0,0.030895999073982237
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,128,12,1,128,1,float16,fp8,0,0.025600001215934753
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,128,12,2,128,1,float16,float16,0,0.022707200050354003
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,128,12,1,128,1,fp8,fp8,0,0.026606398820877075
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,128,12,2,128,1,float16,fp8,0,0.026025599241256712
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,128,12,2,128,1,fp8,fp8,0,0.026723200082778932
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,128,12,4,128,1,float16,float16,0,0.02375999987125397
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,128,12,4,128,1,float16,fp8,0,0.02611039876937866
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,128,12,4,128,1,fp8,fp8,0,0.026651200652122498
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,128,12,12,128,1,float16,float16,0,0.018760000169277192
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,128,12,12,128,1,float16,fp8,0,0.021406400203704833
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,128,12,12,128,1,fp8,fp8,0,0.0207056000828743
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,128,12,1,128,1,float16,fp8,0,0.018862399458885192
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,128,12,1,128,1,float16,float16,0,0.01676799952983856
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,128,12,1,128,1,fp8,fp8,0,0.018667200207710268
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,128,12,2,128,1,float16,float16,0,0.017608000338077544
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,128,12,2,128,1,float16,fp8,0,0.018886399269104005
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,128,12,2,128,1,fp8,fp8,0,0.01863359957933426
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,128,12,4,128,1,float16,float16,0,0.01854719966650009
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,128,12,4,128,1,float16,fp8,0,0.019115200638771056
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,128,12,12,128,1,float16,float16,0,0.014446400105953217
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,128,12,12,128,1,float16,fp8,0,0.014732800424098969
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,128,12,4,128,1,fp8,fp8,0,0.01865600049495697
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,128,12,12,128,1,fp8,fp8,0,0.014577600359916686
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,128,12,1,128,1,float16,float16,0,0.012649600207805634
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,128,12,1,128,1,float16,fp8,0,0.012455999851226807
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,128,12,1,128,1,fp8,fp8,0,0.014427199959754944
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,128,12,2,128,1,float16,float16,0,0.012516799569129943
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,128,12,2,128,1,float16,fp8,0,0.012518399953842163
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,128,12,2,128,1,fp8,fp8,0,0.014502400159835815
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,128,12,4,128,1,float16,float16,0,0.012547199428081513
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,128,12,4,128,1,float16,fp8,0,0.012521600723266602
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,128,12,4,128,1,fp8,fp8,0,0.014486399292945863
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,128,12,12,128,1,float16,float16,0,0.012612800300121307
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,128,12,12,128,1,float16,fp8,0,0.012495999783277511
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,128,12,12,128,1,fp8,fp8,0,0.012579199671745301
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,128,12,1,128,1,float16,float16,0,0.012464000284671784
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,128,12,1,128,1,float16,fp8,0,0.012577599287033081
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,128,12,1,128,1,fp8,fp8,0,0.012510399520397186
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,128,12,2,128,1,float16,float16,0,0.012520000338554382
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,128,12,2,128,1,float16,fp8,0,0.01244800016283989
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,128,12,2,128,1,fp8,fp8,0,0.012617599964141846
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,128,12,4,128,1,float16,float16,0,0.01252480000257492
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,128,12,4,128,1,float16,fp8,0,0.012465599924325943
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,128,12,4,128,1,fp8,fp8,0,0.01252799928188324
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,128,12,12,128,1,float16,float16,0,0.01255200058221817
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,128,12,12,128,1,fp8,fp8,0,0.012540799379348756
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,128,12,12,128,1,float16,fp8,0,0.012508800625801087
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,128,12,1,128,1,float16,float16,0,0.012383999675512314
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,128,12,1,128,1,float16,fp8,0,0.01255040019750595
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,128,12,1,128,1,fp8,fp8,0,0.012476799637079239
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,128,12,2,128,1,float16,float16,0,0.01255040019750595
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,128,12,2,128,1,float16,fp8,0,0.012521600723266602
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,128,12,2,128,1,fp8,fp8,0,0.012494400143623352
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,128,12,4,128,1,float16,float16,0,0.012337599694728852
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,128,12,4,128,1,float16,fp8,0,0.012428800016641617
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,128,12,4,128,1,fp8,fp8,0,0.012612800300121307
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,128,12,12,128,1,float16,float16,0,0.0125231996178627
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,128,12,12,128,1,float16,fp8,0,0.012359999865293504
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,128,12,12,128,1,fp8,fp8,0,0.012433599680662155
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,128,12,1,128,1,float16,float16,0,0.012459199875593185
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,128,12,1,128,1,float16,fp8,0,0.012412799894809723
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,128,12,1,128,1,fp8,fp8,0,0.012481600046157837
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,128,12,2,128,1,float16,fp8,0,0.012353599816560746
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,128,12,2,128,1,float16,float16,0,0.012425599992275238
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,128,12,2,128,1,fp8,fp8,0,0.012438400089740754
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,128,12,4,128,1,float16,float16,0,0.012451200187206269
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,128,12,4,128,1,float16,fp8,0,0.012436799705028534
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,128,12,4,128,1,fp8,fp8,0,0.012483199685811996
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,64,12,1,128,1,float16,float16,0,0.08664799928665161
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,64,12,1,128,1,float16,fp8,0,0.09767839908599854
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,64,12,1,128,1,fp8,fp8,0,0.09664639830589294
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,64,12,2,128,1,float16,float16,0,0.08681759834289551
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,64,12,2,128,1,float16,fp8,0,0.09810879826545715
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,64,12,2,128,1,fp8,fp8,0,0.09799039959907532
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,64,12,4,128,1,float16,float16,0,0.09102079868316651
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,64,12,4,128,1,float16,fp8,0,0.0985647976398468
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,64,12,4,128,1,fp8,fp8,0,0.0985376000404358
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,64,12,12,128,1,float16,float16,0,0.05917279720306397
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,64,12,12,128,1,float16,fp8,0,0.06376000046730042
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,64,12,1,128,1,float16,float16,0,0.047393599152565004
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,64,12,12,128,1,fp8,fp8,0,0.06368160247802734
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,64,12,1,128,1,float16,fp8,0,0.053508800268173215
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,64,12,1,128,1,fp8,fp8,0,0.053446400165557864
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,64,12,2,128,1,float16,float16,0,0.047363200783729555
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,64,12,2,128,1,float16,fp8,0,0.05345119833946228
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,64,12,2,128,1,fp8,fp8,0,0.05351200103759766
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,64,12,4,128,1,float16,float16,0,0.04971359968185425
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,64,12,4,128,1,float16,fp8,0,0.053572797775268556
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,64,12,4,128,1,fp8,fp8,0,0.05343359708786011
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,64,12,12,128,1,float16,float16,0,0.03288959860801697
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,64,12,12,128,1,float16,fp8,0,0.03708640038967133
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,64,12,12,128,1,fp8,fp8,0,0.03705120086669922
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,64,12,1,128,1,float16,float16,0,0.02885279953479767
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,64,12,1,128,1,fp8,fp8,0,0.03147040009498596
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,64,12,2,128,1,float16,float16,0,0.028947201371192933
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,64,12,1,128,1,float16,fp8,0,0.03288480043411255
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,64,12,2,128,1,float16,fp8,0,0.031228798627853393
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,64,12,2,128,1,fp8,fp8,0,0.03158400058746338
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,64,12,4,128,1,float16,float16,0,0.031092798709869383
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,64,12,4,128,1,fp8,fp8,0,0.032995200157165526
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,64,12,4,128,1,float16,fp8,0,0.03133600056171417
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,64,12,12,128,1,float16,float16,0,0.02082560062408447
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,64,12,12,128,1,float16,fp8,0,0.02282080054283142
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,64,12,12,128,1,fp8,fp8,0,0.022784000635147093
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,64,12,1,128,1,float16,fp8,0,0.020739200711250304
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,64,12,1,128,1,fp8,fp8,0,0.02082560062408447
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,64,12,1,128,1,float16,float16,0,0.018838399648666383
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,64,12,2,128,1,float16,float16,0,0.01868640035390854
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,64,12,2,128,1,float16,fp8,0,0.020769600570201874
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,64,12,2,128,1,fp8,fp8,0,0.02072480022907257
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,64,12,4,128,1,float16,fp8,0,0.020776000618934632
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,64,12,4,128,1,float16,float16,0,0.0186256006360054
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,64,12,4,128,1,fp8,fp8,0,0.020828799903392793
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,64,12,12,128,1,float16,float16,0,0.016473600268363954
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,64,12,12,128,1,float16,fp8,0,0.01659200042486191
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,64,12,12,128,1,fp8,fp8,0,0.01682399958372116
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,64,12,1,128,1,float16,float16,0,0.014710399508476257
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,64,12,1,128,1,float16,fp8,0,0.015729600191116334
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,64,12,2,128,1,float16,float16,0,0.014595200121402741
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,64,12,1,128,1,fp8,fp8,0,0.015732799470424653
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,64,12,2,128,1,float16,fp8,0,0.016076800227165223
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,64,12,2,128,1,fp8,fp8,0,0.016457599401474
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,64,12,4,128,1,float16,float16,0,0.014716799557209014
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,64,12,4,128,1,float16,fp8,0,0.016590400040149687
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,64,12,4,128,1,fp8,fp8,0,0.016545599699020384
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,64,12,12,128,1,float16,fp8,0,0.0124719999730587
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,64,12,12,128,1,fp8,fp8,0,0.012539200484752655
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,64,12,12,128,1,float16,float16,0,0.012636800110340119
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,64,12,1,128,1,float16,float16,0,0.012091200053691863
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,64,12,1,128,1,float16,fp8,0,0.012481600046157837
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,64,12,1,128,1,fp8,fp8,0,0.012579199671745301
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,64,12,2,128,1,float16,float16,0,0.01247360035777092
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,64,12,2,128,1,float16,fp8,0,0.012444800138473511
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,64,12,4,128,1,float16,float16,0,0.012483199685811996
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,64,12,4,128,1,float16,fp8,0,0.012412799894809723
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,64,12,2,128,1,fp8,fp8,0,0.012780800461769104
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,64,12,4,128,1,fp8,fp8,0,0.012492799758911132
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,64,12,12,128,1,float16,float16,0,0.01244800016283989
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,64,12,12,128,1,float16,fp8,0,0.012572799623012543
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,64,12,12,128,1,fp8,fp8,0,0.01247519999742508
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,64,12,1,128,1,float16,float16,0,0.011910399794578553
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,64,12,1,128,1,fp8,fp8,0,0.010555200278759003
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,64,12,1,128,1,float16,fp8,0,0.010608000308275222
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,64,12,2,128,1,float16,float16,0,0.011804799735546111
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,64,12,2,128,1,float16,fp8,0,0.010507199913263321
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,64,12,2,128,1,fp8,fp8,0,0.01242400035262108
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,64,12,4,128,1,float16,float16,0,0.010518400371074677
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,64,12,4,128,1,float16,fp8,0,0.010497599840164185
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,64,12,4,128,1,fp8,fp8,0,0.012479999661445617
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,64,12,12,128,1,float16,float16,0,0.010567999631166457
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,64,12,12,128,1,float16,fp8,0,0.010446400195360184
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,64,12,12,128,1,fp8,fp8,0,0.010524799674749374
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,64,12,1,128,1,float16,float16,0,0.010686399787664414
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,64,12,1,128,1,float16,fp8,0,0.010470400005578995
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,64,12,1,128,1,fp8,fp8,0,0.010566399991512298
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,64,12,2,128,1,float16,float16,0,0.01055999994277954
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,64,12,2,128,1,float16,fp8,0,0.010500799864530563
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,64,12,2,128,1,fp8,fp8,0,0.010476800054311753
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,64,12,4,128,1,float16,float16,0,0.010515200346708298
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,64,12,4,128,1,fp8,fp8,0,0.010550399869680404
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,64,12,4,128,1,float16,fp8,0,0.010465600341558457
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,64,12,12,128,1,float16,fp8,0,0.010556799918413162
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,64,12,12,128,1,float16,float16,0,0.010758399963378906
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,64,12,12,128,1,fp8,fp8,0,0.011036799848079681
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,64,12,1,128,1,float16,float16,0,0.010566399991512298
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,64,12,1,128,1,float16,fp8,0,0.010604800283908844
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,64,12,1,128,1,fp8,fp8,0,0.010436800122261048
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,64,12,2,128,1,float16,float16,0,0.01053600013256073
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,64,12,2,128,1,fp8,fp8,0,0.010475199669599533
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,64,12,4,128,1,float16,float16,0,0.010593599826097488
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,64,12,2,128,1,float16,fp8,0,0.010494399815797806
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,64,12,4,128,1,float16,fp8,0,0.010471999645233154
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,64,12,4,128,1,fp8,fp8,0,0.010500799864530563
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,32,12,1,128,1,float16,float16,0,0.07289760112762451
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,32,12,1,128,1,float16,fp8,0,0.07803040146827697
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,32,12,2,128,1,float16,float16,0,0.07316799759864807
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,32,12,1,128,1,fp8,fp8,0,0.07878559827804565
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,32,12,2,128,1,float16,fp8,0,0.07800319790840149
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,32,12,2,128,1,fp8,fp8,0,0.07835040092468262
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,32,12,4,128,1,float16,float16,0,0.07575839757919312
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,32,12,4,128,1,float16,fp8,0,0.0785311996936798
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,32,12,4,128,1,fp8,fp8,0,0.07823039889335633
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,32,12,12,128,1,float16,float16,0,0.04544320106506348
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,32,12,12,128,1,float16,fp8,0,0.04814240038394928
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,32,12,12,128,1,fp8,fp8,0,0.04826880097389221
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,32,12,1,128,1,float16,fp8,0,0.04373759925365448
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,32,12,1,128,1,float16,float16,0,0.041654399037361144
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,32,12,1,128,1,fp8,fp8,0,0.04325920045375824
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,32,12,2,128,1,float16,float16,0,0.0411327987909317
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,32,12,2,128,1,float16,fp8,0,0.04335519969463349
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,32,12,2,128,1,fp8,fp8,0,0.043321600556373595
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,32,12,4,128,1,float16,float16,0,0.043198400735855104
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,32,12,4,128,1,float16,fp8,0,0.04353919923305512
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,32,12,4,128,1,fp8,fp8,0,0.04335840046405792
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,32,12,12,128,1,float16,float16,0,0.026830399036407472
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,32,12,12,128,1,float16,fp8,0,0.028960001468658448
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,32,12,12,128,1,fp8,fp8,0,0.028913599252700806
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,32,12,1,128,1,float16,fp8,0,0.02693760097026825
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,32,12,1,128,1,float16,float16,0,0.025702399015426636
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,32,12,1,128,1,fp8,fp8,0,0.026862400770187377
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,32,12,2,128,1,float16,float16,0,0.026748800277709962
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,32,12,2,128,1,float16,fp8,0,0.026873600482940675
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,32,12,2,128,1,fp8,fp8,0,0.026958400011062623
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,32,12,4,128,1,float16,float16,0,0.02674719989299774
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,32,12,4,128,1,float16,fp8,0,0.02680160105228424
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,32,12,4,128,1,fp8,fp8,0,0.02688640058040619
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,32,12,12,128,1,float16,float16,0,0.01857759952545166
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,32,12,12,128,1,float16,fp8,0,0.018665599822998046
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,32,12,12,128,1,fp8,fp8,0,0.018756799399852753
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,32,12,1,128,1,float16,float16,0,0.01672160029411316
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,32,12,1,128,1,float16,fp8,0,0.01772159934043884
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,32,12,1,128,1,fp8,fp8,0,0.018676799535751343
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,32,12,2,128,1,float16,float16,0,0.016655999422073364
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,32,12,2,128,1,float16,fp8,0,0.018430399894714355
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,32,12,4,128,1,float16,float16,0,0.018134400248527527
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,32,12,2,128,1,fp8,fp8,0,0.01866080015897751
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,32,12,4,128,1,float16,fp8,0,0.018161599338054658
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,32,12,12,128,1,float16,float16,0,0.01459999978542328
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,32,12,12,128,1,float16,fp8,0,0.01454399973154068
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,32,12,4,128,1,fp8,fp8,0,0.01865600049495697
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,32,12,1,128,1,float16,float16,0,0.013068799674510957
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,32,12,1,128,1,float16,fp8,0,0.014494399726390838
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,32,12,12,128,1,fp8,fp8,0,0.014545600116252898
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,32,12,1,128,1,fp8,fp8,0,0.01451520025730133
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,32,12,2,128,1,float16,float16,0,0.014475199580192565
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,32,12,2,128,1,float16,fp8,0,0.014574399590492249
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,32,12,2,128,1,fp8,fp8,0,0.01446399986743927
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,32,12,4,128,1,float16,float16,0,0.014395199716091156
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,32,12,4,128,1,float16,fp8,0,0.014508800208568573
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,32,12,4,128,1,fp8,fp8,0,0.014499199390411378
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,32,12,12,128,1,float16,float16,0,0.010873600095510482
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,32,12,12,128,1,fp8,fp8,0,0.010574399679899215
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,32,12,12,128,1,float16,fp8,0,0.010503999888896942
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,32,12,1,128,1,float16,float16,0,0.010596799850463866
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,32,12,1,128,1,float16,fp8,0,0.010396800190210342
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,32,12,1,128,1,fp8,fp8,0,0.010353600233793258
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,32,12,2,128,1,float16,float16,0,0.010492800176143647
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,32,12,2,128,1,float16,fp8,0,0.010468800365924836
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,32,12,2,128,1,fp8,fp8,0,0.010419200360774993
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,32,12,4,128,1,float16,float16,0,0.01048320010304451
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,32,12,4,128,1,float16,fp8,0,0.010440000146627427
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,32,12,4,128,1,fp8,fp8,0,0.010401599854230881
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,32,12,12,128,1,float16,float16,0,0.010619200021028518
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,32,12,12,128,1,float16,fp8,0,0.010443200170993806
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,32,12,12,128,1,fp8,fp8,0,0.010377600044012069
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,32,12,1,128,1,float16,float16,0,0.01048159971833229
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,32,12,1,128,1,fp8,fp8,0,0.01043199971318245
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,32,12,1,128,1,float16,fp8,0,0.010367999970912933
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,32,12,2,128,1,float16,float16,0,0.010513599961996078
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,32,12,2,128,1,float16,fp8,0,0.010422399640083313
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,32,12,2,128,1,fp8,fp8,0,0.010468800365924836
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,32,12,4,128,1,float16,float16,0,0.010499200224876404
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,32,12,4,128,1,float16,fp8,0,0.010385599732398988
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,32,12,4,128,1,fp8,fp8,0,0.010401599854230881
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,32,12,12,128,1,float16,float16,0,0.010521599650382995
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,32,12,12,128,1,float16,fp8,0,0.010449600219726563
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,32,12,1,128,1,float16,float16,0,0.010475199669599533
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,32,12,12,128,1,fp8,fp8,0,0.010424000024795533
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,32,12,1,128,1,float16,fp8,0,0.010471999645233154
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,32,12,1,128,1,fp8,fp8,0,0.01032319962978363
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,32,12,2,128,1,float16,float16,0,0.010398399829864503
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,32,12,2,128,1,float16,fp8,0,0.010470400005578995
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,32,12,2,128,1,fp8,fp8,0,0.010425599664449692
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,32,12,4,128,1,float16,float16,0,0.010393600165843963
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,32,12,4,128,1,float16,fp8,0,0.010409600287675857
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,32,12,12,128,1,float16,float16,0,0.010407999902963639
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,32,12,12,128,1,float16,fp8,0,0.010441599786281586
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,32,12,12,128,1,fp8,fp8,0,0.01053759977221489
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,32,12,4,128,1,fp8,fp8,0,0.010760000348091126
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,32,12,1,128,1,float16,float16,0,0.010441599786281586
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,32,12,1,128,1,float16,fp8,0,0.010438399761915207
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,32,12,1,128,1,fp8,fp8,0,0.010780800133943558
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,32,12,2,128,1,float16,fp8,0,0.010417599976062775
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,32,12,2,128,1,float16,float16,0,0.010447999835014344
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,32,12,2,128,1,fp8,fp8,0,0.01074879989027977
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,32,12,4,128,1,float16,float16,0,0.01037919968366623
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,32,12,4,128,1,fp8,fp8,0,0.01043040007352829
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,32,12,4,128,1,float16,fp8,0,0.010657600313425063
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,16,12,1,128,1,float16,float16,0,0.07001760005950927
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,16,12,1,128,1,float16,fp8,0,0.0700432002544403
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,16,12,1,128,1,fp8,fp8,0,0.06931679844856262
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,16,12,2,128,1,float16,float16,0,0.0704255998134613
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,16,12,2,128,1,float16,fp8,0,0.06959999799728393
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,16,12,2,128,1,fp8,fp8,0,0.06998239755630493
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,16,12,4,128,1,float16,float16,0,0.07157760262489318
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,16,12,4,128,1,float16,fp8,0,0.06996160149574279
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,16,12,12,128,1,float16,float16,0,0.041283199191093446
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,16,12,4,128,1,fp8,fp8,0,0.06833919882774353
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,16,12,12,128,1,float16,fp8,0,0.04147039949893951
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,16,12,12,128,1,fp8,fp8,0,0.041196799278259276
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,16,12,1,128,1,float16,float16,0,0.03998880088329315
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,16,12,1,128,1,float16,fp8,0,0.039243200421333314
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,16,12,1,128,1,fp8,fp8,0,0.039192000031471254
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,16,12,2,128,1,float16,float16,0,0.04034880101680756
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,16,12,2,128,1,float16,fp8,0,0.039540800452232364
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,16,12,2,128,1,fp8,fp8,0,0.03910239934921265
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,16,12,4,128,1,float16,fp8,0,0.03911679983139038
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,16,12,4,128,1,float16,float16,0,0.04137600064277649
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,16,12,4,128,1,fp8,fp8,0,0.039084801077842714
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,16,12,12,128,1,float16,float16,0,0.026158401370048524
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,16,12,12,128,1,fp8,fp8,0,0.024857600033283234
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,16,12,12,128,1,float16,fp8,0,0.026950401067733765
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,16,12,1,128,1,float16,float16,0,0.024849599599838255
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,16,12,1,128,1,float16,fp8,0,0.0247296005487442
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,16,12,1,128,1,fp8,fp8,0,0.025
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,16,12,2,128,1,float16,float16,0,0.024889600276947022
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,16,12,2,128,1,float16,fp8,0,0.024724799394607543
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,16,12,4,128,1,float16,float16,0,0.02489439994096756
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,16,12,2,128,1,fp8,fp8,0,0.02478239983320236
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,16,12,4,128,1,float16,fp8,0,0.02476319968700409
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,16,12,12,128,1,float16,float16,0,0.016795200109481812
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,16,12,12,128,1,float16,fp8,0,0.016579200327396394
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,16,12,4,128,1,fp8,fp8,0,0.024753600358963013
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,16,12,12,128,1,fp8,fp8,0,0.016752000153064727
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,16,12,1,128,1,float16,fp8,0,0.016816000640392303
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,16,12,1,128,1,fp8,fp8,0,0.01656160056591034
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,16,12,1,128,1,float16,float16,0,0.01658080071210861
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,16,12,2,128,1,float16,fp8,0,0.01671359986066818
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,16,12,2,128,1,float16,float16,0,0.01677280068397522
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,16,12,2,128,1,fp8,fp8,0,0.01656640022993088
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,16,12,4,128,1,fp8,fp8,0,0.016548800468444824
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,16,12,4,128,1,float16,fp8,0,0.016620799899101257
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,16,12,4,128,1,float16,float16,0,0.01672160029411316
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,16,12,12,128,1,float16,float16,0,0.014929600059986115
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,16,12,12,128,1,fp8,fp8,0,0.01449120044708252
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,16,12,12,128,1,float16,fp8,0,0.014718399941921234
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,16,12,1,128,1,float16,float16,0,0.01408960074186325
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,16,12,1,128,1,float16,fp8,0,0.014390400052070618
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,16,12,1,128,1,fp8,fp8,0,0.012780800461769104
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,16,12,2,128,1,float16,float16,0,0.013711999356746673
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,16,12,2,128,1,float16,fp8,0,0.01440960019826889
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,16,12,2,128,1,fp8,fp8,0,0.012596799433231354
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,16,12,4,128,1,float16,float16,0,0.014179199934005737
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,16,12,4,128,1,float16,fp8,0,0.014467200636863709
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,16,12,4,128,1,fp8,fp8,0,0.01250080019235611
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,16,12,12,128,1,float16,float16,0,0.01058880016207695
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,16,12,12,128,1,float16,fp8,0,0.010579200088977813
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,16,12,12,128,1,fp8,fp8,0,0.010494399815797806
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,16,12,1,128,1,float16,float16,0,0.010425599664449692
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,16,12,1,128,1,float16,fp8,0,0.010507199913263321
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,16,12,1,128,1,fp8,fp8,0,0.010491199791431427
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,16,12,2,128,1,float16,float16,0,0.010377600044012069
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,16,12,2,128,1,float16,fp8,0,0.01055999994277954
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,16,12,2,128,1,fp8,fp8,0,0.010521599650382995
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,16,12,4,128,1,float16,float16,0,0.010393600165843963
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,16,12,4,128,1,float16,fp8,0,0.010553599894046783
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,16,12,4,128,1,fp8,fp8,0,0.010494399815797806
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16,12,12,128,1,float16,float16,0,0.010414399951696397
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16,12,12,128,1,float16,fp8,0,0.010279999673366546
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16,12,12,128,1,fp8,fp8,0,0.010542400181293488
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16,12,1,128,1,float16,float16,0,0.01037440001964569
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16,12,1,128,1,float16,fp8,0,0.0104592002928257
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16,12,1,128,1,fp8,fp8,0,0.010371199995279311
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16,12,2,128,1,float16,float16,0,0.010451199859380722
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16,12,2,128,1,float16,fp8,0,0.01043199971318245
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16,12,2,128,1,fp8,fp8,0,0.010524799674749374
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16,12,4,128,1,float16,float16,0,0.010385599732398988
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16,12,4,128,1,float16,fp8,0,0.010486400127410889
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16,12,4,128,1,fp8,fp8,0,0.010436800122261048
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16,12,12,128,1,float16,float16,0,0.01045759990811348
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16,12,12,128,1,float16,fp8,0,0.010420800000429154
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16,12,12,128,1,fp8,fp8,0,0.01048320010304451
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16,12,1,128,1,float16,fp8,0,0.010335999727249145
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16,12,1,128,1,float16,float16,0,0.010443200170993806
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16,12,2,128,1,float16,float16,0,0.010491199791431427
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16,12,1,128,1,fp8,fp8,0,0.010356800258159637
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16,12,2,128,1,float16,fp8,0,0.01043040007352829
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16,12,4,128,1,float16,float16,0,0.010412800312042236
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16,12,4,128,1,float16,fp8,0,0.010387200117111205
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16,12,2,128,1,fp8,fp8,0,0.010503999888896942
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16,12,4,128,1,fp8,fp8,0,0.010407999902963639
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16,12,12,128,1,float16,float16,0,0.010857599973678588
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16,12,12,128,1,float16,fp8,0,0.010304000228643417
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16,12,12,128,1,fp8,fp8,0,0.010335999727249145
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16,12,1,128,1,float16,float16,0,0.01032159999012947
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16,12,1,128,1,float16,fp8,0,0.0104032002389431
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16,12,1,128,1,fp8,fp8,0,0.010419200360774993
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16,12,2,128,1,float16,float16,0,0.010487999767065048
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16,12,2,128,1,float16,fp8,0,0.010427200049161912
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16,12,2,128,1,fp8,fp8,0,0.01035040020942688
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16,12,4,128,1,float16,float16,0,0.010391999781131745
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16,12,4,128,1,float16,fp8,0,0.010494399815797806
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16,12,4,128,1,fp8,fp8,0,0.010382399708032609
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,1,12,1,128,1,float16,float16,0,0.06883999705314636
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,1,12,1,128,1,float16,fp8,0,0.06375359892845153
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,1,12,1,128,1,fp8,fp8,0,0.06375839710235595
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,1,12,2,128,1,float16,float16,0,0.06871680021286011
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,1,12,2,128,1,float16,fp8,0,0.06373599767684937
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,1,12,2,128,1,fp8,fp8,0,0.06387519836425781
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,1,12,4,128,1,float16,float16,0,0.06936640143394471
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,1,12,4,128,1,float16,fp8,0,0.06380320191383362
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,1,12,4,128,1,fp8,fp8,0,0.06374559998512268
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,1,12,12,128,1,float16,float16,0,0.03922080099582672
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,1,12,12,128,1,float16,fp8,0,0.03708159923553467
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,1,12,12,128,1,fp8,fp8,0,0.037108799815177916
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,1,12,1,128,1,float16,float16,0,0.03912799954414368
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,1,12,1,128,1,fp8,fp8,0,0.037083199620246886
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,1,12,1,128,1,float16,fp8,0,0.03727039992809296
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,1,12,2,128,1,float16,float16,0,0.039139199256896975
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,1,12,2,128,1,float16,fp8,0,0.03709760010242462
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,1,12,4,128,1,float16,float16,0,0.03936319947242737
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,1,12,2,128,1,fp8,fp8,0,0.03727999925613403
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,1,12,4,128,1,float16,fp8,0,0.03717280030250549
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,1,12,4,128,1,fp8,fp8,0,0.037150400876998904
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1,12,12,128,1,float16,float16,0,0.02499520033597946
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1,12,12,128,1,float16,fp8,0,0.02281759977340698
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1,12,12,128,1,fp8,fp8,0,0.024740800261497498
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1,12,1,128,1,float16,fp8,0,0.022771200537681578
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1,12,1,128,1,float16,float16,0,0.024903999269008638
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1,12,1,128,1,fp8,fp8,0,0.024825599789619446
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1,12,2,128,1,float16,float16,0,0.02476799935102463
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1,12,2,128,1,float16,fp8,0,0.022881600260734557
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1,12,2,128,1,fp8,fp8,0,0.02372319996356964
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1,12,4,128,1,float16,float16,0,0.024769599735736846
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1,12,4,128,1,fp8,fp8,0,0.023313599824905395
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1,12,4,128,1,float16,fp8,0,0.023073600232601167
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1,12,12,128,1,float16,float16,0,0.016574400663375854
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1,12,12,128,1,float16,fp8,0,0.016523200273513793
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1,12,1,128,1,float16,float16,0,0.016516800224781036
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1,12,12,128,1,fp8,fp8,0,0.01676799952983856
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1,12,1,128,1,float16,fp8,0,0.016499200463294984
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1,12,1,128,1,fp8,fp8,0,0.01656000018119812
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1,12,2,128,1,float16,float16,0,0.016812799870967864
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1,12,2,128,1,float16,fp8,0,0.016601599752902985
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1,12,2,128,1,fp8,fp8,0,0.016497600078582763
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1,12,4,128,1,float16,fp8,0,0.016499200463294984
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1,12,4,128,1,float16,float16,0,0.016816000640392303
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1,12,4,128,1,fp8,fp8,0,0.016547200083732606
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1,12,12,128,1,float16,float16,0,0.01443679928779602
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1,12,12,128,1,float16,fp8,0,0.012675200402736665
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1,12,12,128,1,fp8,fp8,0,0.012511999905109405
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1,12,1,128,1,float16,float16,0,0.014505599439144135
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1,12,1,128,1,float16,fp8,0,0.012703999876976013
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1,12,1,128,1,fp8,fp8,0,0.012511999905109405
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1,12,2,128,1,float16,float16,0,0.01449279934167862
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1,12,2,128,1,fp8,fp8,0,0.012934400141239167
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1,12,4,128,1,float16,float16,0,0.014430400729179383
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1,12,2,128,1,float16,fp8,0,0.012676799297332763
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1,12,4,128,1,float16,fp8,0,0.012529599666595458
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1,12,4,128,1,fp8,fp8,0,0.012759999930858612
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1,12,12,128,1,float16,float16,0,0.012572799623012543
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1,12,12,128,1,float16,fp8,0,0.01043199971318245
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1,12,12,128,1,fp8,fp8,0,0.010502400249242783
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1,12,1,128,1,float16,float16,0,0.010590399801731109
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1,12,1,128,1,float16,fp8,0,0.010524799674749374
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1,12,1,128,1,fp8,fp8,0,0.01048320010304451
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1,12,2,128,1,float16,float16,0,0.010592000186443329
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1,12,2,128,1,float16,fp8,0,0.0104592002928257
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1,12,2,128,1,fp8,fp8,0,0.010465600341558457
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1,12,4,128,1,float16,fp8,0,0.01053600013256073
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1,12,4,128,1,float16,float16,0,0.01058719977736473
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1,12,4,128,1,fp8,fp8,0,0.010521599650382995
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1,12,12,128,1,float16,float16,0,0.0104592002928257
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1,12,12,128,1,fp8,fp8,0,0.010340800136327743
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1,12,12,128,1,float16,fp8,0,0.01043199971318245
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1,12,1,128,1,float16,float16,0,0.010452800244092942
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1,12,1,128,1,float16,fp8,0,0.010377600044012069
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1,12,1,128,1,fp8,fp8,0,0.010353600233793258
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1,12,2,128,1,float16,float16,0,0.010467199981212616
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1,12,2,128,1,float16,fp8,0,0.010395199805498124
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1,12,2,128,1,fp8,fp8,0,0.0104032002389431
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1,12,4,128,1,float16,float16,0,0.010436800122261048
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1,12,4,128,1,float16,fp8,0,0.010369600355625152
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1,12,4,128,1,fp8,fp8,0,0.010380800068378448
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1,12,12,128,1,float16,float16,0,0.010465600341558457
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1,12,12,128,1,float16,fp8,0,0.01032319962978363
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1,12,12,128,1,fp8,fp8,0,0.010395199805498124
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1,12,1,128,1,float16,float16,0,0.010476800054311753
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1,12,1,128,1,float16,fp8,0,0.010486400127410889
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1,12,1,128,1,fp8,fp8,0,0.010385599732398988
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1,12,2,128,1,float16,float16,0,0.0104592002928257
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1,12,2,128,1,float16,fp8,0,0.01043199971318245
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1,12,2,128,1,fp8,fp8,0,0.010446400195360184
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1,12,4,128,1,float16,float16,0,0.010507199913263321
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1,12,4,128,1,float16,fp8,0,0.010435199737548828
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1,12,4,128,1,fp8,fp8,0,0.010438399761915207
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1,12,12,128,1,float16,float16,0,0.010478399693965912
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1,12,12,128,1,float16,fp8,0,0.010407999902963639
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1,12,12,128,1,fp8,fp8,0,0.01040479987859726
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1,12,1,128,1,float16,float16,0,0.01045600026845932
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1,12,1,128,1,float16,fp8,0,0.010395199805498124
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1,12,1,128,1,fp8,fp8,0,0.010414399951696397
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1,12,2,128,1,float16,float16,0,0.010491199791431427
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1,12,2,128,1,float16,fp8,0,0.010369600355625152
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1,12,2,128,1,fp8,fp8,0,0.010502400249242783
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1,12,4,128,1,float16,float16,0,0.010416000336408614
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1,12,4,128,1,float16,fp8,0,0.010356800258159637
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1,12,4,128,1,fp8,fp8,0,0.010422399640083313
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,16384,8,1,128,1,float16,fp8,0,2.7438831329345703
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,16384,8,1,128,1,fp8,fp8,0,2.73449592590332
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,16384,8,1,128,1,float16,float16,0,3.5423263549804687
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,16384,8,2,128,1,float16,fp8,0,2.7680368423461914
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,16384,8,2,128,1,float16,float16,0,4.023287963867188
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,16384,8,2,128,1,fp8,fp8,0,3.1482288360595705
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,16384,8,4,128,1,float16,float16,0,3.0024944305419923
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16384,8,8,128,1,float16,float16,0,1.853536033630371
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,16384,8,4,128,1,float16,fp8,0,2.8706560134887695
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,16384,8,4,128,1,fp8,fp8,0,2.7602720260620117
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16384,8,8,128,1,float16,fp8,0,1.6527999877929687
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16384,8,8,128,1,fp8,fp8,0,1.559286403656006
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16384,8,1,128,1,float16,float16,0,1.599176025390625
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16384,8,1,128,1,float16,fp8,0,1.4726911544799806
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16384,8,1,128,1,fp8,fp8,0,1.4552960395812988
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16384,8,2,128,1,float16,float16,0,1.6032320022583009
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16384,8,2,128,1,float16,fp8,0,1.5617648124694825
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16384,8,2,128,1,fp8,fp8,0,1.4405263900756835
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16384,8,4,128,1,float16,float16,0,1.585422420501709
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16384,8,8,128,1,float16,float16,0,0.9008463859558106
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16384,8,4,128,1,float16,fp8,0,1.6223119735717773
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16384,8,4,128,1,fp8,fp8,0,1.4560735702514649
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16384,8,8,128,1,fp8,fp8,0,0.7936992168426513
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16384,8,8,128,1,float16,fp8,0,0.9843695640563965
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16384,8,1,128,1,float16,float16,0,0.881497573852539
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16384,8,1,128,1,float16,fp8,0,0.7861472129821777
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16384,8,1,128,1,fp8,fp8,0,0.7939648151397705
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16384,8,2,128,1,fp8,fp8,0,0.7833072185516358
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16384,8,2,128,1,float16,fp8,0,0.894164752960205
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16384,8,2,128,1,float16,float16,0,0.8736399650573731
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16384,8,8,128,1,float16,float16,0,0.5269423961639405
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16384,8,4,128,1,float16,float16,0,0.8814496040344239
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16384,8,4,128,1,float16,fp8,0,0.7837520122528077
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16384,8,4,128,1,fp8,fp8,0,0.7905776023864746
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16384,8,8,128,1,float16,fp8,0,0.4768256187438965
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16384,8,8,128,1,fp8,fp8,0,0.4638768196105957
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16384,8,1,128,1,float16,float16,0,0.504527997970581
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16384,8,1,128,1,float16,fp8,0,0.46495680809020995
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16384,8,1,128,1,fp8,fp8,0,0.47415838241577146
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16384,8,2,128,1,float16,float16,0,0.5293200016021729
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16384,8,2,128,1,float16,fp8,0,0.458238410949707
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16384,8,2,128,1,fp8,fp8,0,0.4641791820526123
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16384,8,4,128,1,float16,float16,0,0.5101967811584472
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16384,8,4,128,1,fp8,fp8,0,0.4588895797729492
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16384,8,4,128,1,float16,fp8,0,0.5783135890960693
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,12288,8,1,128,1,float16,float16,0,1.7841856002807617
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,12288,8,1,128,1,float16,fp8,0,1.631332778930664
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,12288,8,1,128,1,fp8,fp8,0,1.6377824783325194
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,12288,8,2,128,1,float16,float16,0,1.7721200942993165
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,12288,8,2,128,1,fp8,fp8,0,1.63570556640625
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,12288,8,2,128,1,float16,fp8,0,1.8246751785278321
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,12288,8,4,128,1,float16,fp8,0,1.7975103378295898
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,12288,8,4,128,1,float16,float16,0,2.070369529724121
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,12288,8,8,128,1,float16,float16,0,1.0095104217529296
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,12288,8,8,128,1,float16,fp8,0,0.8723855972290039
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,12288,8,4,128,1,fp8,fp8,0,1.6522192001342773
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,12288,8,8,128,1,fp8,fp8,0,1.359614372253418
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,12288,8,1,128,1,float16,float16,0,1.0924079895019532
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,12288,8,1,128,1,float16,fp8,0,0.8714176177978515
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,12288,8,1,128,1,fp8,fp8,0,0.8913344383239746
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,12288,8,2,128,1,float16,float16,0,0.9671216011047363
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,12288,8,2,128,1,float16,fp8,0,0.985267162322998
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,12288,8,2,128,1,fp8,fp8,0,0.870302391052246
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,12288,8,4,128,1,float16,float16,0,0.9606368064880371
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,12288,8,8,128,1,float16,float16,0,0.5580927848815918
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,12288,8,4,128,1,float16,fp8,0,0.9261008262634277
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,12288,8,8,128,1,float16,fp8,0,0.4959568023681641
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,12288,8,4,128,1,fp8,fp8,0,0.8923711776733398
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,12288,8,8,128,1,fp8,fp8,0,0.4909872055053711
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,12288,8,1,128,1,float16,float16,0,0.5390192031860351
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,12288,8,1,128,1,float16,fp8,0,0.49891200065612795
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,12288,8,1,128,1,fp8,fp8,0,0.4938000202178955
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,12288,8,2,128,1,float16,fp8,0,0.49383039474487306
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,12288,8,2,128,1,float16,float16,0,0.5359055995941162
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,12288,8,2,128,1,fp8,fp8,0,0.49401278495788575
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,12288,8,4,128,1,float16,float16,0,0.5488175868988037
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,12288,8,8,128,1,float16,float16,0,0.32907679080963137
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,12288,8,4,128,1,fp8,fp8,0,0.48907041549682617
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,12288,8,4,128,1,float16,fp8,0,0.4892576217651367
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,12288,8,8,128,1,float16,fp8,0,0.29647040367126465
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,12288,8,8,128,1,fp8,fp8,0,0.29122400283813477
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,12288,8,1,128,1,float16,fp8,0,0.2956464052200317
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,12288,8,1,128,1,float16,float16,0,0.3153215885162354
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,12288,8,1,128,1,fp8,fp8,0,0.29624960422515867
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,12288,8,2,128,1,float16,float16,0,0.31290080547332766
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,12288,8,2,128,1,float16,fp8,0,0.2938751935958862
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,12288,8,2,128,1,fp8,fp8,0,0.2922624111175537
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,12288,8,4,128,1,float16,float16,0,0.32184319496154784
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,12288,8,4,128,1,float16,fp8,0,0.2926543951034546
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,12288,8,4,128,1,fp8,fp8,0,0.2926016092300415
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,10240,8,1,128,1,float16,float16,0,1.286575984954834
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,10240,8,1,128,1,float16,fp8,0,1.1844400405883788
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,10240,8,1,128,1,fp8,fp8,0,1.185739231109619
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,10240,8,2,128,1,float16,fp8,0,1.1800671577453614
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,10240,8,2,128,1,fp8,fp8,0,1.1842432022094727
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,10240,8,2,128,1,float16,float16,0,1.5597344398498536
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,10240,8,4,128,1,float16,fp8,0,1.1828399658203126
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,10240,8,4,128,1,float16,float16,0,1.3843071937561036
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,10240,8,4,128,1,fp8,fp8,0,1.3521408081054687
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,10240,8,8,128,1,float16,float16,0,0.7355008125305176
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,10240,8,8,128,1,float16,fp8,0,0.7582880020141601
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,10240,8,8,128,1,fp8,fp8,0,0.6473999977111816
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,10240,8,1,128,1,float16,float16,0,0.7075295925140381
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,10240,8,1,128,1,fp8,fp8,0,0.6473231792449952
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,10240,8,1,128,1,float16,fp8,0,0.650489616394043
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,10240,8,2,128,1,float16,float16,0,0.6866288185119629
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,10240,8,2,128,1,float16,fp8,0,0.6773375988006591
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,10240,8,2,128,1,fp8,fp8,0,0.6463791847229003
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,10240,8,4,128,1,float16,float16,0,0.7105040073394775
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,10240,8,4,128,1,float16,fp8,0,0.6365664005279541
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,10240,8,8,128,1,float16,float16,0,0.4207920074462891
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,10240,8,8,128,1,float16,fp8,0,0.36741759777069094
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,10240,8,4,128,1,fp8,fp8,0,0.7608367919921875
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,10240,8,8,128,1,fp8,fp8,0,0.36550240516662597
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,10240,8,1,128,1,float16,fp8,0,0.36544640064239503
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,10240,8,1,128,1,float16,float16,0,0.39757599830627444
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,10240,8,1,128,1,fp8,fp8,0,0.3663503885269165
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,10240,8,2,128,1,float16,float16,0,0.39346721172332766
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,10240,8,2,128,1,float16,fp8,0,0.36443679332733153
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,10240,8,2,128,1,fp8,fp8,0,0.3655184030532837
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,10240,8,4,128,1,float16,float16,0,0.4029088020324707
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,10240,8,8,128,1,float16,float16,0,0.25464320182800293
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,10240,8,4,128,1,fp8,fp8,0,0.3649535894393921
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,10240,8,4,128,1,float16,fp8,0,0.3634432077407837
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,10240,8,8,128,1,float16,fp8,0,0.2253472089767456
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,10240,8,8,128,1,fp8,fp8,0,0.22797279357910155
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,10240,8,1,128,1,float16,float16,0,0.2404848098754883
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,10240,8,1,128,1,float16,fp8,0,0.2268143892288208
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,10240,8,1,128,1,fp8,fp8,0,0.22533600330352782
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,10240,8,2,128,1,float16,float16,0,0.24317760467529298
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,10240,8,2,128,1,float16,fp8,0,0.22627840042114258
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,10240,8,2,128,1,fp8,fp8,0,0.22592320442199706
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,10240,8,4,128,1,float16,float16,0,0.24391679763793944
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,10240,8,4,128,1,float16,fp8,0,0.22433600425720215
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,10240,8,4,128,1,fp8,fp8,0,0.22471840381622316
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,8192,8,1,128,1,float16,float16,0,1.5933648109436036
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,8192,8,1,128,1,float16,fp8,0,1.5363712310791016
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,8192,8,1,128,1,fp8,fp8,0,1.5437935829162597
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,8192,8,2,128,1,float16,float16,0,1.643502426147461
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,8192,8,2,128,1,fp8,fp8,0,1.5339920043945312
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,8192,8,2,128,1,float16,fp8,0,1.6629152297973633
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,8192,8,4,128,1,float16,float16,0,1.703446388244629
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,8192,8,4,128,1,float16,fp8,0,1.9572032928466796
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,8192,8,8,128,1,float16,float16,0,0.9232720375061035
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,8192,8,4,128,1,fp8,fp8,0,1.5361040115356446
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,8192,8,8,128,1,float16,fp8,0,1.2654720306396485
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,8192,8,8,128,1,fp8,fp8,0,0.8792783737182617
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,8192,8,1,128,1,float16,float16,0,1.0296784400939942
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,8192,8,1,128,1,float16,fp8,0,0.8143024444580078
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,8192,8,1,128,1,fp8,fp8,0,0.816312026977539
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,8192,8,2,128,1,float16,float16,0,0.8650927543640137
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,8192,8,2,128,1,float16,fp8,0,0.9942959785461426
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,8192,8,2,128,1,fp8,fp8,0,0.8136112213134765
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,8192,8,4,128,1,float16,float16,0,0.8964159965515137
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,8192,8,4,128,1,float16,fp8,0,0.8094367980957031
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,8192,8,8,128,1,float16,float16,0,0.5081984043121338
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,8192,8,8,128,1,float16,fp8,0,0.48046078681945803
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,8192,8,8,128,1,fp8,fp8,0,0.44710559844970704
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,8192,8,4,128,1,fp8,fp8,0,0.8097647666931153
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,8192,8,1,128,1,float16,float16,0,0.4799344062805176
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,8192,8,1,128,1,fp8,fp8,0,0.44507360458374023
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,8192,8,1,128,1,float16,fp8,0,0.44959201812744143
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,8192,8,2,128,1,float16,float16,0,0.47444639205932615
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,8192,8,2,128,1,float16,fp8,0,0.44853920936584474
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,8192,8,2,128,1,fp8,fp8,0,0.4447951793670654
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,8192,8,4,128,1,float16,float16,0,0.48924641609191893
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,8192,8,4,128,1,float16,fp8,0,0.44321279525756835
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,8192,8,8,128,1,float16,fp8,0,0.261678409576416
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,8192,8,8,128,1,float16,float16,0,0.29170238971710205
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,8192,8,8,128,1,fp8,fp8,0,0.2603823900222778
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,8192,8,4,128,1,fp8,fp8,0,0.44852480888366697
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,8192,8,1,128,1,float16,float16,0,0.276859188079834
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,8192,8,1,128,1,float16,fp8,0,0.25960640907287597
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,8192,8,1,128,1,fp8,fp8,0,0.2586143970489502
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,8192,8,2,128,1,float16,float16,0,0.2773087978363037
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,8192,8,2,128,1,float16,fp8,0,0.2604592084884644
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,8192,8,2,128,1,fp8,fp8,0,0.25628640651702883
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,8192,8,4,128,1,float16,float16,0,0.2799983978271484
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,8192,8,4,128,1,float16,fp8,0,0.2586303949356079
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,8192,8,8,128,1,float16,float16,0,0.18278720378875732
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,8192,8,8,128,1,float16,fp8,0,0.16361119747161865
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,8192,8,4,128,1,fp8,fp8,0,0.26001439094543455
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,8192,8,8,128,1,fp8,fp8,0,0.1623088002204895
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,8192,8,1,128,1,float16,float16,0,0.17274559736251832
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,8192,8,1,128,1,float16,fp8,0,0.16231679916381836
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,8192,8,1,128,1,fp8,fp8,0,0.16390399932861327
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,8192,8,2,128,1,float16,float16,0,0.1729632019996643
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,8192,8,2,128,1,float16,fp8,0,0.16244959831237793
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,8192,8,2,128,1,fp8,fp8,0,0.1637312054634094
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,8192,8,4,128,1,float16,float16,0,0.17565120458602906
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,8192,8,4,128,1,float16,fp8,0,0.16220320463180543
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,8192,8,4,128,1,fp8,fp8,0,0.1633407950401306
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,6144,8,1,128,1,float16,float16,0,0.9710240364074707
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,6144,8,1,128,1,float16,fp8,0,0.938697624206543
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,6144,8,1,128,1,fp8,fp8,0,0.941652774810791
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,6144,8,2,128,1,float16,fp8,0,0.9350735664367675
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,6144,8,2,128,1,float16,float16,0,0.991209602355957
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,6144,8,2,128,1,fp8,fp8,0,0.9396464347839355
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,6144,8,4,128,1,float16,float16,0,1.0016448020935058
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,6144,8,4,128,1,fp8,fp8,0,0.9442399978637696
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,6144,8,4,128,1,float16,fp8,0,1.309607982635498
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,6144,8,8,128,1,float16,fp8,0,0.5058928012847901
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,6144,8,8,128,1,float16,float16,0,0.5976240158081054
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,6144,8,8,128,1,fp8,fp8,0,0.5011919975280762
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,6144,8,1,128,1,float16,float16,0,0.5321904182434082
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,6144,8,1,128,1,float16,fp8,0,0.59890718460083
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,6144,8,1,128,1,fp8,fp8,0,0.5047887802124024
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,6144,8,2,128,1,float16,float16,0,0.5346047878265381
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,6144,8,2,128,1,float16,fp8,0,0.5587056159973145
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,6144,8,2,128,1,fp8,fp8,0,0.5020336151123047
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,6144,8,8,128,1,float16,float16,0,0.31881439685821533
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,6144,8,4,128,1,float16,float16,0,0.5493360042572022
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,6144,8,4,128,1,float16,fp8,0,0.5008895874023438
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,6144,8,4,128,1,fp8,fp8,0,0.5346447944641113
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,6144,8,8,128,1,float16,fp8,0,0.28355679512023924
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,6144,8,8,128,1,fp8,fp8,0,0.28715200424194337
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,6144,8,1,128,1,float16,float16,0,0.2938816070556641
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,6144,8,1,128,1,float16,fp8,0,0.2941423892974854
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,6144,8,1,128,1,fp8,fp8,0,0.2811743974685669
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,6144,8,2,128,1,float16,float16,0,0.30056641101837156
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,6144,8,2,128,1,float16,fp8,0,0.28011360168457033
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,6144,8,2,128,1,fp8,fp8,0,0.2852895975112915
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,6144,8,4,128,1,float16,float16,0,0.30438079833984377
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,6144,8,8,128,1,float16,float16,0,0.18783680200576783
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,6144,8,4,128,1,float16,fp8,0,0.28486878871917726
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,6144,8,4,128,1,fp8,fp8,0,0.2811199903488159
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,6144,8,8,128,1,float16,fp8,0,0.16604959964752197
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,6144,8,8,128,1,fp8,fp8,0,0.16829919815063477
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,6144,8,1,128,1,float16,float16,0,0.1725648045539856
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,6144,8,1,128,1,float16,fp8,0,0.16548800468444824
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,6144,8,1,128,1,fp8,fp8,0,0.16629120111465454
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,6144,8,2,128,1,float16,float16,0,0.17263360023498536
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,6144,8,2,128,1,float16,fp8,0,0.16579680442810057
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,6144,8,2,128,1,fp8,fp8,0,0.16616480350494384
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,6144,8,4,128,1,float16,float16,0,0.1779103994369507
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,6144,8,4,128,1,float16,fp8,0,0.16666239500045776
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,6144,8,4,128,1,fp8,fp8,0,0.1656048059463501
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,6144,8,8,128,1,float16,float16,0,0.11459840536117553
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,6144,8,8,128,1,float16,fp8,0,0.10738240480422974
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,6144,8,8,128,1,fp8,fp8,0,0.10703200101852417
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,6144,8,1,128,1,float16,float16,0,0.11239680051803588
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,6144,8,1,128,1,fp8,fp8,0,0.10678080320358277
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,6144,8,1,128,1,float16,fp8,0,0.1047760009765625
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,6144,8,2,128,1,float16,float16,0,0.11184639930725097
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,6144,8,2,128,1,float16,fp8,0,0.10763360261917114
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,6144,8,2,128,1,fp8,fp8,0,0.10775200128555298
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,6144,8,4,128,1,float16,float16,0,0.10977280139923096
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,6144,8,4,128,1,float16,fp8,0,0.10752160549163818
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,6144,8,4,128,1,fp8,fp8,0,0.10541599988937378
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,4096,8,1,128,1,float16,float16,0,0.9102560043334961
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,4096,8,1,128,1,float16,fp8,0,0.9149344444274903
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,4096,8,2,128,1,float16,float16,0,0.9169360160827636
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,4096,8,1,128,1,fp8,fp8,0,0.9195247650146484
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,4096,8,2,128,1,float16,fp8,0,0.9160592079162597
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,4096,8,2,128,1,fp8,fp8,0,0.914027214050293
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,4096,8,4,128,1,float16,fp8,0,0.9152144432067871
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,4096,8,4,128,1,float16,float16,0,0.9673551559448242
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,4096,8,8,128,1,float16,fp8,0,0.48114237785339353
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,4096,8,8,128,1,float16,float16,0,0.5332640171051025
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,4096,8,8,128,1,fp8,fp8,0,0.5831999778747559
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,4096,8,4,128,1,fp8,fp8,0,0.920081615447998
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,4096,8,1,128,1,float16,float16,0,0.48721919059753416
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,4096,8,1,128,1,float16,fp8,0,0.48339200019836426
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,4096,8,1,128,1,fp8,fp8,0,0.5937679767608642
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,4096,8,2,128,1,float16,float16,0,0.48739042282104494
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,4096,8,2,128,1,float16,fp8,0,0.4848336219787598
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,4096,8,2,128,1,fp8,fp8,0,0.4847568035125732
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,4096,8,4,128,1,float16,float16,0,0.54060959815979
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,4096,8,4,128,1,float16,fp8,0,0.4838560104370117
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,4096,8,8,128,1,float16,float16,0,0.29351999759674074
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,4096,8,4,128,1,fp8,fp8,0,0.5164591789245605
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,4096,8,8,128,1,float16,fp8,0,0.26785600185394287
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,4096,8,8,128,1,fp8,fp8,0,0.2643455982208252
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,4096,8,1,128,1,float16,float16,0,0.26704959869384765
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,4096,8,1,128,1,float16,fp8,0,0.2747711896896362
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,4096,8,1,128,1,fp8,fp8,0,0.26877760887145996
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,4096,8,2,128,1,float16,float16,0,0.2692320108413696
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,4096,8,2,128,1,float16,fp8,0,0.2658112049102783
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,4096,8,2,128,1,fp8,fp8,0,0.2673919916152954
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,4096,8,4,128,1,float16,float16,0,0.2817071914672852
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,4096,8,4,128,1,float16,fp8,0,0.2649231910705566
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,4096,8,4,128,1,fp8,fp8,0,0.2636672019958496
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,4096,8,8,128,1,float16,float16,0,0.17225760221481323
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,4096,8,8,128,1,float16,fp8,0,0.15569759607315065
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,4096,8,8,128,1,fp8,fp8,0,0.15312000513076782
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,4096,8,1,128,1,float16,float16,0,0.15126240253448486
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,4096,8,1,128,1,fp8,fp8,0,0.15230720043182372
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,4096,8,1,128,1,float16,fp8,0,0.1511023998260498
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,4096,8,2,128,1,float16,float16,0,0.15271999835968017
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,4096,8,2,128,1,float16,fp8,0,0.15156960487365723
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,4096,8,2,128,1,fp8,fp8,0,0.1508944034576416
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,4096,8,4,128,1,float16,float16,0,0.1599951982498169
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,4096,8,4,128,1,fp8,fp8,0,0.1504032015800476
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,4096,8,4,128,1,float16,fp8,0,0.15255680084228515
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,4096,8,8,128,1,float16,float16,0,0.10411200523376465
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,4096,8,8,128,1,float16,fp8,0,0.0944815993309021
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,4096,8,1,128,1,float16,float16,0,0.09661279916763306
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,4096,8,8,128,1,fp8,fp8,0,0.09490560293197632
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,4096,8,1,128,1,float16,fp8,0,0.09509599804878235
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,4096,8,1,128,1,fp8,fp8,0,0.09453920125961304
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,4096,8,2,128,1,float16,float16,0,0.09765759706497193
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,4096,8,2,128,1,float16,fp8,0,0.09460800290107726
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,4096,8,2,128,1,fp8,fp8,0,0.09441440105438233
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,4096,8,4,128,1,float16,float16,0,0.09949600100517272
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,4096,8,4,128,1,float16,fp8,0,0.09452319741249085
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,4096,8,4,128,1,fp8,fp8,0,0.09430720210075379
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,4096,8,8,128,1,float16,float16,0,0.06397119760513306
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,4096,8,8,128,1,float16,fp8,0,0.05968160033226013
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,4096,8,1,128,1,float16,float16,0,0.06166399717330932
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,4096,8,8,128,1,fp8,fp8,0,0.05963680148124695
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,4096,8,1,128,1,float16,fp8,0,0.05963360071182251
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,4096,8,2,128,1,float16,float16,0,0.060102397203445436
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,4096,8,1,128,1,fp8,fp8,0,0.05987039804458618
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,4096,8,2,128,1,float16,fp8,0,0.05964159965515137
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,4096,8,4,128,1,float16,float16,0,0.061791998147964475
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,4096,8,4,128,1,float16,fp8,0,0.06085439920425415
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,4096,8,2,128,1,fp8,fp8,0,0.05971199870109558
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,4096,8,4,128,1,fp8,fp8,0,0.05972319841384888
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,3072,8,1,128,1,float16,float16,0,0.5678815841674805
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,3072,8,1,128,1,float16,fp8,0,0.5805359840393066
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,3072,8,1,128,1,fp8,fp8,0,0.5883552074432373
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,3072,8,2,128,1,float16,float16,0,0.5609551906585694
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,3072,8,2,128,1,float16,fp8,0,0.5794976234436036
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,3072,8,2,128,1,fp8,fp8,0,0.5863808155059814
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,3072,8,4,128,1,float16,fp8,0,0.578652811050415
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,3072,8,4,128,1,float16,float16,0,0.6532879829406738
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,3072,8,4,128,1,fp8,fp8,0,0.5840144157409668
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,3072,8,8,128,1,float16,float16,0,0.3397952079772949
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,3072,8,8,128,1,float16,fp8,0,0.3627935886383057
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,3072,8,8,128,1,fp8,fp8,0,0.3111936092376709
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,3072,8,1,128,1,float16,float16,0,0.3003871917724609
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,3072,8,1,128,1,float16,fp8,0,0.3619807958602905
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,3072,8,1,128,1,fp8,fp8,0,0.3139296054840088
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,3072,8,2,128,1,float16,float16,0,0.29995839595794677
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,3072,8,2,128,1,float16,fp8,0,0.3126512050628662
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,3072,8,2,128,1,fp8,fp8,0,0.31272799968719484
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,3072,8,4,128,1,float16,float16,0,0.3128144025802612
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,3072,8,4,128,1,fp8,fp8,0,0.31245439052581786
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,3072,8,4,128,1,float16,fp8,0,0.3109280109405518
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,3072,8,8,128,1,float16,float16,0,0.1902511954307556
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,3072,8,8,128,1,fp8,fp8,0,0.1747167944908142
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,3072,8,8,128,1,float16,fp8,0,0.17202719449996948
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,3072,8,1,128,1,float16,float16,0,0.16913440227508544
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,3072,8,1,128,1,float16,fp8,0,0.17106720209121704
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,3072,8,2,128,1,float16,float16,0,0.16791679859161376
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,3072,8,1,128,1,fp8,fp8,0,0.17187360525131226
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,3072,8,2,128,1,fp8,fp8,0,0.17104640007019042
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,3072,8,2,128,1,float16,fp8,0,0.17285120487213135
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,3072,8,4,128,1,float16,fp8,0,0.17147840261459352
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,3072,8,4,128,1,float16,float16,0,0.17580480575561525
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,3072,8,4,128,1,fp8,fp8,0,0.17204960584640502
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,3072,8,8,128,1,float16,fp8,0,0.10007359981536865
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,3072,8,8,128,1,fp8,fp8,0,0.10126080513000488
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,3072,8,8,128,1,float16,float16,0,0.11187360286712647
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,3072,8,1,128,1,float16,float16,0,0.09793599843978881
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,3072,8,1,128,1,float16,fp8,0,0.09907199740409851
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,3072,8,1,128,1,fp8,fp8,0,0.09895359873771667
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,3072,8,2,128,1,float16,float16,0,0.09987840056419373
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,3072,8,2,128,1,float16,fp8,0,0.09964799880981445
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,3072,8,2,128,1,fp8,fp8,0,0.09975680112838745
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,3072,8,4,128,1,float16,float16,0,0.10178879499435425
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,3072,8,4,128,1,fp8,fp8,0,0.10003839731216431
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,3072,8,4,128,1,float16,fp8,0,0.09978240132331848
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,3072,8,8,128,1,float16,float16,0,0.0698751986026764
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,3072,8,8,128,1,float16,fp8,0,0.06339520215988159
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,3072,8,8,128,1,fp8,fp8,0,0.06395360231399536
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,3072,8,1,128,1,float16,fp8,0,0.06377919912338256
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,3072,8,1,128,1,float16,float16,0,0.06380159854888916
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,3072,8,1,128,1,fp8,fp8,0,0.06264960169792175
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,3072,8,2,128,1,float16,float16,0,0.06372479796409607
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,3072,8,2,128,1,float16,fp8,0,0.06369280219078063
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,3072,8,4,128,1,float16,float16,0,0.06496480107307434
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,3072,8,2,128,1,fp8,fp8,0,0.06565120220184326
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,3072,8,4,128,1,float16,fp8,0,0.06371679902076721
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,3072,8,4,128,1,fp8,fp8,0,0.06375359892845153
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,3072,8,8,128,1,float16,float16,0,0.049379199743270874
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,3072,8,8,128,1,fp8,fp8,0,0.04747520089149475
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,3072,8,8,128,1,float16,fp8,0,0.04737440049648285
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,3072,8,1,128,1,float16,float16,0,0.04737760126590729
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,3072,8,1,128,1,float16,fp8,0,0.0472896009683609
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,3072,8,1,128,1,fp8,fp8,0,0.04739840030670166
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,3072,8,2,128,1,float16,float16,0,0.04737440049648285
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,3072,8,2,128,1,float16,fp8,0,0.047336000204086306
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,3072,8,2,128,1,fp8,fp8,0,0.047328001260757445
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,3072,8,4,128,1,float16,float16,0,0.04735200107097626
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,3072,8,4,128,1,float16,fp8,0,0.047279998660087585
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,3072,8,4,128,1,fp8,fp8,0,0.04736480116844177
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,2048,8,1,128,1,float16,float16,0,0.5493343830108642
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,2048,8,1,128,1,float16,fp8,0,0.6052576065063476
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,2048,8,1,128,1,fp8,fp8,0,0.6085455894470215
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,2048,8,2,128,1,float16,float16,0,0.5431568145751953
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,2048,8,2,128,1,float16,fp8,0,0.6036511898040772
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,2048,8,2,128,1,fp8,fp8,0,0.6067440032958984
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,2048,8,4,128,1,float16,float16,0,0.5847343921661377
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,2048,8,4,128,1,fp8,fp8,0,0.6049727916717529
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,2048,8,4,128,1,float16,fp8,0,0.6021327972412109
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,2048,8,8,128,1,float16,float16,0,0.37304160594940183
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,2048,8,8,128,1,float16,fp8,0,0.31902399063110354
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,2048,8,8,128,1,fp8,fp8,0,0.31724960803985597
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,2048,8,1,128,1,float16,float16,0,0.31389920711517333
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,2048,8,1,128,1,float16,fp8,0,0.321726393699646
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,2048,8,1,128,1,fp8,fp8,0,0.31988000869750977
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,2048,8,2,128,1,float16,float16,0,0.3155168056488037
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,2048,8,2,128,1,float16,fp8,0,0.3202784061431885
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,2048,8,2,128,1,fp8,fp8,0,0.3184272050857544
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,2048,8,4,128,1,float16,float16,0,0.3342303991317749
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,2048,8,8,128,1,float16,float16,0,0.19528160095214844
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,2048,8,4,128,1,float16,fp8,0,0.3194528102874756
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,2048,8,4,128,1,fp8,fp8,0,0.31876800060272215
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,2048,8,8,128,1,float16,fp8,0,0.17502559423446656
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,2048,8,1,128,1,float16,float16,0,0.16191200017929078
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,2048,8,8,128,1,fp8,fp8,0,0.17377599477767944
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,2048,8,1,128,1,float16,fp8,0,0.17192480564117432
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,2048,8,1,128,1,fp8,fp8,0,0.17188800573349
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,2048,8,2,128,1,float16,float16,0,0.16116800308227539
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,2048,8,2,128,1,float16,fp8,0,0.17322239875793458
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,2048,8,2,128,1,fp8,fp8,0,0.17168960571289063
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,2048,8,4,128,1,float16,float16,0,0.1713296055793762
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,2048,8,8,128,1,float16,float16,0,0.1093999981880188
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,2048,8,4,128,1,float16,fp8,0,0.17375199794769286
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,2048,8,4,128,1,fp8,fp8,0,0.171452796459198
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,2048,8,8,128,1,float16,fp8,0,0.09847840070724487
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,2048,8,8,128,1,fp8,fp8,0,0.09750720262527465
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,2048,8,1,128,1,float16,float16,0,0.09164959788322449
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,2048,8,1,128,1,float16,fp8,0,0.09643999934196472
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,2048,8,1,128,1,fp8,fp8,0,0.09481120109558105
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,2048,8,2,128,1,float16,float16,0,0.09305279850959777
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,2048,8,2,128,1,float16,fp8,0,0.09628480076789855
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,2048,8,2,128,1,fp8,fp8,0,0.09675679802894592
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,2048,8,4,128,1,float16,float16,0,0.09797599911689758
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,2048,8,4,128,1,float16,fp8,0,0.09631360173225403
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,2048,8,4,128,1,fp8,fp8,0,0.09624959826469422
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,2048,8,8,128,1,float16,float16,0,0.06662719845771789
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,2048,8,8,128,1,float16,fp8,0,0.059620797634124756
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,2048,8,8,128,1,fp8,fp8,0,0.059785598516464235
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,2048,8,1,128,1,float16,float16,0,0.05952479839324951
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,2048,8,1,128,1,float16,fp8,0,0.059646397829055786
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,2048,8,1,128,1,fp8,fp8,0,0.05960800051689148
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,2048,8,2,128,1,float16,float16,0,0.05924640297889709
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,2048,8,2,128,1,float16,fp8,0,0.05960800051689148
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,2048,8,2,128,1,fp8,fp8,0,0.059887999296188356
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,2048,8,4,128,1,float16,float16,0,0.060215997695922854
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,2048,8,4,128,1,float16,fp8,0,0.05968480110168457
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,2048,8,4,128,1,fp8,fp8,0,0.059564799070358276
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,2048,8,8,128,1,float16,float16,0,0.04019359946250915
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,2048,8,8,128,1,float16,fp8,0,0.03720960021018982
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,2048,8,8,128,1,fp8,fp8,0,0.03792960047721863
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,2048,8,1,128,1,float16,float16,0,0.03698239922523498
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,2048,8,1,128,1,float16,fp8,0,0.03713760077953339
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,2048,8,1,128,1,fp8,fp8,0,0.03734399974346161
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,2048,8,2,128,1,float16,fp8,0,0.03705599904060364
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,2048,8,2,128,1,float16,float16,0,0.03716320097446442
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,2048,8,2,128,1,fp8,fp8,0,0.037150400876998904
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,2048,8,4,128,1,float16,float16,0,0.03716000020503998
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,2048,8,4,128,1,float16,fp8,0,0.03714079856872558
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,2048,8,4,128,1,fp8,fp8,0,0.03915359973907471
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,2048,8,8,128,1,float16,float16,0,0.03698239922523498
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,2048,8,8,128,1,float16,fp8,0,0.03520799875259399
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,2048,8,1,128,1,float16,float16,0,0.03498719930648804
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,2048,8,8,128,1,fp8,fp8,0,0.0349727988243103
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,2048,8,1,128,1,float16,fp8,0,0.03503519892692566
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,2048,8,1,128,1,fp8,fp8,0,0.03510720133781433
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,2048,8,2,128,1,float16,float16,0,0.03510079979896545
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,2048,8,2,128,1,float16,fp8,0,0.03504480123519897
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,2048,8,2,128,1,fp8,fp8,0,0.03503359854221344
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,2048,8,4,128,1,float16,float16,0,0.035124799609184264
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,2048,8,4,128,1,float16,fp8,0,0.03513599932193756
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,2048,8,4,128,1,fp8,fp8,0,0.035006400942802426
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1536,8,1,128,1,float16,float16,0,0.35480000972747805
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1536,8,1,128,1,fp8,fp8,0,0.4061295986175537
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1536,8,2,128,1,float16,float16,0,0.3495680093765259
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1536,8,1,128,1,float16,fp8,0,0.40082879066467286
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1536,8,2,128,1,float16,fp8,0,0.4011375904083252
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1536,8,2,128,1,fp8,fp8,0,0.403934383392334
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1536,8,4,128,1,float16,fp8,0,0.39957120418548586
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1536,8,4,128,1,float16,float16,0,0.37701919078826907
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1536,8,8,128,1,float16,fp8,0,0.21403200626373292
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1536,8,8,128,1,float16,float16,0,0.23439199924468995
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1536,8,8,128,1,fp8,fp8,0,0.21219360828399658
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1536,8,4,128,1,fp8,fp8,0,0.4002992153167725
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1536,8,1,128,1,float16,float16,0,0.18956960439682008
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1536,8,2,128,1,float16,float16,0,0.19129279851913453
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1536,8,1,128,1,fp8,fp8,0,0.2133807897567749
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1536,8,1,128,1,float16,fp8,0,0.21568639278411866
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1536,8,2,128,1,float16,fp8,0,0.21402080059051515
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1536,8,2,128,1,fp8,fp8,0,0.2127840042114258
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1536,8,4,128,1,float16,float16,0,0.20468640327453613
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1536,8,4,128,1,float16,fp8,0,0.2139120101928711
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1536,8,8,128,1,float16,float16,0,0.1285663962364197
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1536,8,8,128,1,float16,fp8,0,0.11723840236663818
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1536,8,4,128,1,fp8,fp8,0,0.21249279975891114
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1536,8,8,128,1,fp8,fp8,0,0.11603039503097534
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1536,8,1,128,1,float16,float16,0,0.10574400424957275
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1536,8,1,128,1,float16,fp8,0,0.11410239934921265
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1536,8,1,128,1,fp8,fp8,0,0.11404000520706177
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1536,8,2,128,1,float16,float16,0,0.10591520071029663
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1536,8,2,128,1,float16,fp8,0,0.1148527979850769
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1536,8,2,128,1,fp8,fp8,0,0.11427359580993653
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1536,8,4,128,1,float16,float16,0,0.1142799973487854
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1536,8,4,128,1,float16,fp8,0,0.11579999923706055
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1536,8,8,128,1,float16,float16,0,0.07649599909782409
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1536,8,4,128,1,fp8,fp8,0,0.11593439579010009
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1536,8,8,128,1,float16,fp8,0,0.06779999732971191
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1536,8,1,128,1,float16,float16,0,0.06206880211830139
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1536,8,8,128,1,fp8,fp8,0,0.06803039908409118
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1536,8,1,128,1,float16,fp8,0,0.06707360148429871
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1536,8,1,128,1,fp8,fp8,0,0.06769919991493226
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1536,8,2,128,1,float16,float16,0,0.06322879791259765
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1536,8,2,128,1,fp8,fp8,0,0.06700639724731446
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1536,8,4,128,1,float16,float16,0,0.06617599725723267
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1536,8,2,128,1,float16,fp8,0,0.067331200838089
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1536,8,4,128,1,float16,fp8,0,0.06787199974060058
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1536,8,4,128,1,fp8,fp8,0,0.06791840195655822
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1536,8,8,128,1,float16,float16,0,0.04691999852657318
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1536,8,8,128,1,fp8,fp8,0,0.04327679872512817
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1536,8,1,128,1,float16,float16,0,0.04206559956073761
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1536,8,8,128,1,float16,fp8,0,0.04324800074100495
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1536,8,1,128,1,float16,fp8,0,0.04327200055122375
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1536,8,1,128,1,fp8,fp8,0,0.043424001336097716
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1536,8,2,128,1,float16,float16,0,0.04100640118122101
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1536,8,2,128,1,float16,fp8,0,0.04330720007419586
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1536,8,2,128,1,fp8,fp8,0,0.04382559955120087
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1536,8,4,128,1,float16,float16,0,0.04320319890975952
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1536,8,4,128,1,float16,fp8,0,0.04323039948940277
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1536,8,8,128,1,float16,float16,0,0.03198400139808655
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1536,8,4,128,1,fp8,fp8,0,0.04318400025367737
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1536,8,8,128,1,float16,fp8,0,0.030939200520515443
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1536,8,1,128,1,float16,float16,0,0.028948798775672913
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1536,8,8,128,1,fp8,fp8,0,0.03094240128993988
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1536,8,1,128,1,float16,fp8,0,0.030902400612831116
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1536,8,1,128,1,fp8,fp8,0,0.030899199843406677
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1536,8,2,128,1,float16,float16,0,0.028968000411987306
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1536,8,2,128,1,float16,fp8,0,0.03107360005378723
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1536,8,2,128,1,fp8,fp8,0,0.030895999073982237
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1536,8,4,128,1,float16,float16,0,0.03086079955101013
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1536,8,4,128,1,float16,fp8,0,0.030964800715446474
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1536,8,8,128,1,float16,float16,0,0.029020801186561584
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1536,8,4,128,1,fp8,fp8,0,0.030913600325584413
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1536,8,8,128,1,float16,fp8,0,0.02911199927330017
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1536,8,8,128,1,fp8,fp8,0,0.028841599822044373
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1536,8,1,128,1,float16,float16,0,0.02884480059146881
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1536,8,1,128,1,float16,fp8,0,0.029041600227355958
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1536,8,1,128,1,fp8,fp8,0,0.028889599442481994
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1536,8,2,128,1,float16,float16,0,0.028838399052619933
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1536,8,2,128,1,float16,fp8,0,0.029039999842643736
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1536,8,2,128,1,fp8,fp8,0,0.028832000494003297
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1536,8,4,128,1,float16,float16,0,0.028835201263427736
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1536,8,4,128,1,float16,fp8,0,0.02895359992980957
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1536,8,4,128,1,fp8,fp8,0,0.028857600688934327
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,1024,8,1,128,1,float16,float16,0,0.3701535940170288
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,1024,8,1,128,1,fp8,fp8,0,0.4437295913696289
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,1024,8,1,128,1,float16,fp8,0,0.44832959175109866
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,1024,8,2,128,1,float16,float16,0,0.36673119068145754
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,1024,8,2,128,1,float16,fp8,0,0.44238719940185545
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,1024,8,2,128,1,fp8,fp8,0,0.4464288234710693
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,1024,8,4,128,1,float16,float16,0,0.4052976131439209
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,1024,8,4,128,1,float16,fp8,0,0.44136958122253417
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,1024,8,4,128,1,fp8,fp8,0,0.44498558044433595
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1024,8,8,128,1,float16,float16,0,0.2525887966156006
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1024,8,8,128,1,float16,fp8,0,0.232259202003479
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1024,8,8,128,1,fp8,fp8,0,0.23308959007263183
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1024,8,1,128,1,float16,float16,0,0.19692959785461425
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1024,8,1,128,1,float16,fp8,0,0.23310399055480957
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1024,8,2,128,1,float16,float16,0,0.19610400199890138
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1024,8,1,128,1,fp8,fp8,0,0.23462400436401368
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1024,8,2,128,1,float16,fp8,0,0.23309600353240967
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1024,8,2,128,1,fp8,fp8,0,0.23153278827667237
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1024,8,4,128,1,float16,float16,0,0.2160559892654419
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1024,8,4,128,1,float16,fp8,0,0.23257920742034913
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1024,8,4,128,1,fp8,fp8,0,0.23215200901031494
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1024,8,8,128,1,float16,float16,0,0.13617279529571533
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1024,8,8,128,1,float16,fp8,0,0.12729439735412598
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1024,8,8,128,1,fp8,fp8,0,0.12578239440917968
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1024,8,1,128,1,float16,float16,0,0.10706880092620849
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1024,8,1,128,1,float16,fp8,0,0.12375520467758179
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1024,8,1,128,1,fp8,fp8,0,0.12571680545806885
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1024,8,2,128,1,float16,float16,0,0.10772160291671753
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1024,8,2,128,1,float16,fp8,0,0.1246448040008545
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1024,8,4,128,1,float16,float16,0,0.1193168044090271
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1024,8,2,128,1,fp8,fp8,0,0.12372640371322632
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1024,8,4,128,1,float16,fp8,0,0.1254639983177185
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1024,8,4,128,1,fp8,fp8,0,0.1251263976097107
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1024,8,8,128,1,float16,float16,0,0.08009600043296813
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1024,8,8,128,1,float16,fp8,0,0.07050719857215881
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1024,8,8,128,1,fp8,fp8,0,0.07115200161933899
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1024,8,1,128,1,float16,float16,0,0.06048480272293091
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1024,8,1,128,1,float16,fp8,0,0.06992959976196289
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1024,8,1,128,1,fp8,fp8,0,0.06901760101318359
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1024,8,2,128,1,float16,float16,0,0.06174399852752686
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1024,8,2,128,1,fp8,fp8,0,0.06851680278778076
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1024,8,2,128,1,float16,fp8,0,0.06993920207023621
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1024,8,4,128,1,float16,float16,0,0.06780800223350525
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1024,8,4,128,1,float16,fp8,0,0.0699887990951538
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1024,8,4,128,1,fp8,fp8,0,0.06989439725875854
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1024,8,8,128,1,float16,float16,0,0.04708000123500824
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1024,8,8,128,1,float16,fp8,0,0.04319359958171844
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1024,8,1,128,1,float16,float16,0,0.03920319974422455
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1024,8,8,128,1,fp8,fp8,0,0.04324800074100495
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1024,8,1,128,1,float16,fp8,0,0.04317919909954071
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1024,8,2,128,1,float16,float16,0,0.03919520080089569
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1024,8,1,128,1,fp8,fp8,0,0.04324159920215607
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1024,8,2,128,1,float16,fp8,0,0.043137601017951964
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1024,8,2,128,1,fp8,fp8,0,0.04323680102825165
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1024,8,4,128,1,float16,float16,0,0.041473600268363955
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1024,8,4,128,1,float16,fp8,0,0.04280480146408081
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1024,8,8,128,1,float16,float16,0,0.02821600139141083
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1024,8,8,128,1,float16,fp8,0,0.026830399036407472
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1024,8,4,128,1,fp8,fp8,0,0.04323199987411499
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1024,8,1,128,1,float16,float16,0,0.02476159930229187
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1024,8,8,128,1,fp8,fp8,0,0.026849600672721862
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1024,8,1,128,1,float16,fp8,0,0.026848000288009644
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1024,8,1,128,1,fp8,fp8,0,0.0267984002828598
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1024,8,2,128,1,float16,float16,0,0.024771200120449068
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1024,8,2,128,1,float16,fp8,0,0.026892799139022826
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1024,8,2,128,1,fp8,fp8,0,0.026793599128723145
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1024,8,4,128,1,float16,float16,0,0.024864000082015992
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1024,8,4,128,1,float16,fp8,0,0.026995199918746948
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1024,8,4,128,1,fp8,fp8,0,0.026825600862503053
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1024,8,8,128,1,float16,float16,0,0.02479359954595566
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1024,8,8,128,1,float16,fp8,0,0.02476319968700409
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1024,8,1,128,1,float16,float16,0,0.022678400576114654
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1024,8,8,128,1,fp8,fp8,0,0.0247296005487442
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1024,8,1,128,1,float16,fp8,0,0.024675199389457704
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1024,8,1,128,1,fp8,fp8,0,0.02282399982213974
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1024,8,2,128,1,float16,float16,0,0.022694399952888487
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1024,8,2,128,1,float16,fp8,0,0.024751999974250795
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1024,8,4,128,1,float16,float16,0,0.022755199670791627
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1024,8,2,128,1,fp8,fp8,0,0.022777600586414336
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1024,8,4,128,1,float16,fp8,0,0.024798400700092316
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1024,8,8,128,1,float16,float16,0,0.022694399952888487
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1024,8,8,128,1,float16,fp8,0,0.022836799919605254
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1024,8,4,128,1,fp8,fp8,0,0.02476159930229187
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1024,8,8,128,1,fp8,fp8,0,0.02260479927062988
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1024,8,1,128,1,float16,float16,0,0.022841599583625794
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1024,8,1,128,1,float16,fp8,0,0.02266560047864914
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1024,8,2,128,1,float16,float16,0,0.022598400712013245
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1024,8,2,128,1,float16,fp8,0,0.022806400060653688
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1024,8,1,128,1,fp8,fp8,0,0.02269600033760071
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1024,8,4,128,1,float16,float16,0,0.022737599909305573
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1024,8,2,128,1,fp8,fp8,0,0.022780799865722658
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1024,8,4,128,1,float16,fp8,0,0.022734400629997254
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1024,8,4,128,1,fp8,fp8,0,0.0226623997092247
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,512,8,1,128,1,float16,float16,0,0.2788448095321655
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,512,8,1,128,1,float16,fp8,0,0.36264801025390625
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,512,8,1,128,1,fp8,fp8,0,0.3615407943725586
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,512,8,2,128,1,float16,float16,0,0.2757999897003174
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,512,8,2,128,1,float16,fp8,0,0.361841607093811
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,512,8,2,128,1,fp8,fp8,0,0.3592416048049927
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,512,8,4,128,1,float16,float16,0,0.31383678913116453
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,512,8,4,128,1,float16,fp8,0,0.360647988319397
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,512,8,4,128,1,fp8,fp8,0,0.36091039180755613
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,512,8,8,128,1,float16,float16,0,0.204150390625
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,512,8,8,128,1,float16,fp8,0,0.18786720037460328
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,512,8,1,128,1,float16,float16,0,0.14680960178375244
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,512,8,8,128,1,fp8,fp8,0,0.18931519985198975
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,512,8,2,128,1,float16,float16,0,0.14529119729995726
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,512,8,1,128,1,float16,fp8,0,0.19007840156555175
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,512,8,1,128,1,fp8,fp8,0,0.1902143955230713
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,512,8,2,128,1,float16,fp8,0,0.1874768018722534
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,512,8,4,128,1,float16,float16,0,0.16684319972991943
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,512,8,2,128,1,fp8,fp8,0,0.18931519985198975
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,512,8,8,128,1,float16,float16,0,0.11072959899902343
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,512,8,4,128,1,float16,fp8,0,0.18862559795379638
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,512,8,4,128,1,fp8,fp8,0,0.1901584029197693
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,512,8,8,128,1,float16,fp8,0,0.10091840028762818
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,512,8,8,128,1,fp8,fp8,0,0.10118720531463624
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,512,8,1,128,1,float16,float16,0,0.08067520260810852
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,512,8,1,128,1,float16,fp8,0,0.09995999932289124
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,512,8,1,128,1,fp8,fp8,0,0.10038880109786988
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,512,8,2,128,1,float16,float16,0,0.08171039819717407
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,512,8,2,128,1,float16,fp8,0,0.10051840543746948
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,512,8,2,128,1,fp8,fp8,0,0.10012480020523071
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,512,8,4,128,1,float16,float16,0,0.09049280285835266
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,512,8,4,128,1,float16,fp8,0,0.1011247992515564
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,512,8,4,128,1,fp8,fp8,0,0.10090080499649048
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,512,8,8,128,1,float16,float16,0,0.06239839792251587
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,512,8,8,128,1,float16,fp8,0,0.056888002157211306
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,512,8,8,128,1,fp8,fp8,0,0.05757920145988464
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,512,8,1,128,1,float16,float16,0,0.04518559873104096
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,512,8,1,128,1,float16,fp8,0,0.0555184006690979
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,512,8,2,128,1,float16,float16,0,0.04562560021877289
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,512,8,1,128,1,fp8,fp8,0,0.05474879741668701
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,512,8,2,128,1,float16,fp8,0,0.055486398935317996
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,512,8,2,128,1,fp8,fp8,0,0.05549600124359131
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,512,8,4,128,1,float16,float16,0,0.05165119767189026
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,512,8,4,128,1,float16,fp8,0,0.055508798360824584
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,512,8,4,128,1,fp8,fp8,0,0.055550402402877806
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,512,8,8,128,1,float16,float16,0,0.03705280125141144
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,512,8,8,128,1,float16,fp8,0,0.03306559920310974
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,512,8,8,128,1,fp8,fp8,0,0.033004799485206605
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,512,8,1,128,1,float16,float16,0,0.028990399837493897
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,512,8,1,128,1,float16,fp8,0,0.03309760093688965
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,512,8,1,128,1,fp8,fp8,0,0.033137598633766176
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,512,8,2,128,1,float16,float16,0,0.028804799914360045
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,512,8,2,128,1,float16,fp8,0,0.033374398946762085
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,512,8,2,128,1,fp8,fp8,0,0.03309760093688965
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,512,8,4,128,1,float16,fp8,0,0.03317759931087494
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,512,8,4,128,1,float16,float16,0,0.030902400612831116
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,512,8,8,128,1,float16,float16,0,0.022619199752807618
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,512,8,4,128,1,fp8,fp8,0,0.03397760093212128
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,512,8,8,128,1,float16,fp8,0,0.02072640061378479
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,512,8,8,128,1,fp8,fp8,0,0.020721599459648132
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,512,8,1,128,1,float16,float16,0,0.018713599443435668
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,512,8,1,128,1,float16,fp8,0,0.020665599405765532
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,512,8,1,128,1,fp8,fp8,0,0.02072480022907257
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,512,8,2,128,1,float16,float16,0,0.018755200505256652
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,512,8,2,128,1,float16,fp8,0,0.02064639925956726
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,512,8,2,128,1,fp8,fp8,0,0.020644800364971162
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,512,8,4,128,1,float16,fp8,0,0.020670400559902193
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,512,8,4,128,1,float16,float16,0,0.0205935999751091
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,512,8,4,128,1,fp8,fp8,0,0.02067680060863495
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,512,8,8,128,1,float16,float16,0,0.018624000251293182
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,512,8,8,128,1,fp8,fp8,0,0.018612800538539885
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,512,8,8,128,1,float16,fp8,0,0.018641600012779237
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,512,8,1,128,1,float16,float16,0,0.016657599806785585
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,512,8,1,128,1,float16,fp8,0,0.01866080015897751
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,512,8,1,128,1,fp8,fp8,0,0.018615999817848207
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,512,8,2,128,1,float16,float16,0,0.01656640022993088
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,512,8,2,128,1,fp8,fp8,0,0.01865919977426529
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,512,8,2,128,1,float16,fp8,0,0.018624000251293182
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,512,8,4,128,1,float16,float16,0,0.018588800728321076
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,512,8,4,128,1,float16,fp8,0,0.01852319985628128
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,512,8,4,128,1,fp8,fp8,0,0.018624000251293182
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,512,8,8,128,1,float16,float16,0,0.016924799978733064
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,512,8,8,128,1,float16,fp8,0,0.01669439971446991
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,512,8,8,128,1,fp8,fp8,0,0.01661760061979294
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,512,8,1,128,1,float16,float16,0,0.01652960032224655
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,512,8,1,128,1,float16,fp8,0,0.01659200042486191
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,512,8,2,128,1,float16,float16,0,0.016582399606704712
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,512,8,1,128,1,fp8,fp8,0,0.016601599752902985
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,512,8,2,128,1,float16,fp8,0,0.0166703999042511
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,512,8,2,128,1,fp8,fp8,0,0.016579200327396394
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,512,8,4,128,1,float16,float16,0,0.016630400717258454
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,512,8,4,128,1,float16,fp8,0,0.016708800196647645
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,512,8,4,128,1,fp8,fp8,0,0.016616000235080718
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,512,8,8,128,1,float16,float16,0,0.016620799899101257
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,512,8,8,128,1,float16,fp8,0,0.016705599427223206
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,512,8,1,128,1,float16,float16,0,0.016620799899101257
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,512,8,8,128,1,fp8,fp8,0,0.016548800468444824
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,512,8,1,128,1,float16,fp8,0,0.016652800142765045
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,512,8,1,128,1,fp8,fp8,0,0.016680000722408293
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,512,8,2,128,1,float16,float16,0,0.01658560037612915
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,512,8,2,128,1,fp8,fp8,0,0.01656000018119812
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,512,8,2,128,1,float16,fp8,0,0.016672000288963318
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,512,8,4,128,1,float16,float16,0,0.016616000235080718
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,512,8,4,128,1,float16,fp8,0,0.016502399742603303
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,512,8,4,128,1,fp8,fp8,0,0.01685439944267273
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,256,8,1,128,1,float16,float16,0,0.12143360376358033
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,256,8,1,128,1,float16,fp8,0,0.16482720375061036
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,256,8,2,128,1,float16,float16,0,0.12088799476623535
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,256,8,1,128,1,fp8,fp8,0,0.16487519741058348
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,256,8,2,128,1,float16,fp8,0,0.16450719833374022
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,256,8,2,128,1,fp8,fp8,0,0.16593600511550904
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,256,8,4,128,1,float16,float16,0,0.14049760103225709
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,256,8,4,128,1,float16,fp8,0,0.16430399417877198
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,256,8,8,128,1,float16,float16,0,0.09633600115776061
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,256,8,8,128,1,float16,fp8,0,0.08625119924545288
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,256,8,4,128,1,fp8,fp8,0,0.1655344009399414
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,256,8,1,128,1,float16,float16,0,0.06452800035476684
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,256,8,8,128,1,fp8,fp8,0,0.08668000102043152
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,256,8,1,128,1,float16,fp8,0,0.08624479770660401
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,256,8,1,128,1,fp8,fp8,0,0.08577280044555664
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,256,8,2,128,1,float16,float16,0,0.06595519781112671
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,256,8,2,128,1,float16,fp8,0,0.086217600107193
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,256,8,2,128,1,fp8,fp8,0,0.08628640174865723
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,256,8,4,128,1,float16,float16,0,0.07571039795875549
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,256,8,4,128,1,float16,fp8,0,0.0864960014820099
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,256,8,4,128,1,fp8,fp8,0,0.08636959791183471
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,256,8,8,128,1,float16,float16,0,0.05604959726333618
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,256,8,8,128,1,float16,fp8,0,0.04932000041007996
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,256,8,8,128,1,fp8,fp8,0,0.05088319778442383
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,256,8,1,128,1,float16,float16,0,0.03728800117969513
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,256,8,1,128,1,float16,fp8,0,0.04842880070209503
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,256,8,2,128,1,float16,float16,0,0.03915199935436249
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,256,8,1,128,1,fp8,fp8,0,0.04934560060501099
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,256,8,2,128,1,float16,fp8,0,0.04893600046634674
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,256,8,4,128,1,float16,float16,0,0.045351999998092654
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,256,8,2,128,1,fp8,fp8,0,0.04928640127182007
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,256,8,4,128,1,float16,fp8,0,0.04935519993305206
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,256,8,4,128,1,fp8,fp8,0,0.04934720098972321
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,256,8,8,128,1,float16,float16,0,0.031071999669075014
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,256,8,8,128,1,float16,fp8,0,0.028835201263427736
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,256,8,8,128,1,fp8,fp8,0,0.028841599822044373
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,256,8,1,128,1,float16,float16,0,0.02288320064544678
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,256,8,1,128,1,float16,fp8,0,0.02887359857559204
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,256,8,1,128,1,fp8,fp8,0,0.02887519896030426
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,256,8,2,128,1,float16,float16,0,0.022806400060653688
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,256,8,2,128,1,float16,fp8,0,0.028799998760223388
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,256,8,2,128,1,fp8,fp8,0,0.02886880040168762
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,256,8,4,128,1,float16,float16,0,0.024860799312591553
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,256,8,4,128,1,float16,fp8,0,0.028859201073646545
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,256,8,4,128,1,fp8,fp8,0,0.02890399992465973
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,256,8,8,128,1,float16,float16,0,0.018743999302387238
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,256,8,8,128,1,float16,fp8,0,0.01857919991016388
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,256,8,1,128,1,float16,float16,0,0.01615840047597885
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,256,8,8,128,1,fp8,fp8,0,0.018662400543689728
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,256,8,1,128,1,float16,fp8,0,0.018611200153827667
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,256,8,1,128,1,fp8,fp8,0,0.018723200261592864
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,256,8,2,128,1,float16,float16,0,0.014582400023937226
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,256,8,2,128,1,float16,fp8,0,0.01863359957933426
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,256,8,2,128,1,fp8,fp8,0,0.018598400056362152
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,256,8,4,128,1,float16,float16,0,0.016638399660587312
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,256,8,4,128,1,float16,fp8,0,0.01857919991016388
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,256,8,4,128,1,fp8,fp8,0,0.01876319944858551
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,256,8,8,128,1,float16,float16,0,0.01656000018119812
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,256,8,8,128,1,float16,fp8,0,0.01645279973745346
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,256,8,8,128,1,fp8,fp8,0,0.015663999319076537
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,256,8,1,128,1,float16,float16,0,0.01470080018043518
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,256,8,1,128,1,float16,fp8,0,0.015223999321460725
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,256,8,1,128,1,fp8,fp8,0,0.016631999611854555
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,256,8,2,128,1,fp8,fp8,0,0.016499200463294984
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,256,8,2,128,1,float16,float16,0,0.014923200011253357
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,256,8,2,128,1,float16,fp8,0,0.01552959978580475
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,256,8,4,128,1,float16,float16,0,0.014688000082969666
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,256,8,4,128,1,fp8,fp8,0,0.016145600378513335
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,256,8,4,128,1,float16,fp8,0,0.016769599914550782
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,256,8,8,128,1,float16,float16,0,0.014604799449443817
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,256,8,8,128,1,float16,fp8,0,0.014547200500965118
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,256,8,1,128,1,float16,float16,0,0.014457599818706512
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,256,8,8,128,1,fp8,fp8,0,0.014776000380516052
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,256,8,1,128,1,float16,fp8,0,0.01446399986743927
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,256,8,1,128,1,fp8,fp8,0,0.014500799775123595
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,256,8,2,128,1,float16,fp8,0,0.014567999541759491
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,256,8,2,128,1,float16,float16,0,0.014633600413799287
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,256,8,2,128,1,fp8,fp8,0,0.014632000029087067
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,256,8,4,128,1,float16,float16,0,0.01451839953660965
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,256,8,4,128,1,float16,fp8,0,0.014745600521564484
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,256,8,4,128,1,fp8,fp8,0,0.014534400403499603
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,256,8,8,128,1,float16,float16,0,0.014505599439144135
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,256,8,8,128,1,fp8,fp8,0,0.014499199390411378
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,256,8,8,128,1,float16,fp8,0,0.014703999459743499
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,256,8,1,128,1,float16,float16,0,0.014519999921321868
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,256,8,1,128,1,float16,fp8,0,0.014428800344467163
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,256,8,2,128,1,float16,float16,0,0.014475199580192565
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,256,8,1,128,1,fp8,fp8,0,0.014640000462532044
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,256,8,2,128,1,fp8,fp8,0,0.0144896000623703
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,256,8,2,128,1,float16,fp8,0,0.014542399346828461
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,256,8,4,128,1,float16,float16,0,0.014900800585746766
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,256,8,4,128,1,float16,fp8,0,0.014508800208568573
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,256,8,4,128,1,fp8,fp8,0,0.014505599439144135
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,256,8,8,128,1,float16,fp8,0,0.012505599856376648
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,256,8,8,128,1,float16,float16,0,0.014708800613880158
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,256,8,8,128,1,fp8,fp8,0,0.014545600116252898
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,256,8,1,128,1,float16,float16,0,0.01255040019750595
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,256,8,1,128,1,float16,fp8,0,0.014305600523948669
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,256,8,1,128,1,fp8,fp8,0,0.014399999380111694
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,256,8,2,128,1,float16,float16,0,0.01252799928188324
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,256,8,2,128,1,fp8,fp8,0,0.014494399726390838
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,256,8,2,128,1,float16,fp8,0,0.0146479994058609
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,256,8,4,128,1,float16,float16,0,0.012614400684833526
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,256,8,4,128,1,float16,fp8,0,0.014452800154685974
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,256,8,4,128,1,fp8,fp8,0,0.014579200744628906
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,128,8,1,128,1,float16,fp8,0,0.09675679802894592
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,128,8,1,128,1,float16,float16,0,0.07411999702453613
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,128,8,1,128,1,fp8,fp8,0,0.09807999730110169
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,128,8,2,128,1,float16,float16,0,0.07477599978446961
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,128,8,2,128,1,float16,fp8,0,0.09653599858283997
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,128,8,4,128,1,float16,float16,0,0.08440639972686767
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,128,8,4,128,1,float16,fp8,0,0.09661759734153748
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,128,8,2,128,1,fp8,fp8,0,0.09852160215377807
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,128,8,4,128,1,fp8,fp8,0,0.09683359861373901
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,128,8,8,128,1,float16,float16,0,0.05742400288581848
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,128,8,8,128,1,float16,fp8,0,0.053646397590637204
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,128,8,1,128,1,float16,float16,0,0.04116320013999939
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,128,8,8,128,1,fp8,fp8,0,0.05343679785728454
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,128,8,1,128,1,float16,fp8,0,0.051641601324081424
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,128,8,1,128,1,fp8,fp8,0,0.05140159726142883
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,128,8,2,128,1,float16,float16,0,0.041212800145149234
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,128,8,2,128,1,float16,fp8,0,0.05160800218582153
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,128,8,2,128,1,fp8,fp8,0,0.051374399662017824
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,128,8,4,128,1,float16,float16,0,0.04732480049133301
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,128,8,4,128,1,float16,fp8,0,0.051652801036834714
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,128,8,8,128,1,float16,float16,0,0.03326080143451691
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,128,8,4,128,1,fp8,fp8,0,0.05151839852333069
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,128,8,8,128,1,float16,fp8,0,0.03099839985370636
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,128,8,8,128,1,fp8,fp8,0,0.03099679946899414
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,128,8,1,128,1,float16,float16,0,0.02475679963827133
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,128,8,1,128,1,float16,fp8,0,0.030934399366378783
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,128,8,2,128,1,float16,float16,0,0.024878400564193725
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,128,8,1,128,1,fp8,fp8,0,0.03102239966392517
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,128,8,2,128,1,float16,fp8,0,0.030969598889350893
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,128,8,2,128,1,fp8,fp8,0,0.030910399556159974
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,128,8,4,128,1,float16,float16,0,0.027692800760269164
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,128,8,8,128,1,float16,float16,0,0.020588800311088562
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,128,8,4,128,1,fp8,fp8,0,0.03097440004348755
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,128,8,4,128,1,float16,fp8,0,0.03121280074119568
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,128,8,8,128,1,float16,fp8,0,0.02051839977502823
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,128,8,8,128,1,fp8,fp8,0,0.018886399269104005
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,128,8,1,128,1,float16,float16,0,0.016739200055599212
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,128,8,1,128,1,float16,fp8,0,0.01876640021800995
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,128,8,1,128,1,fp8,fp8,0,0.01873439997434616
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,128,8,2,128,1,float16,float16,0,0.01679680049419403
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,128,8,2,128,1,float16,fp8,0,0.01884319931268692
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,128,8,2,128,1,fp8,fp8,0,0.02080000042915344
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,128,8,4,128,1,float16,float16,0,0.01834080070257187
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,128,8,4,128,1,float16,fp8,0,0.020713600516319274
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,128,8,4,128,1,fp8,fp8,0,0.020212799310684204
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,128,8,8,128,1,float16,fp8,0,0.014425599575042724
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,128,8,8,128,1,float16,float16,0,0.014575999975204468
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,128,8,8,128,1,fp8,fp8,0,0.014612799882888794
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,128,8,1,128,1,float16,float16,0,0.012415999919176102
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,128,8,1,128,1,float16,fp8,0,0.01436000019311905
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,128,8,2,128,1,float16,float16,0,0.012556800246238708
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,128,8,1,128,1,fp8,fp8,0,0.012625600397586822
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,128,8,2,128,1,float16,fp8,0,0.01451520025730133
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,128,8,4,128,1,float16,float16,0,0.012542399764060973
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,128,8,2,128,1,fp8,fp8,0,0.014627200365066529
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,128,8,4,128,1,float16,fp8,0,0.014441600441932679
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,128,8,4,128,1,fp8,fp8,0,0.013361600041389466
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,128,8,8,128,1,float16,float16,0,0.012641599774360657
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,128,8,8,128,1,float16,fp8,0,0.012427199631929398
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,128,8,8,128,1,fp8,fp8,0,0.012435200065374375
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,128,8,1,128,1,float16,float16,0,0.012579199671745301
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,128,8,1,128,1,float16,fp8,0,0.01239359974861145
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,128,8,1,128,1,fp8,fp8,0,0.012521600723266602
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,128,8,2,128,1,float16,float16,0,0.012537600100040435
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,128,8,2,128,1,float16,fp8,0,0.01250240057706833
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,128,8,2,128,1,fp8,fp8,0,0.012425599992275238
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,128,8,4,128,1,float16,float16,0,0.012481600046157837
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,128,8,4,128,1,float16,fp8,0,0.012457600235939026
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,128,8,4,128,1,fp8,fp8,0,0.012459199875593185
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,128,8,8,128,1,float16,fp8,0,0.012328000366687774
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,128,8,8,128,1,fp8,fp8,0,0.010480000078678131
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,128,8,8,128,1,float16,float16,0,0.01178240031003952
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,128,8,1,128,1,float16,float16,0,0.010532800108194351
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,128,8,1,128,1,float16,fp8,0,0.011060799658298492
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,128,8,1,128,1,fp8,fp8,0,0.010865599662065507
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,128,8,2,128,1,float16,float16,0,0.010598400235176086
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,128,8,2,128,1,float16,fp8,0,0.011763200163841248
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,128,8,2,128,1,fp8,fp8,0,0.011588799953460693
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,128,8,4,128,1,float16,float16,0,0.010540799796581268
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,128,8,8,128,1,float16,float16,0,0.010447999835014344
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,128,8,4,128,1,fp8,fp8,0,0.010982400178909302
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,128,8,4,128,1,float16,fp8,0,0.011766400188207626
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,128,8,8,128,1,float16,fp8,0,0.01050880029797554
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,128,8,1,128,1,float16,float16,0,0.010414399951696397
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,128,8,8,128,1,fp8,fp8,0,0.010476800054311753
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,128,8,1,128,1,float16,fp8,0,0.010492800176143647
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,128,8,1,128,1,fp8,fp8,0,0.010503999888896942
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,128,8,2,128,1,float16,float16,0,0.01056160032749176
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,128,8,2,128,1,float16,fp8,0,0.010523200035095215
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,128,8,4,128,1,float16,float16,0,0.010516799986362457
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,128,8,2,128,1,fp8,fp8,0,0.010440000146627427
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,128,8,4,128,1,float16,fp8,0,0.010552000254392624
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,128,8,8,128,1,float16,float16,0,0.010611200332641601
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,128,8,4,128,1,fp8,fp8,0,0.010553599894046783
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,128,8,8,128,1,float16,fp8,0,0.010566399991512298
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,128,8,8,128,1,fp8,fp8,0,0.010420800000429154
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,128,8,1,128,1,float16,float16,0,0.010598400235176086
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,128,8,1,128,1,fp8,fp8,0,0.010412800312042236
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,128,8,1,128,1,float16,fp8,0,0.010487999767065048
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,128,8,2,128,1,float16,float16,0,0.010710400342941285
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,128,8,2,128,1,float16,fp8,0,0.010465600341558457
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,128,8,2,128,1,fp8,fp8,0,0.010582400113344192
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,128,8,4,128,1,float16,float16,0,0.010593599826097488
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,128,8,4,128,1,float16,fp8,0,0.010473600029945374
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,128,8,4,128,1,fp8,fp8,0,0.010571199655532836
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,64,8,1,128,1,float16,float16,0,0.0597711980342865
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,64,8,1,128,1,float16,fp8,0,0.06979519724845887
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,64,8,1,128,1,fp8,fp8,0,0.06989759802818299
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,64,8,2,128,1,float16,float16,0,0.06043040156364441
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,64,8,2,128,1,float16,fp8,0,0.06977279782295227
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,64,8,2,128,1,fp8,fp8,0,0.06991040110588073
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,64,8,4,128,1,float16,float16,0,0.06585760116577148
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,64,8,4,128,1,float16,fp8,0,0.06981760263442993
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,64,8,4,128,1,fp8,fp8,0,0.07047680020332336
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,64,8,8,128,1,float16,float16,0,0.04139519929885864
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,64,8,8,128,1,float16,fp8,0,0.03910239934921265
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,64,8,8,128,1,fp8,fp8,0,0.03912000060081482
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,64,8,1,128,1,float16,float16,0,0.033134400844573975
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,64,8,1,128,1,float16,fp8,0,0.03904959857463837
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,64,8,1,128,1,fp8,fp8,0,0.039048001170158386
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,64,8,2,128,1,float16,float16,0,0.03326399922370911
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,64,8,2,128,1,fp8,fp8,0,0.039062398672103885
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,64,8,4,128,1,float16,float16,0,0.036641600728034976
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,64,8,2,128,1,float16,fp8,0,0.039083200693130496
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,64,8,4,128,1,float16,fp8,0,0.039139199256896975
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,64,8,4,128,1,fp8,fp8,0,0.03911199867725372
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,64,8,8,128,1,float16,float16,0,0.024831999838352204
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,64,8,1,128,1,float16,float16,0,0.022728000581264497
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,64,8,8,128,1,float16,fp8,0,0.024860799312591553
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,64,8,8,128,1,fp8,fp8,0,0.024857600033283234
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,64,8,1,128,1,float16,fp8,0,0.024710400402545928
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,64,8,1,128,1,fp8,fp8,0,0.02475679963827133
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,64,8,2,128,1,float16,float16,0,0.0227183997631073
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,64,8,2,128,1,float16,fp8,0,0.024716800451278685
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,64,8,2,128,1,fp8,fp8,0,0.024775999784469604
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,64,8,4,128,1,float16,float16,0,0.02271520048379898
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,64,8,4,128,1,float16,fp8,0,0.024774399399757386
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,64,8,4,128,1,fp8,fp8,0,0.02468319982290268
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,64,8,8,128,1,float16,float16,0,0.016649599373340606
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,64,8,8,128,1,float16,fp8,0,0.016590400040149687
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,64,8,8,128,1,fp8,fp8,0,0.016705599427223206
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,64,8,1,128,1,float16,float16,0,0.014590400457382201
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,64,8,1,128,1,float16,fp8,0,0.016631999611854555
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,64,8,1,128,1,fp8,fp8,0,0.01659359931945801
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,64,8,2,128,1,float16,float16,0,0.01465120017528534
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,64,8,2,128,1,float16,fp8,0,0.01653759926557541
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,64,8,2,128,1,fp8,fp8,0,0.016633599996566772
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,64,8,4,128,1,float16,float16,0,0.01587360054254532
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,64,8,4,128,1,float16,fp8,0,0.01652960032224655
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,64,8,4,128,1,fp8,fp8,0,0.016628800332546233
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,64,8,8,128,1,float16,float16,0,0.01249760016798973
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,64,8,8,128,1,float16,fp8,0,0.012457600235939026
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,64,8,8,128,1,fp8,fp8,0,0.012452799826860428
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,64,8,1,128,1,float16,float16,0,0.01144160032272339
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,64,8,1,128,1,fp8,fp8,0,0.012479999661445617
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,64,8,1,128,1,float16,fp8,0,0.01260959953069687
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,64,8,2,128,1,float16,float16,0,0.011483199894428253
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,64,8,2,128,1,float16,fp8,0,0.012838399410247803
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,64,8,4,128,1,float16,float16,0,0.010918399691581726
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,64,8,2,128,1,fp8,fp8,0,0.012625600397586822
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,64,8,4,128,1,float16,fp8,0,0.012401600182056428
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,64,8,8,128,1,float16,float16,0,0.010633599758148194
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,64,8,4,128,1,fp8,fp8,0,0.01247360035777092
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,64,8,8,128,1,float16,fp8,0,0.010502400249242783
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,64,8,1,128,1,float16,float16,0,0.010424000024795533
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,64,8,1,128,1,float16,fp8,0,0.010598400235176086
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,64,8,8,128,1,fp8,fp8,0,0.010627199709415436
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,64,8,1,128,1,fp8,fp8,0,0.010391999781131745
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,64,8,2,128,1,float16,float16,0,0.010494399815797806
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,64,8,2,128,1,float16,fp8,0,0.010651200264692306
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,64,8,2,128,1,fp8,fp8,0,0.010492800176143647
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,64,8,4,128,1,float16,float16,0,0.010523200035095215
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,64,8,4,128,1,float16,fp8,0,0.010673599690198899
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,64,8,4,128,1,fp8,fp8,0,0.01043040007352829
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,64,8,8,128,1,float16,float16,0,0.010526400059461594
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,64,8,8,128,1,fp8,fp8,0,0.010449600219726563
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,64,8,1,128,1,float16,float16,0,0.010582400113344192
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,64,8,8,128,1,float16,fp8,0,0.010576000064611435
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,64,8,1,128,1,float16,fp8,0,0.010417599976062775
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,64,8,1,128,1,fp8,fp8,0,0.010531199723482132
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,64,8,2,128,1,float16,float16,0,0.010532800108194351
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,64,8,2,128,1,float16,fp8,0,0.010451199859380722
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,64,8,4,128,1,float16,float16,0,0.010507199913263321
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,64,8,2,128,1,fp8,fp8,0,0.010475199669599533
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,64,8,4,128,1,float16,fp8,0,0.010452800244092942
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,64,8,8,128,1,float16,float16,0,0.010521599650382995
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,64,8,4,128,1,fp8,fp8,0,0.010636799782514573
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,64,8,8,128,1,float16,fp8,0,0.010468800365924836
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,64,8,8,128,1,fp8,fp8,0,0.010523200035095215
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,64,8,1,128,1,float16,fp8,0,0.01040479987859726
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,64,8,1,128,1,float16,float16,0,0.010480000078678131
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,64,8,1,128,1,fp8,fp8,0,0.010503999888896942
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,64,8,2,128,1,float16,float16,0,0.010416000336408614
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,64,8,2,128,1,fp8,fp8,0,0.010452800244092942
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,64,8,2,128,1,float16,fp8,0,0.010452800244092942
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,64,8,4,128,1,float16,float16,0,0.010449600219726563
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,64,8,4,128,1,float16,fp8,0,0.010494399815797806
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,64,8,4,128,1,fp8,fp8,0,0.010446400195360184
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,64,8,8,128,1,float16,float16,0,0.01045759990811348
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,64,8,8,128,1,float16,fp8,0,0.010452800244092942
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,64,8,8,128,1,fp8,fp8,0,0.010451199859380722
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,64,8,1,128,1,float16,float16,0,0.010452800244092942
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,64,8,1,128,1,float16,fp8,0,0.010480000078678131
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,64,8,1,128,1,fp8,fp8,0,0.01040479987859726
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,64,8,2,128,1,float16,float16,0,0.010372799634933472
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,64,8,2,128,1,float16,fp8,0,0.010452800244092942
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,64,8,2,128,1,fp8,fp8,0,0.010360000282526016
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,64,8,4,128,1,float16,float16,0,0.010393600165843963
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,64,8,4,128,1,float16,fp8,0,0.010427200049161912
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,64,8,4,128,1,fp8,fp8,0,0.010382399708032609
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,32,8,1,128,1,float16,float16,0,0.05172799825668335
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,32,8,1,128,1,float16,fp8,0,0.05576800107955933
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,32,8,1,128,1,fp8,fp8,0,0.055593597888946536
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,32,8,2,128,1,float16,float16,0,0.05167679786682129
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,32,8,2,128,1,fp8,fp8,0,0.05567359924316406
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,32,8,2,128,1,float16,fp8,0,0.05743039846420288
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,32,8,4,128,1,float16,float16,0,0.05508319735527038
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,32,8,4,128,1,float16,fp8,0,0.05559039711952209
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,32,8,4,128,1,fp8,fp8,0,0.05738080143928528
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,32,8,8,128,1,float16,float16,0,0.03431519865989685
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,32,8,8,128,1,float16,fp8,0,0.032913601398468016
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,32,8,1,128,1,float16,float16,0,0.03089759945869446
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,32,8,8,128,1,fp8,fp8,0,0.03289439976215362
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,32,8,1,128,1,float16,fp8,0,0.03297280073165894
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,32,8,2,128,1,float16,float16,0,0.031020799279212953
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,32,8,1,128,1,fp8,fp8,0,0.03297280073165894
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,32,8,2,128,1,float16,fp8,0,0.03291679918766022
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,32,8,2,128,1,fp8,fp8,0,0.03291679918766022
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,32,8,4,128,1,float16,float16,0,0.03112640082836151
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,32,8,4,128,1,float16,fp8,0,0.03174239993095398
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,32,8,8,128,1,float16,float16,0,0.02109919935464859
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,32,8,4,128,1,fp8,fp8,0,0.03287360072135925
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,32,8,8,128,1,float16,fp8,0,0.02066880017518997
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,32,8,8,128,1,fp8,fp8,0,0.02066880017518997
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,32,8,1,128,1,float16,float16,0,0.02059040069580078
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,32,8,1,128,1,float16,fp8,0,0.02064319998025894
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,32,8,1,128,1,fp8,fp8,0,0.02077440023422241
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,32,8,2,128,1,float16,float16,0,0.020630399882793426
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,32,8,2,128,1,float16,fp8,0,0.020803199708461763
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,32,8,2,128,1,fp8,fp8,0,0.020656000077724456
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,32,8,4,128,1,float16,float16,0,0.02072480022907257
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,32,8,4,128,1,float16,fp8,0,0.020737600326538087
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,32,8,4,128,1,fp8,fp8,0,0.020849600434303284
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,32,8,8,128,1,float16,float16,0,0.014569599926471711
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,32,8,8,128,1,float16,fp8,0,0.014555199444293976
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,32,8,8,128,1,fp8,fp8,0,0.01451520025730133
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,32,8,1,128,1,float16,float16,0,0.014510400593280792
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,32,8,1,128,1,float16,fp8,0,0.01451680064201355
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,32,8,1,128,1,fp8,fp8,0,0.014577600359916686
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,32,8,2,128,1,float16,float16,0,0.014502400159835815
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,32,8,2,128,1,float16,fp8,0,0.014636799693107605
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,32,8,4,128,1,float16,float16,0,0.015027199685573579
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,32,8,2,128,1,fp8,fp8,0,0.014519999921321868
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,32,8,4,128,1,float16,fp8,0,0.014662399888038635
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,32,8,4,128,1,fp8,fp8,0,0.014569599926471711
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,32,8,8,128,1,float16,fp8,0,0.010523200035095215
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,32,8,8,128,1,float16,float16,0,0.010715200006961823
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,32,8,1,128,1,float16,float16,0,0.01045600026845932
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,32,8,1,128,1,float16,fp8,0,0.01053600013256073
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,32,8,8,128,1,fp8,fp8,0,0.012628799676895142
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,32,8,1,128,1,fp8,fp8,0,0.010443200170993806
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,32,8,2,128,1,float16,float16,0,0.010486400127410889
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,32,8,2,128,1,fp8,fp8,0,0.010507199913263321
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,32,8,2,128,1,float16,fp8,0,0.012654399871826172
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,32,8,4,128,1,float16,float16,0,0.010467199981212616
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,32,8,4,128,1,float16,fp8,0,0.010454399883747101
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,32,8,8,128,1,float16,float16,0,0.010523200035095215
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,32,8,4,128,1,fp8,fp8,0,0.010766399651765823
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,32,8,8,128,1,float16,fp8,0,0.01045600026845932
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,32,8,8,128,1,fp8,fp8,0,0.01061279997229576
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,32,8,1,128,1,float16,float16,0,0.010686399787664414
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,32,8,1,128,1,float16,fp8,0,0.010467199981212616
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,32,8,1,128,1,fp8,fp8,0,0.01048159971833229
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,32,8,2,128,1,float16,float16,0,0.01077599972486496
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,32,8,2,128,1,float16,fp8,0,0.010454399883747101
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,32,8,2,128,1,fp8,fp8,0,0.010532800108194351
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,32,8,4,128,1,float16,fp8,0,0.010460799932479859
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,32,8,4,128,1,fp8,fp8,0,0.010603199899196624
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,32,8,4,128,1,float16,float16,0,0.010780800133943558
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,32,8,8,128,1,float16,float16,0,0.01043360009789467
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,32,8,8,128,1,fp8,fp8,0,0.010507199913263321
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,32,8,8,128,1,float16,fp8,0,0.010463999956846238
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,32,8,1,128,1,float16,float16,0,0.010473600029945374
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,32,8,1,128,1,float16,fp8,0,0.010471999645233154
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,32,8,1,128,1,fp8,fp8,0,0.010463999956846238
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,32,8,2,128,1,float16,float16,0,0.010465600341558457
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,32,8,2,128,1,float16,fp8,0,0.010476800054311753
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,32,8,2,128,1,fp8,fp8,0,0.010492800176143647
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,32,8,4,128,1,float16,float16,0,0.010380800068378448
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,32,8,4,128,1,float16,fp8,0,0.01040479987859726
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,32,8,4,128,1,fp8,fp8,0,0.010440000146627427
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,32,8,8,128,1,float16,float16,0,0.01040479987859726
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,32,8,8,128,1,float16,fp8,0,0.01056160032749176
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,32,8,8,128,1,fp8,fp8,0,0.010487999767065048
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,32,8,1,128,1,float16,float16,0,0.01040479987859726
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,32,8,1,128,1,fp8,fp8,0,0.010318399965763092
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,32,8,1,128,1,float16,fp8,0,0.010497599840164185
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,32,8,2,128,1,float16,fp8,0,0.010496000200510025
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,32,8,2,128,1,float16,float16,0,0.010462400317192078
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,32,8,2,128,1,fp8,fp8,0,0.010443200170993806
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,32,8,4,128,1,float16,fp8,0,0.010444799810647965
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,32,8,4,128,1,fp8,fp8,0,0.010516799986362457
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,32,8,4,128,1,float16,float16,0,0.010384000092744827
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,32,8,8,128,1,float16,float16,0,0.010353600233793258
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,32,8,8,128,1,float16,fp8,0,0.010311999917030334
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,32,8,8,128,1,fp8,fp8,0,0.010441599786281586
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,32,8,1,128,1,float16,float16,0,0.010396800190210342
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,32,8,1,128,1,float16,fp8,0,0.010463999956846238
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,32,8,1,128,1,fp8,fp8,0,0.010454399883747101
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,32,8,2,128,1,float16,float16,0,0.010398399829864503
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,32,8,2,128,1,float16,fp8,0,0.010420800000429154
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,32,8,2,128,1,fp8,fp8,0,0.010487999767065048
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,32,8,4,128,1,float16,float16,0,0.010412800312042236
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,32,8,4,128,1,float16,fp8,0,0.010467199981212616
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,32,8,4,128,1,fp8,fp8,0,0.01037919968366623
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,16,8,1,128,1,float16,float16,0,0.051292800903320314
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,16,8,1,128,1,float16,fp8,0,0.05137760043144226
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,16,8,1,128,1,fp8,fp8,0,0.051337599754333496
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,16,8,2,128,1,float16,float16,0,0.05060960054397583
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,16,8,2,128,1,float16,fp8,0,0.050886398553848265
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,16,8,2,128,1,fp8,fp8,0,0.051367998123168945
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,16,8,4,128,1,float16,float16,0,0.05162400007247925
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,16,8,4,128,1,float16,fp8,0,0.050939202308654785
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,16,8,4,128,1,fp8,fp8,0,0.05135840177536011
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,16,8,8,128,1,float16,float16,0,0.031016001105308534
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,16,8,8,128,1,float16,fp8,0,0.029003199934959412
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,16,8,8,128,1,fp8,fp8,0,0.0289792001247406
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,16,8,1,128,1,float16,float16,0,0.030313599109649658
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,16,8,1,128,1,float16,fp8,0,0.028993600606918336
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,16,8,1,128,1,fp8,fp8,0,0.02901279926300049
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,16,8,2,128,1,float16,float16,0,0.030212798714637758
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,16,8,2,128,1,float16,fp8,0,0.029084798693656922
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,16,8,2,128,1,fp8,fp8,0,0.028942400217056276
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,16,8,4,128,1,float16,float16,0,0.03087199926376343
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,16,8,4,128,1,float16,fp8,0,0.028964799642562867
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,16,8,8,128,1,float16,float16,0,0.020729599893093108
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,16,8,4,128,1,fp8,fp8,0,0.02889760136604309
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,16,8,8,128,1,float16,fp8,0,0.019724799692630766
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,16,8,8,128,1,fp8,fp8,0,0.018639999628067016
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,16,8,1,128,1,float16,fp8,0,0.018622399866580965
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,16,8,1,128,1,float16,float16,0,0.018875199556350707
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,16,8,2,128,1,float16,float16,0,0.01937279999256134
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,16,8,1,128,1,fp8,fp8,0,0.02014559954404831
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,16,8,2,128,1,float16,fp8,0,0.02028000056743622
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,16,8,2,128,1,fp8,fp8,0,0.020681600272655486
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,16,8,4,128,1,float16,float16,0,0.02054399996995926
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,16,8,4,128,1,float16,fp8,0,0.020718400180339814
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,16,8,4,128,1,fp8,fp8,0,0.02072640061378479
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,16,8,8,128,1,float16,float16,0,0.014591999351978302
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,16,8,8,128,1,float16,fp8,0,0.014476799964904785
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,16,8,8,128,1,fp8,fp8,0,0.014483200013637542
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,16,8,1,128,1,float16,float16,0,0.014419199526309967
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,16,8,1,128,1,float16,fp8,0,0.014528000354766845
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,16,8,1,128,1,fp8,fp8,0,0.014374400675296783
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,16,8,2,128,1,float16,float16,0,0.014233599603176116
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,16,8,2,128,1,float16,fp8,0,0.014481599628925323
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,16,8,2,128,1,fp8,fp8,0,0.01451520025730133
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,16,8,4,128,1,float16,float16,0,0.014550399780273438
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,16,8,4,128,1,float16,fp8,0,0.014542399346828461
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,16,8,4,128,1,fp8,fp8,0,0.014499199390411378
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,16,8,8,128,1,float16,fp8,0,0.010395199805498124
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,16,8,8,128,1,float16,float16,0,0.010449600219726563
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,16,8,8,128,1,fp8,fp8,0,0.010596799850463866
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,16,8,1,128,1,float16,float16,0,0.010545600205659866
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,16,8,1,128,1,float16,fp8,0,0.010447999835014344
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,16,8,1,128,1,fp8,fp8,0,0.010462400317192078
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,16,8,2,128,1,float16,float16,0,0.01043199971318245
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,16,8,2,128,1,fp8,fp8,0,0.010521599650382995
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,16,8,2,128,1,float16,fp8,0,0.010758399963378906
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,16,8,4,128,1,float16,float16,0,0.010502400249242783
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,16,8,4,128,1,float16,fp8,0,0.010520000010728836
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,16,8,8,128,1,float16,float16,0,0.010447999835014344
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,16,8,4,128,1,fp8,fp8,0,0.01072160005569458
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,16,8,8,128,1,float16,fp8,0,0.010478399693965912
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,16,8,8,128,1,fp8,fp8,0,0.01043199971318245
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,16,8,1,128,1,float16,fp8,0,0.010532800108194351
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,16,8,1,128,1,float16,float16,0,0.010702399909496308
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,16,8,1,128,1,fp8,fp8,0,0.010555200278759003
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,16,8,2,128,1,float16,fp8,0,0.010417599976062775
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,16,8,2,128,1,float16,float16,0,0.0105103999376297
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,16,8,2,128,1,fp8,fp8,0,0.010416000336408614
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,16,8,4,128,1,float16,float16,0,0.010436800122261048
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,16,8,4,128,1,float16,fp8,0,0.010505600273609162
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16,8,8,128,1,float16,float16,0,0.010398399829864503
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,16,8,4,128,1,fp8,fp8,0,0.010396800190210342
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16,8,8,128,1,float16,fp8,0,0.010470400005578995
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16,8,8,128,1,fp8,fp8,0,0.010380800068378448
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16,8,1,128,1,float16,float16,0,0.010425599664449692
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16,8,1,128,1,float16,fp8,0,0.010375999659299851
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16,8,1,128,1,fp8,fp8,0,0.010351999849081039
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16,8,2,128,1,float16,float16,0,0.010435199737548828
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16,8,2,128,1,float16,fp8,0,0.010499200224876404
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16,8,2,128,1,fp8,fp8,0,0.01040000021457672
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16,8,4,128,1,float16,float16,0,0.01040000021457672
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16,8,4,128,1,float16,fp8,0,0.010361599922180175
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16,8,8,128,1,float16,float16,0,0.010496000200510025
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16,8,4,128,1,fp8,fp8,0,0.010371199995279311
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16,8,8,128,1,float16,fp8,0,0.010328000038862228
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16,8,8,128,1,fp8,fp8,0,0.010417599976062775
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16,8,1,128,1,float16,float16,0,0.010454399883747101
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16,8,1,128,1,float16,fp8,0,0.010443200170993806
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16,8,1,128,1,fp8,fp8,0,0.010353600233793258
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16,8,2,128,1,float16,float16,0,0.010419200360774993
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16,8,2,128,1,float16,fp8,0,0.010417599976062775
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16,8,2,128,1,fp8,fp8,0,0.010358399897813796
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16,8,4,128,1,float16,fp8,0,0.010375999659299851
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16,8,4,128,1,float16,float16,0,0.010644800215959548
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16,8,4,128,1,fp8,fp8,0,0.010428799688816071
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16,8,8,128,1,float16,float16,0,0.010494399815797806
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16,8,8,128,1,float16,fp8,0,0.010969600081443787
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16,8,8,128,1,fp8,fp8,0,0.010422399640083313
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16,8,1,128,1,float16,float16,0,0.0103472001850605
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16,8,1,128,1,float16,fp8,0,0.01037440001964569
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16,8,1,128,1,fp8,fp8,0,0.0104032002389431
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16,8,2,128,1,float16,float16,0,0.010372799634933472
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16,8,2,128,1,float16,fp8,0,0.010375999659299851
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16,8,2,128,1,fp8,fp8,0,0.010441599786281586
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16,8,4,128,1,float16,float16,0,0.010412800312042236
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16,8,4,128,1,float16,fp8,0,0.010345599800348281
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16,8,4,128,1,fp8,fp8,0,0.010388799756765366
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,1,8,1,128,1,float16,fp8,0,0.04535360038280487
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,1,8,1,128,1,float16,float16,0,0.049502399563789365
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,1,8,1,128,1,fp8,fp8,0,0.045259198546409606
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,1,8,2,128,1,float16,float16,0,0.0494623988866806
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,1,8,2,128,1,float16,fp8,0,0.04542720019817352
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,1,8,2,128,1,fp8,fp8,0,0.04526079893112182
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,1,8,4,128,1,float16,float16,0,0.04932479858398438
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,1,8,4,128,1,float16,fp8,0,0.04552479982376099
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,1,8,4,128,1,fp8,fp8,0,0.045263999700546266
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,1,8,8,128,1,float16,float16,0,0.028887999057769776
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,1,8,8,128,1,float16,fp8,0,0.026956799626350402
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,1,8,8,128,1,fp8,fp8,0,0.02685759961605072
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,1,8,1,128,1,float16,fp8,0,0.026966398954391478
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,1,8,1,128,1,float16,float16,0,0.02905920147895813
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,1,8,1,128,1,fp8,fp8,0,0.026924800872802735
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,1,8,2,128,1,float16,float16,0,0.028932800889015196
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,1,8,2,128,1,float16,fp8,0,0.0268640011548996
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,1,8,2,128,1,fp8,fp8,0,0.026833599805831908
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,1,8,4,128,1,float16,float16,0,0.028908801078796387
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,1,8,4,128,1,float16,fp8,0,0.02683199942111969
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,1,8,4,128,1,fp8,fp8,0,0.026931199431419372
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1,8,8,128,1,float16,float16,0,0.01871519982814789
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1,8,8,128,1,float16,fp8,0,0.018624000251293182
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1,8,8,128,1,fp8,fp8,0,0.018598400056362152
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1,8,1,128,1,float16,float16,0,0.01886080056428909
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1,8,1,128,1,float16,fp8,0,0.01863359957933426
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1,8,1,128,1,fp8,fp8,0,0.01858399957418442
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1,8,2,128,1,float16,float16,0,0.018648000061511995
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1,8,2,128,1,float16,fp8,0,0.0186256006360054
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1,8,2,128,1,fp8,fp8,0,0.01863359957933426
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1,8,4,128,1,float16,float16,0,0.018636800348758698
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1,8,4,128,1,float16,fp8,0,0.018636800348758698
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1,8,4,128,1,fp8,fp8,0,0.018572799861431122
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1,8,8,128,1,float16,float16,0,0.014510400593280792
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1,8,8,128,1,float16,fp8,0,0.012470400333404541
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1,8,8,128,1,fp8,fp8,0,0.013896000385284425
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1,8,1,128,1,float16,float16,0,0.014510400593280792
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1,8,1,128,1,float16,fp8,0,0.013212800025939941
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1,8,1,128,1,fp8,fp8,0,0.01255040019750595
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1,8,2,128,1,float16,float16,0,0.014521600306034088
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1,8,2,128,1,float16,fp8,0,0.013684800267219544
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1,8,2,128,1,fp8,fp8,0,0.012580800056457519
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1,8,4,128,1,float16,float16,0,0.014558400213718414
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1,8,8,128,1,float16,float16,0,0.010599999874830245
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1,8,4,128,1,fp8,fp8,0,0.014127999544143677
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1,8,4,128,1,float16,fp8,0,0.013729600608348847
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1,8,8,128,1,float16,fp8,0,0.010449600219726563
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1,8,8,128,1,fp8,fp8,0,0.010460799932479859
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1,8,1,128,1,float16,float16,0,0.010662399977445603
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1,8,1,128,1,float16,fp8,0,0.010417599976062775
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1,8,1,128,1,fp8,fp8,0,0.010411199927330018
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1,8,2,128,1,float16,fp8,0,0.010468800365924836
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1,8,2,128,1,float16,float16,0,0.010764800012111664
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1,8,2,128,1,fp8,fp8,0,0.010417599976062775
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1,8,4,128,1,float16,float16,0,0.010521599650382995
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1,8,4,128,1,fp8,fp8,0,0.010555200278759003
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1,8,4,128,1,float16,fp8,0,0.010716799646615982
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1,8,8,128,1,float16,fp8,0,0.01043360009789467
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1,8,8,128,1,float16,float16,0,0.010449600219726563
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1,8,8,128,1,fp8,fp8,0,0.010744000226259232
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1,8,1,128,1,float16,float16,0,0.010435199737548828
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1,8,1,128,1,float16,fp8,0,0.010473600029945374
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1,8,2,128,1,float16,float16,0,0.010416000336408614
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1,8,1,128,1,fp8,fp8,0,0.010804799944162368
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1,8,2,128,1,float16,fp8,0,0.010441599786281586
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1,8,2,128,1,fp8,fp8,0,0.010446400195360184
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1,8,4,128,1,float16,fp8,0,0.010451199859380722
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1,8,4,128,1,float16,float16,0,0.01067200005054474
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1,8,4,128,1,fp8,fp8,0,0.010435199737548828
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1,8,8,128,1,float16,float16,0,0.010435199737548828
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1,8,8,128,1,float16,fp8,0,0.01058880016207695
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1,8,8,128,1,fp8,fp8,0,0.010454399883747101
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1,8,1,128,1,float16,float16,0,0.010416000336408614
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1,8,1,128,1,float16,fp8,0,0.01072319969534874
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1,8,1,128,1,fp8,fp8,0,0.010427200049161912
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1,8,2,128,1,float16,float16,0,0.010398399829864503
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1,8,2,128,1,fp8,fp8,0,0.010438399761915207
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1,8,4,128,1,float16,float16,0,0.01048479974269867
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1,8,2,128,1,float16,fp8,0,0.010447999835014344
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1,8,4,128,1,float16,fp8,0,0.010395199805498124
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1,8,4,128,1,fp8,fp8,0,0.010422399640083313
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1,8,8,128,1,float16,float16,0,0.010542400181293488
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1,8,8,128,1,fp8,fp8,0,0.01045759990811348
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1,8,8,128,1,float16,fp8,0,0.010443200170993806
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1,8,1,128,1,float16,fp8,0,0.010443200170993806
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1,8,1,128,1,float16,float16,0,0.010548800230026245
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1,8,1,128,1,fp8,fp8,0,0.010393600165843963
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1,8,2,128,1,float16,float16,0,0.01040000021457672
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1,8,2,128,1,float16,fp8,0,0.01043360009789467
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1,8,2,128,1,fp8,fp8,0,0.010547199845314026
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1,8,4,128,1,float16,float16,0,0.010372799634933472
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1,8,4,128,1,float16,fp8,0,0.010608000308275222
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1,8,4,128,1,fp8,fp8,0,0.01035040020942688
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1,8,8,128,1,float16,float16,0,0.010366400331258773
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1,8,8,128,1,float16,fp8,0,0.010543999820947647
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1,8,1,128,1,float16,float16,0,0.01035040020942688
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1,8,8,128,1,fp8,fp8,0,0.010446400195360184
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1,8,1,128,1,float16,fp8,0,0.010286399722099304
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1,8,1,128,1,fp8,fp8,0,0.010409600287675857
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1,8,2,128,1,float16,float16,0,0.01043360009789467
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1,8,2,128,1,float16,fp8,0,0.010363200306892395
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1,8,4,128,1,float16,float16,0,0.010380800068378448
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1,8,2,128,1,fp8,fp8,0,0.010422399640083313
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1,8,4,128,1,float16,fp8,0,0.010307200253009796
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1,8,4,128,1,fp8,fp8,0,0.01040000021457672
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,16384,4,1,128,1,float16,float16,0,1.5670432090759276
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,16384,4,1,128,1,float16,fp8,0,1.5122048377990722
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,16384,4,1,128,1,fp8,fp8,0,1.5131759643554688
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16384,4,4,128,1,float16,float16,0,0.9012639999389649
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,16384,4,2,128,1,float16,fp8,0,1.5121775627136231
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,16384,4,2,128,1,fp8,fp8,0,1.5069536209106444
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,16384,4,2,128,1,float16,float16,0,1.9994592666625977
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16384,4,4,128,1,float16,fp8,0,0.8182623863220215
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16384,4,4,128,1,fp8,fp8,0,0.821833610534668
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16384,4,1,128,1,float16,float16,0,0.8784591674804687
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16384,4,1,128,1,float16,fp8,0,1.03712158203125
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16384,4,1,128,1,fp8,fp8,0,0.8244336128234864
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16384,4,2,128,1,float16,float16,0,0.8866576194763184
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16384,4,2,128,1,float16,fp8,0,0.8178671836853028
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16384,4,4,128,1,float16,float16,0,0.5276815891265869
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16384,4,2,128,1,fp8,fp8,0,0.8237824440002441
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16384,4,4,128,1,float16,fp8,0,0.4750815868377686
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16384,4,4,128,1,fp8,fp8,0,0.48449277877807617
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16384,4,1,128,1,float16,float16,0,0.5127520084381103
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16384,4,1,128,1,float16,fp8,0,0.47280001640319824
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16384,4,1,128,1,fp8,fp8,0,0.47492318153381347
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16384,4,2,128,1,float16,float16,0,0.5165552139282227
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16384,4,2,128,1,float16,fp8,0,0.47440319061279296
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16384,4,2,128,1,fp8,fp8,0,0.4764431953430176
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16384,4,4,128,1,float16,float16,0,0.3272655963897705
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16384,4,4,128,1,float16,fp8,0,0.3035696029663086
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16384,4,4,128,1,fp8,fp8,0,0.30183360576629636
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16384,4,1,128,1,float16,float16,0,0.32195520401000977
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16384,4,1,128,1,float16,fp8,0,0.30053761005401614
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16384,4,1,128,1,fp8,fp8,0,0.3017983913421631
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16384,4,2,128,1,float16,float16,0,0.3217328071594238
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16384,4,2,128,1,float16,fp8,0,0.3020319938659668
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16384,4,2,128,1,fp8,fp8,0,0.30191679000854493
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,12288,4,1,128,1,float16,float16,0,0.9747695922851562
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,12288,4,1,128,1,float16,fp8,0,0.9196463584899902
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,12288,4,1,128,1,fp8,fp8,0,0.9210479736328125
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,12288,4,4,128,1,float16,float16,0,0.558793592453003
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,12288,4,2,128,1,float16,float16,0,1.0125136375427246
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,12288,4,2,128,1,fp8,fp8,0,0.9202400207519531
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,12288,4,2,128,1,float16,fp8,0,0.9207887649536133
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,12288,4,4,128,1,float16,fp8,0,0.5145023822784424
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,12288,4,1,128,1,float16,float16,0,0.5385280132293702
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,12288,4,4,128,1,fp8,fp8,0,0.6882847785949707
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,12288,4,1,128,1,float16,fp8,0,0.5149807929992676
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,12288,4,1,128,1,fp8,fp8,0,0.5160096168518067
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,12288,4,2,128,1,float16,float16,0,0.5400496006011963
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,12288,4,2,128,1,fp8,fp8,0,0.5145999908447265
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,12288,4,4,128,1,float16,float16,0,0.33081281185150146
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,12288,4,2,128,1,float16,fp8,0,0.6565919876098633
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,12288,4,4,128,1,float16,fp8,0,0.30707359313964844
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,12288,4,1,128,1,float16,float16,0,0.319484806060791
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,12288,4,4,128,1,fp8,fp8,0,0.3045504093170166
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,12288,4,1,128,1,float16,fp8,0,0.3067823886871338
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,12288,4,1,128,1,fp8,fp8,0,0.3074192047119141
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,12288,4,2,128,1,float16,float16,0,0.32095839977264407
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,12288,4,2,128,1,fp8,fp8,0,0.3057503938674927
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,12288,4,2,128,1,float16,fp8,0,0.3055263996124268
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,12288,4,4,128,1,float16,float16,0,0.20653760433197021
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,12288,4,4,128,1,float16,fp8,0,0.19463679790496827
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,12288,4,4,128,1,fp8,fp8,0,0.1947376012802124
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,12288,4,1,128,1,float16,float16,0,0.2034656047821045
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,12288,4,1,128,1,float16,fp8,0,0.19440000057220458
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,12288,4,1,128,1,fp8,fp8,0,0.1936303973197937
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,12288,4,2,128,1,float16,float16,0,0.20617599487304689
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,12288,4,2,128,1,float16,fp8,0,0.19340319633483888
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,12288,4,2,128,1,fp8,fp8,0,0.19347679615020752
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,10240,4,1,128,1,float16,float16,0,0.7057744026184082
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,10240,4,1,128,1,float16,fp8,0,0.6874415874481201
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,10240,4,2,128,1,float16,float16,0,0.699455976486206
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,10240,4,1,128,1,fp8,fp8,0,0.6829103946685791
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,10240,4,4,128,1,float16,float16,0,0.41216158866882324
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,10240,4,4,128,1,float16,fp8,0,0.3868864059448242
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,10240,4,2,128,1,float16,fp8,0,0.683735990524292
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,10240,4,2,128,1,fp8,fp8,0,0.6861023902893066
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,10240,4,4,128,1,fp8,fp8,0,0.38655838966369627
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,10240,4,1,128,1,float16,float16,0,0.4011712074279785
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,10240,4,1,128,1,float16,fp8,0,0.38643999099731446
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,10240,4,1,128,1,fp8,fp8,0,0.3886096000671387
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,10240,4,2,128,1,float16,float16,0,0.4028304100036621
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,10240,4,4,128,1,float16,float16,0,0.25465919971466067
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,10240,4,2,128,1,float16,fp8,0,0.3856271982192993
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,10240,4,2,128,1,fp8,fp8,0,0.38632800579071047
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,10240,4,4,128,1,float16,fp8,0,0.23768160343170167
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,10240,4,4,128,1,fp8,fp8,0,0.23737919330596924
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,10240,4,1,128,1,float16,float16,0,0.24587199687957764
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,10240,4,1,128,1,float16,fp8,0,0.23684320449829102
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,10240,4,1,128,1,fp8,fp8,0,0.23440799713134766
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,10240,4,2,128,1,float16,float16,0,0.24494879245758056
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,10240,4,2,128,1,float16,fp8,0,0.23565919399261476
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,10240,4,4,128,1,float16,float16,0,0.1487504005432129
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,10240,4,2,128,1,fp8,fp8,0,0.23695199489593505
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,10240,4,4,128,1,float16,fp8,0,0.1441696047782898
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,10240,4,4,128,1,fp8,fp8,0,0.1417296051979065
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,10240,4,1,128,1,float16,float16,0,0.14456959962844848
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,10240,4,1,128,1,float16,fp8,0,0.14301120042800902
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,10240,4,2,128,1,float16,float16,0,0.14643360376358033
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,10240,4,1,128,1,fp8,fp8,0,0.14623839855194093
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,10240,4,2,128,1,fp8,fp8,0,0.14298880100250244
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,10240,4,2,128,1,float16,fp8,0,0.14079359769821168
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,8192,4,1,128,1,float16,float16,0,0.8787504196166992
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,8192,4,1,128,1,float16,fp8,0,0.8838512420654296
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,8192,4,1,128,1,fp8,fp8,0,0.8775487899780273
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,8192,4,2,128,1,float16,float16,0,0.867255973815918
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,8192,4,4,128,1,float16,float16,0,0.5066207885742188
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,8192,4,4,128,1,float16,fp8,0,0.4795487880706787
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,8192,4,2,128,1,fp8,fp8,0,0.8812560081481934
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,8192,4,2,128,1,float16,fp8,0,1.0719663619995117
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,8192,4,4,128,1,fp8,fp8,0,0.47809600830078125
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,8192,4,1,128,1,float16,float16,0,0.4820576190948486
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,8192,4,1,128,1,float16,fp8,0,0.48221759796142577
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,8192,4,1,128,1,fp8,fp8,0,0.5628575801849365
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,8192,4,2,128,1,float16,float16,0,0.48129119873046877
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,8192,4,2,128,1,float16,fp8,0,0.4805744171142578
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,8192,4,4,128,1,float16,float16,0,0.290884804725647
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,8192,4,2,128,1,fp8,fp8,0,0.477780818939209
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,8192,4,4,128,1,float16,fp8,0,0.2778480052947998
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,8192,4,4,128,1,fp8,fp8,0,0.2757872104644775
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,8192,4,1,128,1,float16,float16,0,0.2788624048233032
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,8192,4,1,128,1,float16,fp8,0,0.2768496036529541
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,8192,4,1,128,1,fp8,fp8,0,0.27405760288238523
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,8192,4,2,128,1,float16,float16,0,0.2808671951293945
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,8192,4,2,128,1,float16,fp8,0,0.27714879512786866
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,8192,4,4,128,1,float16,float16,0,0.18225280046463013
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,8192,4,2,128,1,fp8,fp8,0,0.274399995803833
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,8192,4,4,128,1,float16,fp8,0,0.17259680032730101
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,8192,4,4,128,1,fp8,fp8,0,0.17267520427703859
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,8192,4,1,128,1,float16,float16,0,0.1747920036315918
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,8192,4,1,128,1,float16,fp8,0,0.17244960069656373
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,8192,4,1,128,1,fp8,fp8,0,0.17310719490051268
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,8192,4,2,128,1,float16,float16,0,0.1750223994255066
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,8192,4,2,128,1,float16,fp8,0,0.17246079444885254
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,8192,4,2,128,1,fp8,fp8,0,0.17229599952697755
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,8192,4,4,128,1,float16,float16,0,0.11225279569625854
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,8192,4,4,128,1,float16,fp8,0,0.10900959968566895
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,8192,4,4,128,1,fp8,fp8,0,0.10919359922409058
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,8192,4,1,128,1,float16,float16,0,0.11002399921417236
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,8192,4,1,128,1,float16,fp8,0,0.10891200304031372
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,8192,4,2,128,1,float16,float16,0,0.11003680229187011
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,8192,4,1,128,1,fp8,fp8,0,0.10868320465087891
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,8192,4,2,128,1,float16,fp8,0,0.10892800092697144
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,8192,4,2,128,1,fp8,fp8,0,0.10875200033187866
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,6144,4,1,128,1,float16,float16,0,0.5394432067871093
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,6144,4,1,128,1,float16,fp8,0,0.5598832130432129
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,6144,4,1,128,1,fp8,fp8,0,0.5521279811859131
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,6144,4,2,128,1,float16,fp8,0,0.5580624103546142
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,6144,4,2,128,1,float16,float16,0,0.541590404510498
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,6144,4,2,128,1,fp8,fp8,0,0.5515920162200928
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,6144,4,4,128,1,float16,fp8,0,0.3083391904830933
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,6144,4,4,128,1,float16,float16,0,0.32072160243988035
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,6144,4,4,128,1,fp8,fp8,0,0.30543520450592043
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,6144,4,1,128,1,float16,float16,0,0.3021791934967041
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,6144,4,1,128,1,float16,fp8,0,0.3096656084060669
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,6144,4,1,128,1,fp8,fp8,0,0.30600481033325194
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,6144,4,2,128,1,float16,float16,0,0.30377280712127686
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,6144,4,2,128,1,float16,fp8,0,0.3098031997680664
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,6144,4,2,128,1,fp8,fp8,0,0.3059151887893677
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,6144,4,4,128,1,float16,float16,0,0.18622879981994628
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,6144,4,4,128,1,float16,fp8,0,0.18084479570388795
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,6144,4,4,128,1,fp8,fp8,0,0.17855360507965087
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,6144,4,1,128,1,float16,float16,0,0.17696319818496703
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,6144,4,1,128,1,float16,fp8,0,0.18121119737625122
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,6144,4,1,128,1,fp8,fp8,0,0.178928005695343
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,6144,4,2,128,1,float16,float16,0,0.17921600341796876
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,6144,4,2,128,1,float16,fp8,0,0.1784432053565979
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,6144,4,2,128,1,fp8,fp8,0,0.17950400114059448
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,6144,4,4,128,1,float16,float16,0,0.11486079692840576
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,6144,4,4,128,1,fp8,fp8,0,0.11165759563446045
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,6144,4,4,128,1,float16,fp8,0,0.11363519430160522
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,6144,4,1,128,1,float16,float16,0,0.11296319961547852
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,6144,4,1,128,1,float16,fp8,0,0.11183520555496215
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,6144,4,1,128,1,fp8,fp8,0,0.11167839765548707
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,6144,4,2,128,1,float16,fp8,0,0.11334559917449952
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,6144,4,2,128,1,fp8,fp8,0,0.11423519849777222
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,6144,4,2,128,1,float16,float16,0,0.1126255989074707
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,6144,4,4,128,1,float16,float16,0,0.08436639904975891
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,6144,4,4,128,1,float16,fp8,0,0.08422399759292602
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,6144,4,4,128,1,fp8,fp8,0,0.0842095971107483
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,6144,4,1,128,1,float16,float16,0,0.08443359732627868
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,6144,4,1,128,1,float16,fp8,0,0.08261600136756897
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,6144,4,1,128,1,fp8,fp8,0,0.08419520258903504
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,6144,4,2,128,1,float16,float16,0,0.08415200114250183
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,6144,4,2,128,1,fp8,fp8,0,0.08228639960289001
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,6144,4,2,128,1,float16,fp8,0,0.08422880172729492
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,4096,4,1,128,1,float16,float16,0,0.5091472148895264
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,4096,4,1,128,1,float16,fp8,0,0.553000020980835
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,4096,4,2,128,1,float16,float16,0,0.5042191982269287
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,4096,4,1,128,1,fp8,fp8,0,0.5562384128570557
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,4096,4,4,128,1,float16,float16,0,0.2975856065750122
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,4096,4,4,128,1,float16,fp8,0,0.30004799365997314
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,4096,4,2,128,1,float16,fp8,0,0.5588768005371094
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,4096,4,2,128,1,fp8,fp8,0,0.5528416156768798
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,4096,4,4,128,1,fp8,fp8,0,0.2996864080429077
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,4096,4,1,128,1,float16,float16,0,0.27853760719299314
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,4096,4,1,128,1,float16,fp8,0,0.30393760204315184
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,4096,4,1,128,1,fp8,fp8,0,0.2998255968093872
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,4096,4,2,128,1,float16,float16,0,0.27765119075775146
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,4096,4,2,128,1,float16,fp8,0,0.3018768072128296
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,4096,4,4,128,1,float16,float16,0,0.17100800275802613
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,4096,4,2,128,1,fp8,fp8,0,0.3044687986373901
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,4096,4,4,128,1,float16,fp8,0,0.1696992039680481
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,4096,4,4,128,1,fp8,fp8,0,0.16966880559921266
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,4096,4,1,128,1,float16,float16,0,0.15877280235290528
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,4096,4,1,128,1,float16,fp8,0,0.1708624005317688
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,4096,4,1,128,1,fp8,fp8,0,0.16932320594787598
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,4096,4,2,128,1,float16,float16,0,0.16111680269241332
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,4096,4,2,128,1,float16,fp8,0,0.17004640102386476
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,4096,4,2,128,1,fp8,fp8,0,0.17114399671554564
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,4096,4,4,128,1,float16,float16,0,0.1049232006072998
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,4096,4,4,128,1,float16,fp8,0,0.10496000051498414
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,4096,4,4,128,1,fp8,fp8,0,0.10265920162200928
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,4096,4,1,128,1,float16,float16,0,0.09989280104637147
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,4096,4,1,128,1,float16,fp8,0,0.10285600423812866
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,4096,4,1,128,1,fp8,fp8,0,0.10277600288391113
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,4096,4,2,128,1,float16,float16,0,0.09971519708633422
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,4096,4,2,128,1,float16,fp8,0,0.10362720489501953
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,4096,4,2,128,1,fp8,fp8,0,0.1028704047203064
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,4096,4,4,128,1,float16,float16,0,0.064028799533844
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,4096,4,4,128,1,float16,fp8,0,0.06409760117530823
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,4096,4,1,128,1,float16,float16,0,0.061908799409866336
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,4096,4,4,128,1,fp8,fp8,0,0.06378239989280701
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,4096,4,1,128,1,float16,fp8,0,0.0641871988773346
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,4096,4,1,128,1,fp8,fp8,0,0.06486560106277466
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,4096,4,2,128,1,float16,float16,0,0.06178879737854004
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,4096,4,2,128,1,float16,fp8,0,0.06405119895935059
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,4096,4,2,128,1,fp8,fp8,0,0.06451359987258912
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,4096,4,4,128,1,float16,float16,0,0.05970240235328674
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,4096,4,4,128,1,float16,fp8,0,0.05957919955253601
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,4096,4,4,128,1,fp8,fp8,0,0.059564799070358276
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,4096,4,1,128,1,float16,float16,0,0.059680002927780154
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,4096,4,1,128,1,float16,fp8,0,0.05961920022964477
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,4096,4,1,128,1,fp8,fp8,0,0.05958719849586487
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,4096,4,2,128,1,float16,float16,0,0.059601598978042604
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,4096,4,2,128,1,fp8,fp8,0,0.05968160033226013
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,4096,4,2,128,1,float16,fp8,0,0.05956799983978271
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,3072,4,1,128,1,float16,float16,0,0.3154128074645996
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,3072,4,1,128,1,float16,fp8,0,0.36865758895874023
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,3072,4,1,128,1,fp8,fp8,0,0.3660543918609619
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,3072,4,2,128,1,float16,float16,0,0.3133120059967041
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,3072,4,2,128,1,float16,fp8,0,0.3668623924255371
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,3072,4,2,128,1,fp8,fp8,0,0.3632848024368286
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,3072,4,4,128,1,float16,float16,0,0.1902559995651245
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,3072,4,4,128,1,float16,fp8,0,0.19943200349807738
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,3072,4,4,128,1,fp8,fp8,0,0.19971200227737426
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,3072,4,1,128,1,float16,float16,0,0.17507200241088866
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,3072,4,1,128,1,float16,fp8,0,0.19901280403137206
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,3072,4,1,128,1,fp8,fp8,0,0.19753119945526124
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,3072,4,2,128,1,float16,float16,0,0.17502880096435547
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,3072,4,4,128,1,float16,float16,0,0.11095679998397827
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,3072,4,2,128,1,fp8,fp8,0,0.19947359561920167
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,3072,4,2,128,1,float16,fp8,0,0.19996960163116456
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,3072,4,4,128,1,float16,fp8,0,0.1129871964454651
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,3072,4,1,128,1,float16,float16,0,0.10333440303802491
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,3072,4,4,128,1,fp8,fp8,0,0.11365280151367188
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,3072,4,1,128,1,float16,fp8,0,0.11261600255966187
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,3072,4,1,128,1,fp8,fp8,0,0.11232479810714721
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,3072,4,2,128,1,float16,float16,0,0.10440640449523926
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,3072,4,2,128,1,float16,fp8,0,0.1127087950706482
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,3072,4,4,128,1,float16,float16,0,0.06860640048980712
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,3072,4,2,128,1,fp8,fp8,0,0.11469759941101074
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,3072,4,4,128,1,float16,fp8,0,0.06994400024414063
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,3072,4,4,128,1,fp8,fp8,0,0.07051039934158325
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,3072,4,1,128,1,float16,float16,0,0.06578879952430725
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,3072,4,1,128,1,float16,fp8,0,0.06982880234718322
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,3072,4,1,128,1,fp8,fp8,0,0.07041440010070801
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,3072,4,2,128,1,float16,float16,0,0.0656816005706787
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,3072,4,2,128,1,float16,fp8,0,0.06984480023384095
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,3072,4,2,128,1,fp8,fp8,0,0.07010400295257568
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,3072,4,4,128,1,float16,float16,0,0.05037760138511658
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,3072,4,4,128,1,float16,fp8,0,0.05143359899520874
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,3072,4,4,128,1,fp8,fp8,0,0.05131040215492248
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,3072,4,1,128,1,float16,float16,0,0.04939680099487305
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,3072,4,1,128,1,fp8,fp8,0,0.05137280225753784
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,3072,4,1,128,1,float16,fp8,0,0.05138720273971557
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,3072,4,2,128,1,float16,float16,0,0.04920639991760254
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,3072,4,2,128,1,float16,fp8,0,0.051446402072906496
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,3072,4,2,128,1,fp8,fp8,0,0.05120000243186951
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,3072,4,4,128,1,float16,float16,0,0.04745599925518036
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,3072,4,4,128,1,float16,fp8,0,0.047417598962783816
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,3072,4,4,128,1,fp8,fp8,0,0.047358399629592894
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,3072,4,1,128,1,float16,float16,0,0.047259199619293216
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,3072,4,1,128,1,float16,fp8,0,0.047295999526977536
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,3072,4,1,128,1,fp8,fp8,0,0.0473008006811142
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,3072,4,2,128,1,float16,float16,0,0.047331199049949646
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,3072,4,2,128,1,float16,fp8,0,0.04737600088119507
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,3072,4,2,128,1,fp8,fp8,0,0.04731839895248413
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,2048,4,1,128,1,float16,float16,0,0.31493439674377444
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,2048,4,1,128,1,fp8,fp8,0,0.3877264022827148
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,2048,4,1,128,1,float16,fp8,0,0.3906208038330078
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,2048,4,2,128,1,float16,float16,0,0.31241118907928467
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,2048,4,4,128,1,float16,float16,0,0.19021279811859132
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,2048,4,2,128,1,float16,fp8,0,0.3901968002319336
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,2048,4,2,128,1,fp8,fp8,0,0.38652639389038085
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,2048,4,4,128,1,float16,fp8,0,0.20913279056549072
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,2048,4,4,128,1,fp8,fp8,0,0.20846080780029297
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,2048,4,1,128,1,float16,float16,0,0.17166080474853515
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,2048,4,1,128,1,float16,fp8,0,0.20768799781799316
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,2048,4,1,128,1,fp8,fp8,0,0.21041278839111327
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,2048,4,2,128,1,float16,float16,0,0.1710096001625061
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,2048,4,4,128,1,float16,float16,0,0.10978879928588867
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,2048,4,2,128,1,float16,fp8,0,0.2085632085800171
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,2048,4,4,128,1,float16,fp8,0,0.11541279554367065
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,2048,4,2,128,1,fp8,fp8,0,0.2075984001159668
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,2048,4,4,128,1,fp8,fp8,0,0.11593600511550903
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,2048,4,1,128,1,float16,float16,0,0.09661759734153748
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,2048,4,1,128,1,fp8,fp8,0,0.11288800239562988
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,2048,4,1,128,1,float16,fp8,0,0.11386239528656006
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,2048,4,2,128,1,float16,float16,0,0.09844800233840942
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,2048,4,2,128,1,float16,fp8,0,0.11391999721527099
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,2048,4,4,128,1,float16,float16,0,0.0655888020992279
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,2048,4,2,128,1,fp8,fp8,0,0.11361119747161866
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,2048,4,4,128,1,float16,fp8,0,0.06980479955673217
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,2048,4,1,128,1,float16,float16,0,0.05953119993209839
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,2048,4,4,128,1,fp8,fp8,0,0.06813120245933532
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,2048,4,1,128,1,float16,fp8,0,0.06978240013122558
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,2048,4,2,128,1,float16,float16,0,0.05999040007591248
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,2048,4,1,128,1,fp8,fp8,0,0.06785280108451844
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,2048,4,2,128,1,float16,fp8,0,0.06991999745368957
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,2048,4,4,128,1,float16,float16,0,0.03912000060081482
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,2048,4,2,128,1,fp8,fp8,0,0.06815999746322632
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,2048,4,4,128,1,float16,fp8,0,0.04316959977149963
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,2048,4,4,128,1,fp8,fp8,0,0.041196799278259276
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,2048,4,1,128,1,float16,float16,0,0.03718880116939545
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,2048,4,1,128,1,float16,fp8,0,0.04312640130519867
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,2048,4,2,128,1,float16,float16,0,0.03716480135917664
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,2048,4,1,128,1,fp8,fp8,0,0.04126879870891571
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,2048,4,2,128,1,float16,fp8,0,0.043112000823020934
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,2048,4,2,128,1,fp8,fp8,0,0.0426144003868103
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,2048,4,4,128,1,float16,float16,0,0.03705439865589142
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,2048,4,4,128,1,float16,fp8,0,0.03721120059490204
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,2048,4,4,128,1,fp8,fp8,0,0.037143999338150026
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,2048,4,1,128,1,float16,float16,0,0.03500159978866577
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,2048,4,1,128,1,float16,fp8,0,0.03715679943561554
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,2048,4,1,128,1,fp8,fp8,0,0.037136000394821164
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,2048,4,2,128,1,float16,float16,0,0.035016000270843506
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,2048,4,2,128,1,float16,fp8,0,0.03718400001525879
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,2048,4,2,128,1,fp8,fp8,0,0.03709439933300018
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,2048,4,4,128,1,float16,float16,0,0.035025599598884585
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,2048,4,4,128,1,float16,fp8,0,0.03499679863452911
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,2048,4,4,128,1,fp8,fp8,0,0.03500800132751465
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,2048,4,1,128,1,float16,float16,0,0.03512159883975983
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,2048,4,1,128,1,float16,fp8,0,0.03501920104026794
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,2048,4,2,128,1,float16,float16,0,0.03501279950141907
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,2048,4,1,128,1,fp8,fp8,0,0.035087999701499936
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,2048,4,2,128,1,float16,fp8,0,0.03497920036315918
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,2048,4,2,128,1,fp8,fp8,0,0.03498879969120026
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1536,4,1,128,1,float16,float16,0,0.2058176040649414
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1536,4,1,128,1,float16,fp8,0,0.2684704065322876
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1536,4,1,128,1,fp8,fp8,0,0.2648655891418457
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1536,4,2,128,1,float16,float16,0,0.20429439544677735
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1536,4,2,128,1,float16,fp8,0,0.2686192035675049
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1536,4,2,128,1,fp8,fp8,0,0.2650223970413208
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1536,4,4,128,1,float16,float16,0,0.12912319898605346
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1536,4,4,128,1,float16,fp8,0,0.1446768045425415
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1536,4,4,128,1,fp8,fp8,0,0.1433743953704834
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1536,4,1,128,1,float16,float16,0,0.11153119802474976
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1536,4,1,128,1,float16,fp8,0,0.14323519468307494
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1536,4,2,128,1,float16,float16,0,0.11393120288848876
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1536,4,1,128,1,fp8,fp8,0,0.14153759479522704
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1536,4,4,128,1,float16,float16,0,0.07538080215454102
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1536,4,2,128,1,fp8,fp8,0,0.14173760414123535
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1536,4,2,128,1,float16,fp8,0,0.144595205783844
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1536,4,4,128,1,float16,fp8,0,0.08038399815559387
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1536,4,1,128,1,float16,float16,0,0.06600959897041321
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1536,4,4,128,1,fp8,fp8,0,0.08159040212631226
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1536,4,1,128,1,float16,fp8,0,0.08023840188980103
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1536,4,2,128,1,float16,float16,0,0.06683200001716613
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1536,4,1,128,1,fp8,fp8,0,0.08113600015640259
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1536,4,2,128,1,float16,fp8,0,0.0800704002380371
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1536,4,2,128,1,fp8,fp8,0,0.08104320168495179
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1536,4,4,128,1,float16,float16,0,0.04664320051670075
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1536,4,4,128,1,float16,fp8,0,0.049332800507545474
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1536,4,4,128,1,fp8,fp8,0,0.04992479979991913
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1536,4,1,128,1,float16,float16,0,0.04314239919185638
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1536,4,1,128,1,float16,fp8,0,0.04936479926109314
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1536,4,1,128,1,fp8,fp8,0,0.04995200037956238
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1536,4,2,128,1,float16,float16,0,0.04317759871482849
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1536,4,2,128,1,float16,fp8,0,0.049423998594284056
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1536,4,4,128,1,float16,float16,0,0.03295199871063233
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1536,4,2,128,1,fp8,fp8,0,0.049342399835586546
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1536,4,4,128,1,float16,fp8,0,0.033055999875068666
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1536,4,4,128,1,fp8,fp8,0,0.035017600655555724
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1536,4,1,128,1,float16,float16,0,0.030955201387405394
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1536,4,1,128,1,float16,fp8,0,0.03295199871063233
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1536,4,1,128,1,fp8,fp8,0,0.035062399506568906
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1536,4,2,128,1,float16,float16,0,0.030888000130653383
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1536,4,2,128,1,float16,fp8,0,0.03296320140361786
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1536,4,2,128,1,fp8,fp8,0,0.03499839901924133
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1536,4,4,128,1,float16,float16,0,0.030899199843406677
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1536,4,4,128,1,float16,fp8,0,0.030904000997543334
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1536,4,4,128,1,fp8,fp8,0,0.030900800228118898
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1536,4,1,128,1,float16,float16,0,0.02906399965286255
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1536,4,1,128,1,float16,fp8,0,0.03091199994087219
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1536,4,1,128,1,fp8,fp8,0,0.0308896005153656
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1536,4,2,128,1,float16,float16,0,0.028942400217056276
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1536,4,2,128,1,float16,fp8,0,0.030943998694419862
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1536,4,2,128,1,fp8,fp8,0,0.030921599268913268
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1536,4,4,128,1,float16,float16,0,0.028784000873565675
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1536,4,4,128,1,float16,fp8,0,0.028867200016975403
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1536,4,4,128,1,fp8,fp8,0,0.028915199637413024
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1536,4,1,128,1,float16,float16,0,0.02890399992465973
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1536,4,1,128,1,float16,fp8,0,0.028891199827194215
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1536,4,1,128,1,fp8,fp8,0,0.028832000494003297
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1536,4,2,128,1,float16,float16,0,0.02874239981174469
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1536,4,2,128,1,float16,fp8,0,0.02881920039653778
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1536,4,2,128,1,fp8,fp8,0,0.028984001278877257
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,1024,4,1,128,1,float16,float16,0,0.2154304027557373
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,1024,4,1,128,1,float16,fp8,0,0.3023983955383301
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,1024,4,1,128,1,fp8,fp8,0,0.3057791948318481
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,1024,4,2,128,1,float16,float16,0,0.21495521068572998
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,1024,4,2,128,1,float16,fp8,0,0.30348639488220214
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,1024,4,2,128,1,fp8,fp8,0,0.3027535915374756
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1024,4,4,128,1,float16,float16,0,0.13810080289840698
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1024,4,4,128,1,float16,fp8,0,0.16149280071258545
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1024,4,4,128,1,fp8,fp8,0,0.16172000169754028
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1024,4,1,128,1,float16,float16,0,0.11807839870452881
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1024,4,1,128,1,float16,fp8,0,0.16096800565719604
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1024,4,1,128,1,fp8,fp8,0,0.1617840051651001
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1024,4,2,128,1,float16,float16,0,0.11813119649887086
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1024,4,2,128,1,float16,fp8,0,0.16052639484405518
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1024,4,2,128,1,fp8,fp8,0,0.1606719970703125
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1024,4,4,128,1,float16,float16,0,0.07845759987831116
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1024,4,4,128,1,float16,fp8,0,0.08847519755363464
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1024,4,4,128,1,fp8,fp8,0,0.0887328028678894
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1024,4,1,128,1,float16,float16,0,0.06629120111465454
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1024,4,1,128,1,float16,fp8,0,0.08669599890708923
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1024,4,1,128,1,fp8,fp8,0,0.08669440150260925
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1024,4,2,128,1,float16,fp8,0,0.08709920048713685
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1024,4,2,128,1,float16,float16,0,0.06812160015106201
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1024,4,4,128,1,float16,float16,0,0.0471343994140625
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1024,4,2,128,1,fp8,fp8,0,0.08650879859924317
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1024,4,4,128,1,float16,fp8,0,0.051579201221466066
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1024,4,4,128,1,fp8,fp8,0,0.051502400636672975
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1024,4,1,128,1,float16,float16,0,0.04102399945259094
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1024,4,1,128,1,float16,fp8,0,0.051446402072906496
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1024,4,2,128,1,float16,float16,0,0.04109280109405518
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1024,4,1,128,1,fp8,fp8,0,0.051444798707962036
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1024,4,2,128,1,float16,fp8,0,0.051369601488113405
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1024,4,2,128,1,fp8,fp8,0,0.051472002267837526
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1024,4,4,128,1,float16,float16,0,0.02712000012397766
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1024,4,4,128,1,float16,fp8,0,0.030900800228118898
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1024,4,1,128,1,float16,float16,0,0.024889600276947022
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1024,4,4,128,1,fp8,fp8,0,0.030873599648475646
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1024,4,1,128,1,float16,fp8,0,0.03091999888420105
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1024,4,2,128,1,float16,float16,0,0.024953599274158477
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1024,4,1,128,1,fp8,fp8,0,0.031009599566459656
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1024,4,2,128,1,float16,fp8,0,0.030888000130653383
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1024,4,2,128,1,fp8,fp8,0,0.030924800038337707
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1024,4,4,128,1,float16,float16,0,0.024694399535655977
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1024,4,4,128,1,float16,fp8,0,0.026774400472640993
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1024,4,4,128,1,fp8,fp8,0,0.026740801334381104
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1024,4,1,128,1,float16,float16,0,0.024772800505161285
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1024,4,1,128,1,float16,fp8,0,0.026494398713111877
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1024,4,1,128,1,fp8,fp8,0,0.026204800605773924
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1024,4,2,128,1,float16,float16,0,0.024736000597476958
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1024,4,2,128,1,float16,fp8,0,0.02672480046749115
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1024,4,2,128,1,fp8,fp8,0,0.02666560113430023
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1024,4,4,128,1,float16,float16,0,0.022814400494098663
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1024,4,4,128,1,float16,fp8,0,0.02279199957847595
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1024,4,4,128,1,fp8,fp8,0,0.02279520034790039
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1024,4,1,128,1,float16,float16,0,0.02280319929122925
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1024,4,1,128,1,float16,fp8,0,0.02271520048379898
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1024,4,1,128,1,fp8,fp8,0,0.022787199914455415
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1024,4,2,128,1,float16,float16,0,0.022843199968338012
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1024,4,2,128,1,float16,fp8,0,0.02271360009908676
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1024,4,4,128,1,float16,float16,0,0.022711999714374542
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1024,4,2,128,1,fp8,fp8,0,0.022793599963188173
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1024,4,4,128,1,float16,fp8,0,0.022710399329662324
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1024,4,4,128,1,fp8,fp8,0,0.02272319942712784
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1024,4,1,128,1,float16,float16,0,0.022679999470710754
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1024,4,1,128,1,float16,fp8,0,0.0227183997631073
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1024,4,1,128,1,fp8,fp8,0,0.022735999524593355
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1024,4,2,128,1,float16,float16,0,0.022784000635147093
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1024,4,2,128,1,fp8,fp8,0,0.022672000527381896
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1024,4,2,128,1,float16,fp8,0,0.022617599368095397
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,512,4,1,128,1,float16,float16,0,0.16620800495147706
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,512,4,1,128,1,float16,fp8,0,0.2596319913864136
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,512,4,1,128,1,fp8,fp8,0,0.2603343963623047
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,512,4,2,128,1,float16,float16,0,0.16537280082702638
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,512,4,2,128,1,float16,fp8,0,0.2600719928741455
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,512,4,4,128,1,float16,float16,0,0.11057599782943725
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,512,4,2,128,1,fp8,fp8,0,0.26032319068908694
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,512,4,4,128,1,float16,fp8,0,0.13599519729614257
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,512,4,1,128,1,float16,float16,0,0.0902944028377533
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,512,4,4,128,1,fp8,fp8,0,0.13757120370864867
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,512,4,1,128,1,float16,fp8,0,0.13589760065078735
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,512,4,2,128,1,float16,float16,0,0.09038559794425964
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,512,4,1,128,1,fp8,fp8,0,0.13697279691696168
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,512,4,2,128,1,float16,fp8,0,0.13599679470062256
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,512,4,4,128,1,float16,float16,0,0.06247040033340454
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,512,4,2,128,1,fp8,fp8,0,0.1371888041496277
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,512,4,4,128,1,float16,fp8,0,0.07412480115890503
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,512,4,1,128,1,float16,float16,0,0.04979360103607178
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,512,4,4,128,1,fp8,fp8,0,0.07456480264663697
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,512,4,1,128,1,float16,fp8,0,0.07208160161972046
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,512,4,1,128,1,fp8,fp8,0,0.07253280282020569
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,512,4,2,128,1,float16,float16,0,0.05157600045204162
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,512,4,4,128,1,float16,float16,0,0.0370959997177124
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,512,4,2,128,1,float16,fp8,0,0.0730288028717041
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,512,4,2,128,1,fp8,fp8,0,0.07257440090179443
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,512,4,1,128,1,float16,float16,0,0.03086079955101013
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,512,4,4,128,1,float16,fp8,0,0.043196800351142886
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,512,4,4,128,1,fp8,fp8,0,0.04323360025882721
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,512,4,1,128,1,float16,fp8,0,0.04257279932498932
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,512,4,1,128,1,fp8,fp8,0,0.04319519996643066
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,512,4,2,128,1,float16,float16,0,0.031150400638580322
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,512,4,2,128,1,float16,fp8,0,0.04143039882183075
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,512,4,2,128,1,fp8,fp8,0,0.04320000112056732
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,512,4,4,128,1,float16,float16,0,0.022619199752807618
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,512,4,4,128,1,float16,fp8,0,0.024828800559043886
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,512,4,4,128,1,fp8,fp8,0,0.026019200682640076
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,512,4,1,128,1,float16,float16,0,0.018831999599933626
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,512,4,1,128,1,float16,fp8,0,0.02478239983320236
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,512,4,2,128,1,float16,float16,0,0.018646399676799773
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,512,4,1,128,1,fp8,fp8,0,0.02666560113430023
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,512,4,2,128,1,float16,fp8,0,0.024993599951267244
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,512,4,2,128,1,fp8,fp8,0,0.02476799935102463
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,512,4,4,128,1,float16,float16,0,0.018806399405002595
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,512,4,4,128,1,float16,fp8,0,0.020684799551963805
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,512,4,1,128,1,float16,float16,0,0.018620799481868743
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,512,4,4,128,1,fp8,fp8,0,0.020732800662517547
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,512,4,1,128,1,float16,fp8,0,0.020664000511169435
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,512,4,1,128,1,fp8,fp8,0,0.020772799849510193
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,512,4,2,128,1,float16,float16,0,0.018559999763965607
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,512,4,2,128,1,float16,fp8,0,0.020636799931526183
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,512,4,4,128,1,float16,float16,0,0.01748320013284683
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,512,4,2,128,1,fp8,fp8,0,0.02077919989824295
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,512,4,4,128,1,float16,fp8,0,0.018592000007629395
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,512,4,4,128,1,fp8,fp8,0,0.01866080015897751
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,512,4,1,128,1,float16,float16,0,0.016631999611854555
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,512,4,1,128,1,float16,fp8,0,0.01854880005121231
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,512,4,1,128,1,fp8,fp8,0,0.01855680048465729
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,512,4,2,128,1,float16,float16,0,0.016731199622154237
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,512,4,2,128,1,float16,fp8,0,0.01855040043592453
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,512,4,2,128,1,fp8,fp8,0,0.01866399943828583
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,512,4,4,128,1,float16,float16,0,0.016651199758052827
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,512,4,4,128,1,float16,fp8,0,0.01656160056591034
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,512,4,4,128,1,fp8,fp8,0,0.01663520038127899
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,512,4,1,128,1,float16,float16,0,0.016606399416923524
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,512,4,1,128,1,float16,fp8,0,0.016620799899101257
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,512,4,1,128,1,fp8,fp8,0,0.016551999747753142
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,512,4,2,128,1,float16,float16,0,0.01672160029411316
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,512,4,2,128,1,float16,fp8,0,0.016601599752902985
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,512,4,2,128,1,fp8,fp8,0,0.01656640022993088
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,512,4,4,128,1,float16,float16,0,0.01674560010433197
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,512,4,4,128,1,float16,fp8,0,0.01661760061979294
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,512,4,4,128,1,fp8,fp8,0,0.016579200327396394
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,512,4,1,128,1,float16,float16,0,0.01664000004529953
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,512,4,1,128,1,float16,fp8,0,0.016599999368190767
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,512,4,1,128,1,fp8,fp8,0,0.016624000668525696
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,512,4,2,128,1,float16,float16,0,0.016633599996566772
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,512,4,2,128,1,float16,fp8,0,0.016531200706958772
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,512,4,2,128,1,fp8,fp8,0,0.016625599563121797
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,256,4,1,128,1,float16,float16,0,0.07525759935379028
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,256,4,1,128,1,float16,fp8,0,0.12250720262527466
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,256,4,1,128,1,fp8,fp8,0,0.12135839462280273
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,256,4,2,128,1,float16,float16,0,0.07627519965171814
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,256,4,2,128,1,float16,fp8,0,0.12286560535430908
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,256,4,2,128,1,fp8,fp8,0,0.12178239822387696
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,256,4,4,128,1,float16,float16,0,0.056531202793121335
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,256,4,4,128,1,float16,fp8,0,0.06776959896087646
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,256,4,1,128,1,float16,float16,0,0.04325760006904602
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,256,4,4,128,1,fp8,fp8,0,0.06792479753494263
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,256,4,1,128,1,float16,fp8,0,0.06596959829330444
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,256,4,1,128,1,fp8,fp8,0,0.0658399999141693
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,256,4,2,128,1,float16,float16,0,0.04527359902858734
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,256,4,4,128,1,float16,float16,0,0.03086079955101013
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,256,4,2,128,1,float16,fp8,0,0.06593599915504456
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,256,4,2,128,1,fp8,fp8,0,0.0659775972366333
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,256,4,4,128,1,float16,fp8,0,0.037118399143218996
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,256,4,1,128,1,float16,float16,0,0.024864000082015992
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,256,4,4,128,1,fp8,fp8,0,0.037064000964164734
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,256,4,1,128,1,float16,fp8,0,0.03709760010242462
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,256,4,2,128,1,float16,float16,0,0.024766400456428528
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,256,4,1,128,1,fp8,fp8,0,0.03727200031280518
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,256,4,2,128,1,float16,fp8,0,0.03712959885597229
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,256,4,4,128,1,float16,float16,0,0.01865279972553253
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,256,4,2,128,1,fp8,fp8,0,0.03707039952278137
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,256,4,4,128,1,float16,fp8,0,0.0227183997631073
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,256,4,1,128,1,float16,float16,0,0.016603200137615202
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,256,4,4,128,1,fp8,fp8,0,0.022780799865722658
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,256,4,1,128,1,float16,fp8,0,0.022724799811840057
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,256,4,2,128,1,float16,float16,0,0.01672320067882538
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,256,4,1,128,1,fp8,fp8,0,0.022742399573326112
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,256,4,2,128,1,float16,fp8,0,0.022694399952888487
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,256,4,2,128,1,fp8,fp8,0,0.022840000689029694
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,256,4,4,128,1,float16,float16,0,0.016551999747753142
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,256,4,4,128,1,float16,fp8,0,0.018641600012779237
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,256,4,4,128,1,fp8,fp8,0,0.018559999763965607
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,256,4,1,128,1,float16,float16,0,0.01451680064201355
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,256,4,1,128,1,float16,fp8,0,0.018593600392341612
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,256,4,2,128,1,float16,float16,0,0.014580799639225006
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,256,4,1,128,1,fp8,fp8,0,0.018561600148677825
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,256,4,2,128,1,float16,fp8,0,0.01863359957933426
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,256,4,2,128,1,fp8,fp8,0,0.01863040030002594
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,256,4,4,128,1,float16,float16,0,0.014654399454593658
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,256,4,4,128,1,float16,fp8,0,0.0165583997964859
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,256,4,4,128,1,fp8,fp8,0,0.01462559998035431
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,256,4,1,128,1,float16,fp8,0,0.016516800224781036
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,256,4,1,128,1,float16,float16,0,0.014507199823856353
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,256,4,1,128,1,fp8,fp8,0,0.01454399973154068
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,256,4,2,128,1,float16,float16,0,0.014560000598430633
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,256,4,2,128,1,float16,fp8,0,0.016492800414562227
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,256,4,2,128,1,fp8,fp8,0,0.014591999351978302
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,256,4,4,128,1,float16,fp8,0,0.014628799259662628
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,256,4,4,128,1,float16,float16,0,0.014561599493026734
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,256,4,4,128,1,fp8,fp8,0,0.014523200690746307
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,256,4,1,128,1,float16,float16,0,0.014486399292945863
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,256,4,1,128,1,float16,fp8,0,0.01454399973154068
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,256,4,1,128,1,fp8,fp8,0,0.014497600495815277
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,256,4,2,128,1,float16,float16,0,0.014510400593280792
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,256,4,2,128,1,float16,fp8,0,0.014532800018787383
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,256,4,2,128,1,fp8,fp8,0,0.014550399780273438
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,256,4,4,128,1,float16,float16,0,0.014212800562381745
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,256,4,4,128,1,float16,fp8,0,0.014483200013637542
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,256,4,4,128,1,fp8,fp8,0,0.01451680064201355
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,256,4,1,128,1,float16,float16,0,0.014476799964904785
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,256,4,1,128,1,float16,fp8,0,0.014510400593280792
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,256,4,1,128,1,fp8,fp8,0,0.014430400729179383
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,256,4,2,128,1,float16,float16,0,0.014430400729179383
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,256,4,2,128,1,float16,fp8,0,0.014473600685596466
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,256,4,2,128,1,fp8,fp8,0,0.01451520025730133
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,256,4,4,128,1,float16,float16,0,0.013622400164604188
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,256,4,4,128,1,float16,fp8,0,0.014280000329017639
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,256,4,4,128,1,fp8,fp8,0,0.014457599818706512
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,256,4,1,128,1,float16,float16,0,0.013457599282264709
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,256,4,1,128,1,float16,fp8,0,0.014481599628925323
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,256,4,1,128,1,fp8,fp8,0,0.012624000012874604
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,256,4,2,128,1,float16,float16,0,0.014473600685596466
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,256,4,2,128,1,float16,fp8,0,0.013521599769592284
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,256,4,2,128,1,fp8,fp8,0,0.014416000247001648
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,128,4,1,128,1,float16,float16,0,0.046691200137138365
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,128,4,1,128,1,float16,fp8,0,0.06981920003890991
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,128,4,1,128,1,fp8,fp8,0,0.06909120082855225
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,128,4,2,128,1,float16,fp8,0,0.06988959908485412
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,128,4,2,128,1,float16,float16,0,0.04745439887046814
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,128,4,2,128,1,fp8,fp8,0,0.06966559886932373
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,128,4,4,128,1,float16,float16,0,0.033004799485206605
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,128,4,4,128,1,float16,fp8,0,0.039190399646759036
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,128,4,4,128,1,fp8,fp8,0,0.03915199935436249
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,128,4,1,128,1,float16,float16,0,0.02701759934425354
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,128,4,1,128,1,float16,fp8,0,0.039243200421333314
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,128,4,1,128,1,fp8,fp8,0,0.03915359973907471
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,128,4,2,128,1,float16,float16,0,0.02866879999637604
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,128,4,2,128,1,float16,fp8,0,0.03925760090351105
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,128,4,2,128,1,fp8,fp8,0,0.03912160098552704
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,128,4,4,128,1,float16,float16,0,0.020598399639129638
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,128,4,4,128,1,float16,fp8,0,0.023689599335193635
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,128,4,4,128,1,fp8,fp8,0,0.023151999711990355
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,128,4,1,128,1,float16,float16,0,0.018534399569034576
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,128,4,1,128,1,float16,fp8,0,0.024372799694538115
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,128,4,1,128,1,fp8,fp8,0,0.02290399968624115
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,128,4,2,128,1,float16,float16,0,0.018492799997329713
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,128,4,2,128,1,float16,fp8,0,0.024828800559043886
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,128,4,4,128,1,float16,float16,0,0.01441120058298111
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,128,4,2,128,1,fp8,fp8,0,0.022737599909305573
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,128,4,1,128,1,float16,float16,0,0.012486399710178375
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,128,4,4,128,1,fp8,fp8,0,0.01653600037097931
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,128,4,4,128,1,float16,fp8,0,0.016707199811935424
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,128,4,1,128,1,float16,fp8,0,0.016548800468444824
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,128,4,2,128,1,float16,float16,0,0.01255200058221817
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,128,4,1,128,1,fp8,fp8,0,0.016564799845218657
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,128,4,4,128,1,float16,float16,0,0.01252480000257492
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,128,4,2,128,1,fp8,fp8,0,0.01650879979133606
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,128,4,2,128,1,float16,fp8,0,0.016513599455356597
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,128,4,4,128,1,float16,fp8,0,0.013214400410652161
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,128,4,4,128,1,fp8,fp8,0,0.014262400567531586
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,128,4,1,128,1,float16,float16,0,0.012399999797344208
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,128,4,1,128,1,float16,fp8,0,0.012628799676895142
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,128,4,2,128,1,float16,float16,0,0.011128000169992446
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,128,4,1,128,1,fp8,fp8,0,0.013798399269580841
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,128,4,2,128,1,float16,fp8,0,0.012617599964141846
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,128,4,2,128,1,fp8,fp8,0,0.012470400333404541
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,128,4,4,128,1,float16,fp8,0,0.012468799948692322
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,128,4,4,128,1,float16,float16,0,0.01141439974308014
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,128,4,4,128,1,fp8,fp8,0,0.012467200309038163
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,128,4,1,128,1,float16,float16,0,0.010518400371074677
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,128,4,1,128,1,float16,fp8,0,0.012676799297332763
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,128,4,1,128,1,fp8,fp8,0,0.012457600235939026
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,128,4,2,128,1,fp8,fp8,0,0.012470400333404541
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,128,4,2,128,1,float16,fp8,0,0.012544000148773193
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,128,4,2,128,1,float16,float16,0,0.010526400059461594
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,128,4,4,128,1,float16,float16,0,0.010460799932479859
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,128,4,4,128,1,fp8,fp8,0,0.01085439994931221
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,128,4,4,128,1,float16,fp8,0,0.010804799944162368
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,128,4,1,128,1,float16,float16,0,0.01045600026845932
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,128,4,1,128,1,float16,fp8,0,0.010492800176143647
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,128,4,1,128,1,fp8,fp8,0,0.012544000148773193
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,128,4,2,128,1,float16,float16,0,0.010443200170993806
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,128,4,2,128,1,float16,fp8,0,0.012508800625801087
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,128,4,4,128,1,float16,float16,0,0.01050880029797554
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,128,4,2,128,1,fp8,fp8,0,0.012489599734544754
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,128,4,4,128,1,float16,fp8,0,0.010867200046777725
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,128,4,4,128,1,fp8,fp8,0,0.010428799688816071
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,128,4,1,128,1,float16,float16,0,0.010761599987745285
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,128,4,1,128,1,float16,fp8,0,0.010523200035095215
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,128,4,1,128,1,fp8,fp8,0,0.01050880029797554
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,128,4,2,128,1,float16,float16,0,0.01085119992494583
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,128,4,2,128,1,float16,fp8,0,0.010700800269842149
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,128,4,2,128,1,fp8,fp8,0,0.010406400263309478
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,128,4,4,128,1,float16,float16,0,0.010452800244092942
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,128,4,4,128,1,float16,fp8,0,0.010552000254392624
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,128,4,4,128,1,fp8,fp8,0,0.010419200360774993
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,128,4,1,128,1,float16,float16,0,0.010630399733781815
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,128,4,1,128,1,float16,fp8,0,0.010555200278759003
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,128,4,1,128,1,fp8,fp8,0,0.010452800244092942
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,128,4,2,128,1,float16,float16,0,0.01048479974269867
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,128,4,2,128,1,float16,fp8,0,0.010628800094127654
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,128,4,2,128,1,fp8,fp8,0,0.010438399761915207
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,64,4,1,128,1,float16,fp8,0,0.04740000069141388
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,64,4,1,128,1,float16,float16,0,0.037031999230384825
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,64,4,1,128,1,fp8,fp8,0,0.047295999526977536
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,64,4,2,128,1,float16,float16,0,0.03545759916305542
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,64,4,2,128,1,float16,fp8,0,0.04728800058364868
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,64,4,2,128,1,fp8,fp8,0,0.047577598690986635
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,64,4,4,128,1,float16,float16,0,0.024875199794769286
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,64,4,4,128,1,float16,fp8,0,0.02887519896030426
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,64,4,4,128,1,fp8,fp8,0,0.028896000981330872
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,64,4,1,128,1,float16,float16,0,0.022924800217151643
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,64,4,1,128,1,float16,fp8,0,0.02900480031967163
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,64,4,1,128,1,fp8,fp8,0,0.028908801078796387
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,64,4,2,128,1,float16,float16,0,0.022864000499248506
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,64,4,2,128,1,float16,fp8,0,0.02905279994010925
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,64,4,2,128,1,fp8,fp8,0,0.028964799642562867
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,64,4,4,128,1,float16,float16,0,0.016575999557971954
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,64,4,4,128,1,float16,fp8,0,0.01867839992046356
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,64,4,4,128,1,fp8,fp8,0,0.018783999979496
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,64,4,1,128,1,float16,float16,0,0.015830400586128234
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,64,4,1,128,1,float16,fp8,0,0.018620799481868743
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,64,4,1,128,1,fp8,fp8,0,0.018651199340820313
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,64,4,2,128,1,float16,float16,0,0.016579200327396394
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,64,4,2,128,1,float16,fp8,0,0.018681600689888
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,64,4,2,128,1,fp8,fp8,0,0.0186256006360054
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,64,4,4,128,1,float16,float16,0,0.012736000120639801
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,64,4,4,128,1,float16,fp8,0,0.012542399764060973
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,64,4,4,128,1,fp8,fp8,0,0.012627199292182922
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,64,4,1,128,1,float16,float16,0,0.01239359974861145
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,64,4,1,128,1,float16,fp8,0,0.012675200402736665
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,64,4,1,128,1,fp8,fp8,0,0.012510399520397186
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,64,4,2,128,1,float16,float16,0,0.012432000041007996
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,64,4,2,128,1,float16,fp8,0,0.012625600397586822
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,64,4,2,128,1,fp8,fp8,0,0.012671999633312225
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,64,4,4,128,1,float16,fp8,0,0.012415999919176102
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,64,4,4,128,1,float16,float16,0,0.011900799721479416
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,64,4,4,128,1,fp8,fp8,0,0.012495999783277511
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,64,4,1,128,1,float16,float16,0,0.010585600137710571
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,64,4,1,128,1,float16,fp8,0,0.012564800679683685
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,64,4,1,128,1,fp8,fp8,0,0.012563200294971466
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,64,4,2,128,1,float16,float16,0,0.01050880029797554
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,64,4,2,128,1,fp8,fp8,0,0.01250080019235611
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,64,4,2,128,1,float16,fp8,0,0.012495999783277511
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,64,4,4,128,1,float16,float16,0,0.010467199981212616
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,64,4,4,128,1,float16,fp8,0,0.010532800108194351
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,64,4,1,128,1,float16,float16,0,0.010478399693965912
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,64,4,4,128,1,fp8,fp8,0,0.010595200210809707
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,64,4,1,128,1,float16,fp8,0,0.010515200346708298
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,64,4,1,128,1,fp8,fp8,0,0.010529600083827972
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,64,4,2,128,1,float16,float16,0,0.010608000308275222
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,64,4,2,128,1,float16,fp8,0,0.010463999956846238
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,64,4,2,128,1,fp8,fp8,0,0.01053439974784851
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,64,4,4,128,1,float16,float16,0,0.01055999994277954
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,64,4,4,128,1,float16,fp8,0,0.010447999835014344
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,64,4,4,128,1,fp8,fp8,0,0.010567999631166457
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,64,4,1,128,1,float16,float16,0,0.010590399801731109
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,64,4,1,128,1,float16,fp8,0,0.010367999970912933
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,64,4,1,128,1,fp8,fp8,0,0.010583999752998351
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,64,4,2,128,1,float16,float16,0,0.010592000186443329
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,64,4,2,128,1,float16,fp8,0,0.010396800190210342
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,64,4,2,128,1,fp8,fp8,0,0.010467199981212616
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,64,4,4,128,1,float16,float16,0,0.010576000064611435
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,64,4,4,128,1,float16,fp8,0,0.010441599786281586
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,64,4,4,128,1,fp8,fp8,0,0.010523200035095215
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,64,4,1,128,1,float16,float16,0,0.010628800094127654
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,64,4,1,128,1,float16,fp8,0,0.010372799634933472
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,64,4,1,128,1,fp8,fp8,0,0.010363200306892395
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,64,4,2,128,1,float16,float16,0,0.010542400181293488
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,64,4,2,128,1,float16,fp8,0,0.010451199859380722
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,64,4,2,128,1,fp8,fp8,0,0.010478399693965912
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,64,4,4,128,1,float16,float16,0,0.010489600151777268
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,64,4,4,128,1,float16,fp8,0,0.010417599976062775
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,64,4,4,128,1,fp8,fp8,0,0.01051200032234192
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,64,4,1,128,1,float16,float16,0,0.010411199927330018
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,64,4,1,128,1,float16,fp8,0,0.010486400127410889
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,64,4,1,128,1,fp8,fp8,0,0.01040000021457672
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,64,4,2,128,1,float16,float16,0,0.010491199791431427
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,64,4,2,128,1,float16,fp8,0,0.01045600026845932
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,64,4,2,128,1,fp8,fp8,0,0.010366400331258773
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,32,4,1,128,1,float16,float16,0,0.031083199381828307
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,32,4,1,128,1,float16,fp8,0,0.037084800004959104
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,32,4,1,128,1,fp8,fp8,0,0.037110400199890134
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,32,4,2,128,1,float16,fp8,0,0.03700959980487824
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,32,4,2,128,1,float16,float16,0,0.031041601300239564
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,32,4,2,128,1,fp8,fp8,0,0.03702560067176819
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,32,4,4,128,1,float16,float16,0,0.02274720072746277
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,32,4,4,128,1,float16,fp8,0,0.022830399870872497
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,32,4,4,128,1,fp8,fp8,0,0.02285439968109131
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,32,4,1,128,1,float16,float16,0,0.02072640061378479
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,32,4,1,128,1,float16,fp8,0,0.022758400440216063
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,32,4,1,128,1,fp8,fp8,0,0.022801600396633148
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,32,4,2,128,1,float16,fp8,0,0.022841599583625794
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,32,4,2,128,1,float16,float16,0,0.02062080055475235
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,32,4,2,128,1,fp8,fp8,0,0.022889600694179536
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,32,4,4,128,1,float16,float16,0,0.01470080018043518
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,32,4,4,128,1,float16,fp8,0,0.014828799664974213
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,32,4,4,128,1,fp8,fp8,0,0.01648319959640503
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,32,4,1,128,1,float16,float16,0,0.014689600467681885
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,32,4,1,128,1,float16,fp8,0,0.014574399590492249
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,32,4,1,128,1,fp8,fp8,0,0.016523200273513793
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,32,4,2,128,1,float16,float16,0,0.014697599411010741
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,32,4,2,128,1,float16,fp8,0,0.014951999485492706
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,32,4,2,128,1,fp8,fp8,0,0.01646080017089844
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,32,4,4,128,1,float16,float16,0,0.01064319983124733
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,32,4,4,128,1,float16,fp8,0,0.012324800342321396
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,32,4,4,128,1,fp8,fp8,0,0.012511999905109405
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,32,4,1,128,1,float16,float16,0,0.010579200088977813
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,32,4,1,128,1,float16,fp8,0,0.012411200255155564
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,32,4,1,128,1,fp8,fp8,0,0.01242239996790886
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,32,4,2,128,1,float16,float16,0,0.010441599786281586
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,32,4,2,128,1,float16,fp8,0,0.012439999729394913
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,32,4,2,128,1,fp8,fp8,0,0.012488000094890594
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,32,4,4,128,1,float16,float16,0,0.010547199845314026
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,32,4,4,128,1,float16,fp8,0,0.010417599976062775
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,32,4,4,128,1,fp8,fp8,0,0.010590399801731109
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,32,4,1,128,1,float16,float16,0,0.010460799932479859
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,32,4,1,128,1,float16,fp8,0,0.010494399815797806
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,32,4,1,128,1,fp8,fp8,0,0.01048320010304451
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,32,4,2,128,1,float16,float16,0,0.010491199791431427
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,32,4,2,128,1,float16,fp8,0,0.010414399951696397
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,32,4,2,128,1,fp8,fp8,0,0.010516799986362457
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,32,4,4,128,1,float16,float16,0,0.01043360009789467
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,32,4,4,128,1,fp8,fp8,0,0.010447999835014344
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,32,4,4,128,1,float16,fp8,0,0.010630399733781815
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,32,4,1,128,1,float16,float16,0,0.010414399951696397
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,32,4,1,128,1,float16,fp8,0,0.010531199723482132
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,32,4,2,128,1,float16,fp8,0,0.010355199873447418
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,32,4,2,128,1,float16,float16,0,0.010417599976062775
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,32,4,1,128,1,fp8,fp8,0,0.010427200049161912
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,32,4,2,128,1,fp8,fp8,0,0.010419200360774993
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,32,4,4,128,1,float16,float16,0,0.010363200306892395
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,32,4,4,128,1,float16,fp8,0,0.010412800312042236
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,32,4,4,128,1,fp8,fp8,0,0.010558400303125381
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,32,4,1,128,1,float16,float16,0,0.010380800068378448
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,32,4,1,128,1,float16,fp8,0,0.01040000021457672
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,32,4,1,128,1,fp8,fp8,0,0.01035040020942688
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,32,4,2,128,1,float16,float16,0,0.01037440001964569
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,32,4,2,128,1,float16,fp8,0,0.010436800122261048
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,32,4,2,128,1,fp8,fp8,0,0.010375999659299851
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,32,4,4,128,1,float16,float16,0,0.010427200049161912
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,32,4,4,128,1,float16,fp8,0,0.010371199995279311
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,32,4,4,128,1,fp8,fp8,0,0.010390400141477584
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,32,4,1,128,1,float16,float16,0,0.0104032002389431
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,32,4,1,128,1,float16,fp8,0,0.010467199981212616
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,32,4,1,128,1,fp8,fp8,0,0.010315199941396713
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,32,4,2,128,1,float16,float16,0,0.010441599786281586
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,32,4,2,128,1,float16,fp8,0,0.010345599800348281
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,32,4,2,128,1,fp8,fp8,0,0.010428799688816071
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,32,4,4,128,1,float16,float16,0,0.010412800312042236
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,32,4,4,128,1,float16,fp8,0,0.010395199805498124
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,32,4,4,128,1,fp8,fp8,0,0.010468800365924836
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,32,4,1,128,1,float16,float16,0,0.010438399761915207
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,32,4,1,128,1,float16,fp8,0,0.010460799932479859
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,32,4,1,128,1,fp8,fp8,0,0.010353600233793258
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,32,4,2,128,1,float16,float16,0,0.010409600287675857
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,32,4,2,128,1,float16,fp8,0,0.010420800000429154
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,32,4,2,128,1,fp8,fp8,0,0.010337600111961364
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,16,4,1,128,1,float16,fp8,0,0.03091999888420105
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,16,4,1,128,1,float16,float16,0,0.03091999888420105
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,16,4,1,128,1,fp8,fp8,0,0.03218559920787811
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,16,4,2,128,1,float16,float16,0,0.030193600058555602
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,16,4,4,128,1,float16,float16,0,0.02062080055475235
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,16,4,2,128,1,float16,fp8,0,0.031600001454353335
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,16,4,2,128,1,fp8,fp8,0,0.03218719959259033
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,16,4,1,128,1,float16,float16,0,0.020531199872493744
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,16,4,4,128,1,float16,fp8,0,0.020691199600696562
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,16,4,4,128,1,fp8,fp8,0,0.02075359970331192
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,16,4,1,128,1,float16,fp8,0,0.0208079993724823
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,16,4,1,128,1,fp8,fp8,0,0.020660799741744996
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,16,4,2,128,1,float16,float16,0,0.02012320011854172
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,16,4,4,128,1,float16,float16,0,0.014448000490665436
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,16,4,2,128,1,float16,fp8,0,0.02077440023422241
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,16,4,2,128,1,fp8,fp8,0,0.020660799741744996
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,16,4,4,128,1,float16,fp8,0,0.014511999487876893
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,16,4,4,128,1,fp8,fp8,0,0.014633600413799287
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,16,4,1,128,1,float16,float16,0,0.014511999487876893
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,16,4,1,128,1,float16,fp8,0,0.014502400159835815
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,16,4,1,128,1,fp8,fp8,0,0.014504000544548035
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,16,4,2,128,1,float16,float16,0,0.014496000111103058
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,16,4,2,128,1,float16,fp8,0,0.014502400159835815
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,16,4,2,128,1,fp8,fp8,0,0.014511999487876893
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,16,4,4,128,1,float16,float16,0,0.01053759977221489
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,16,4,4,128,1,float16,fp8,0,0.010438399761915207
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,16,4,4,128,1,fp8,fp8,0,0.01056319996714592
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,16,4,1,128,1,float16,float16,0,0.010417599976062775
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,16,4,1,128,1,fp8,fp8,0,0.011508800089359283
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,16,4,1,128,1,float16,fp8,0,0.010507199913263321
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,16,4,2,128,1,float16,float16,0,0.010471999645233154
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,16,4,2,128,1,float16,fp8,0,0.012094400078058242
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,16,4,2,128,1,fp8,fp8,0,0.010435199737548828
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,16,4,4,128,1,float16,float16,0,0.010419200360774993
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,16,4,4,128,1,float16,fp8,0,0.01048320010304451
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,16,4,4,128,1,fp8,fp8,0,0.01040000021457672
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,16,4,1,128,1,float16,float16,0,0.010462400317192078
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,16,4,1,128,1,float16,fp8,0,0.010553599894046783
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,16,4,2,128,1,float16,float16,0,0.01040479987859726
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,16,4,1,128,1,fp8,fp8,0,0.01045600026845932
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,16,4,2,128,1,float16,fp8,0,0.010524799674749374
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,16,4,2,128,1,fp8,fp8,0,0.010380800068378448
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,16,4,4,128,1,float16,float16,0,0.010436800122261048
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,16,4,4,128,1,float16,fp8,0,0.010486400127410889
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,16,4,4,128,1,fp8,fp8,0,0.010411199927330018
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,16,4,1,128,1,float16,float16,0,0.010452800244092942
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,16,4,1,128,1,float16,fp8,0,0.010363200306892395
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,16,4,1,128,1,fp8,fp8,0,0.010363200306892395
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,16,4,2,128,1,float16,float16,0,0.01043040007352829
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,16,4,2,128,1,float16,fp8,0,0.010407999902963639
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,16,4,2,128,1,fp8,fp8,0,0.010390400141477584
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16,4,4,128,1,float16,float16,0,0.01043199971318245
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16,4,4,128,1,float16,fp8,0,0.010385599732398988
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16,4,4,128,1,fp8,fp8,0,0.010375999659299851
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16,4,1,128,1,float16,float16,0,0.010398399829864503
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16,4,1,128,1,float16,fp8,0,0.010409600287675857
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16,4,1,128,1,fp8,fp8,0,0.01043360009789467
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16,4,2,128,1,float16,float16,0,0.010414399951696397
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16,4,2,128,1,float16,fp8,0,0.010363200306892395
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16,4,2,128,1,fp8,fp8,0,0.01034879982471466
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16,4,4,128,1,float16,float16,0,0.01037919968366623
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16,4,4,128,1,float16,fp8,0,0.010462400317192078
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16,4,4,128,1,fp8,fp8,0,0.010424000024795533
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16,4,1,128,1,float16,float16,0,0.010416000336408614
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16,4,1,128,1,float16,fp8,0,0.010345599800348281
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16,4,1,128,1,fp8,fp8,0,0.01034879982471466
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16,4,2,128,1,float16,float16,0,0.01043360009789467
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16,4,2,128,1,float16,fp8,0,0.010369600355625152
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16,4,2,128,1,fp8,fp8,0,0.010369600355625152
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16,4,4,128,1,float16,float16,0,0.010507199913263321
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16,4,4,128,1,float16,fp8,0,0.010412800312042236
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16,4,4,128,1,fp8,fp8,0,0.010452800244092942
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16,4,1,128,1,float16,float16,0,0.010388799756765366
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16,4,1,128,1,float16,fp8,0,0.010500799864530563
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16,4,1,128,1,fp8,fp8,0,0.010416000336408614
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16,4,2,128,1,float16,float16,0,0.010390400141477584
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16,4,2,128,1,float16,fp8,0,0.010515200346708298
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,1,4,1,128,1,float16,fp8,0,0.026830399036407472
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16,4,2,128,1,fp8,fp8,0,0.010571199655532836
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,1,4,1,128,1,float16,float16,0,0.02890399992465973
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,1,4,2,128,1,float16,fp8,0,0.027009600400924684
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,1,4,2,128,1,float16,float16,0,0.029232001304626463
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,1,4,1,128,1,fp8,fp8,0,0.026820799708366393
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,1,4,2,128,1,fp8,fp8,0,0.02686559855937958
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,1,4,4,128,1,float16,fp8,0,0.018624000251293182
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,1,4,4,128,1,float16,float16,0,0.01915840059518814
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,1,4,4,128,1,fp8,fp8,0,0.01876160055398941
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,1,4,1,128,1,float16,float16,0,0.018675200641155243
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,1,4,1,128,1,float16,fp8,0,0.018607999384403228
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,1,4,1,128,1,fp8,fp8,0,0.01863040030002594
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,1,4,2,128,1,float16,float16,0,0.018692800402641298
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,1,4,2,128,1,float16,fp8,0,0.018779200315475465
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1,4,4,128,1,float16,float16,0,0.014531199634075165
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,1,4,2,128,1,fp8,fp8,0,0.018585599958896637
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1,4,4,128,1,float16,fp8,0,0.014500799775123595
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1,4,4,128,1,fp8,fp8,0,0.012513600289821625
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1,4,1,128,1,float16,float16,0,0.014558400213718414
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1,4,1,128,1,float16,fp8,0,0.012783999741077422
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1,4,1,128,1,fp8,fp8,0,0.012489599734544754
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1,4,2,128,1,float16,float16,0,0.01449279934167862
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1,4,2,128,1,float16,fp8,0,0.01326880007982254
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1,4,2,128,1,fp8,fp8,0,0.01249919980764389
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1,4,4,128,1,float16,float16,0,0.010489600151777268
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1,4,4,128,1,float16,fp8,0,0.010596799850463866
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1,4,4,128,1,fp8,fp8,0,0.010499200224876404
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1,4,1,128,1,float16,float16,0,0.011671999841928482
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1,4,1,128,1,float16,fp8,0,0.010548800230026245
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1,4,1,128,1,fp8,fp8,0,0.01040479987859726
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1,4,2,128,1,float16,float16,0,0.011108800023794174
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1,4,2,128,1,float16,fp8,0,0.010576000064611435
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1,4,2,128,1,fp8,fp8,0,0.010444799810647965
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1,4,4,128,1,float16,float16,0,0.010503999888896942
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1,4,4,128,1,float16,fp8,0,0.010497599840164185
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1,4,1,128,1,float16,float16,0,0.010416000336408614
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1,4,4,128,1,fp8,fp8,0,0.01037440001964569
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1,4,1,128,1,float16,fp8,0,0.01050880029797554
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1,4,2,128,1,float16,float16,0,0.010447999835014344
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1,4,1,128,1,fp8,fp8,0,0.010351999849081039
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1,4,2,128,1,float16,fp8,0,0.010339199751615524
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1,4,2,128,1,fp8,fp8,0,0.010425599664449692
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1,4,4,128,1,float16,float16,0,0.010436800122261048
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1,4,4,128,1,float16,fp8,0,0.010480000078678131
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1,4,4,128,1,fp8,fp8,0,0.010435199737548828
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1,4,1,128,1,float16,float16,0,0.010367999970912933
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1,4,1,128,1,float16,fp8,0,0.010497599840164185
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1,4,2,128,1,float16,float16,0,0.010406400263309478
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1,4,1,128,1,fp8,fp8,0,0.010414399951696397
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1,4,2,128,1,float16,fp8,0,0.010438399761915207
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1,4,4,128,1,float16,float16,0,0.010396800190210342
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1,4,4,128,1,float16,fp8,0,0.010361599922180175
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1,4,2,128,1,fp8,fp8,0,0.010407999902963639
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1,4,1,128,1,float16,float16,0,0.010416000336408614
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1,4,4,128,1,fp8,fp8,0,0.010351999849081039
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1,4,1,128,1,float16,fp8,0,0.010363200306892395
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1,4,1,128,1,fp8,fp8,0,0.010355199873447418
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1,4,2,128,1,float16,float16,0,0.010428799688816071
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1,4,2,128,1,float16,fp8,0,0.010401599854230881
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1,4,2,128,1,fp8,fp8,0,0.010441599786281586
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1,4,4,128,1,float16,float16,0,0.010387200117111205
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1,4,4,128,1,float16,fp8,0,0.010366400331258773
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1,4,4,128,1,fp8,fp8,0,0.010355199873447418
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1,4,1,128,1,float16,float16,0,0.010384000092744827
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1,4,1,128,1,float16,fp8,0,0.010424000024795533
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1,4,2,128,1,float16,float16,0,0.01045600026845932
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1,4,1,128,1,fp8,fp8,0,0.010367999970912933
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1,4,2,128,1,float16,fp8,0,0.010417599976062775
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1,4,2,128,1,fp8,fp8,0,0.010284800082445145
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1,4,4,128,1,float16,float16,0,0.010367999970912933
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1,4,4,128,1,float16,fp8,0,0.010416000336408614
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1,4,4,128,1,fp8,fp8,0,0.010452800244092942
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1,4,1,128,1,float16,float16,0,0.010367999970912933
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1,4,1,128,1,float16,fp8,0,0.010358399897813796
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1,4,1,128,1,fp8,fp8,0,0.0103472001850605
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1,4,2,128,1,float16,float16,0,0.010441599786281586
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1,4,2,128,1,fp8,fp8,0,0.01040000021457672
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1,4,2,128,1,float16,fp8,0,0.010451199859380722
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,16384,2,1,128,1,float16,float16,0,0.8941535949707031
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,16384,2,1,128,1,float16,fp8,0,0.8877424240112305
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16384,2,2,128,1,float16,float16,0,0.5242928028106689
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16384,2,2,128,1,float16,fp8,0,0.5095920085906982
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,16384,2,1,128,1,fp8,fp8,0,0.8945455551147461
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16384,2,2,128,1,fp8,fp8,0,0.5093376159667968
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16384,2,1,128,1,float16,float16,0,0.5167168140411377
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16384,2,1,128,1,float16,fp8,0,0.5128575801849365
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16384,2,2,128,1,float16,float16,0,0.32485759258270264
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16384,2,1,128,1,fp8,fp8,0,0.5094560146331787
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16384,2,2,128,1,fp8,fp8,0,0.3186480045318604
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16384,2,2,128,1,float16,fp8,0,0.31891679763793945
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16384,2,1,128,1,float16,float16,0,0.3255647897720337
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16384,2,2,128,1,float16,float16,0,0.20725278854370116
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16384,2,1,128,1,float16,fp8,0,0.3163583993911743
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16384,2,1,128,1,fp8,fp8,0,0.31988480091094973
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16384,2,2,128,1,fp8,fp8,0,0.20371360778808595
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16384,2,2,128,1,float16,fp8,0,0.20449440479278563
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16384,2,1,128,1,float16,float16,0,0.20799040794372559
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16384,2,1,128,1,float16,fp8,0,0.20452959537506105
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16384,2,1,128,1,fp8,fp8,0,0.20418241024017333
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,12288,2,1,128,1,float16,float16,0,0.5572896003723145
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,12288,2,1,128,1,fp8,fp8,0,0.566321611404419
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,12288,2,1,128,1,float16,fp8,0,0.5657216072082519
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,12288,2,2,128,1,float16,float16,0,0.33333919048309324
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,12288,2,2,128,1,float16,fp8,0,0.33010880947113036
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,12288,2,2,128,1,fp8,fp8,0,0.33005120754241946
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,12288,2,1,128,1,float16,float16,0,0.3288095951080322
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,12288,2,1,128,1,float16,fp8,0,0.33030240535736083
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,12288,2,1,128,1,fp8,fp8,0,0.33152480125427247
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,12288,2,2,128,1,float16,float16,0,0.20861759185791015
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,12288,2,2,128,1,float16,fp8,0,0.20679678916931152
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,12288,2,2,128,1,fp8,fp8,0,0.2082063913345337
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,12288,2,1,128,1,float16,float16,0,0.2077967882156372
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,12288,2,1,128,1,float16,fp8,0,0.20914719104766846
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,12288,2,2,128,1,float16,float16,0,0.15406719446182252
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,12288,2,1,128,1,fp8,fp8,0,0.2061232089996338
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,12288,2,2,128,1,float16,fp8,0,0.15575040578842164
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,12288,2,1,128,1,float16,float16,0,0.15403679609298707
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,12288,2,1,128,1,float16,fp8,0,0.15434399843215943
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,12288,2,2,128,1,fp8,fp8,0,0.1561247944831848
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,12288,2,1,128,1,fp8,fp8,0,0.15579359531402587
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,10240,2,2,128,1,float16,float16,0,0.25438239574432375
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,10240,2,1,128,1,float16,float16,0,0.4134367942810059
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,10240,2,1,128,1,float16,fp8,0,0.4312416076660156
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,10240,2,1,128,1,fp8,fp8,0,0.42879681587219237
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,10240,2,2,128,1,float16,fp8,0,0.25726239681243895
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,10240,2,2,128,1,fp8,fp8,0,0.25823040008544923
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,10240,2,1,128,1,float16,float16,0,0.25126559734344484
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,10240,2,1,128,1,float16,fp8,0,0.25725278854370115
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,10240,2,2,128,1,float16,fp8,0,0.15246399641036987
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,10240,2,2,128,1,float16,float16,0,0.1500815987586975
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,10240,2,1,128,1,fp8,fp8,0,0.25915040969848635
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,10240,2,2,128,1,fp8,fp8,0,0.1536128044128418
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,10240,2,1,128,1,float16,float16,0,0.1497007966041565
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,10240,2,1,128,1,float16,fp8,0,0.15308799743652343
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,10240,2,2,128,1,float16,float16,0,0.13102400302886963
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,10240,2,1,128,1,fp8,fp8,0,0.1539888024330139
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,10240,2,2,128,1,float16,fp8,0,0.1313423991203308
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,10240,2,2,128,1,fp8,fp8,0,0.1293984055519104
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,10240,2,1,128,1,float16,float16,0,0.13015040159225463
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,10240,2,1,128,1,float16,fp8,0,0.13118879795074462
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,10240,2,1,128,1,fp8,fp8,0,0.1313696026802063
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,8192,2,2,128,1,float16,float16,0,0.2931519985198975
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,8192,2,1,128,1,float16,float16,0,0.5000127792358399
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,8192,2,1,128,1,float16,fp8,0,0.5482384204864502
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,8192,2,1,128,1,fp8,fp8,0,0.5518320083618165
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,8192,2,2,128,1,float16,fp8,0,0.3101088047027588
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,8192,2,2,128,1,fp8,fp8,0,0.3107327938079834
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,8192,2,1,128,1,float16,float16,0,0.2878592014312744
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,8192,2,1,128,1,float16,fp8,0,0.3116192102432251
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,8192,2,2,128,1,float16,float16,0,0.18209279775619508
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,8192,2,2,128,1,float16,fp8,0,0.18971519470214843
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,8192,2,1,128,1,fp8,fp8,0,0.3107055902481079
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,8192,2,2,128,1,fp8,fp8,0,0.19104959964752197
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,8192,2,1,128,1,float16,float16,0,0.18049440383911133
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,8192,2,1,128,1,float16,fp8,0,0.18888640403747559
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,8192,2,2,128,1,float16,float16,0,0.11300959587097167
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,8192,2,1,128,1,fp8,fp8,0,0.18906879425048828
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,8192,2,2,128,1,float16,fp8,0,0.11769119501113892
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,8192,2,1,128,1,float16,float16,0,0.11298079490661621
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,8192,2,2,128,1,fp8,fp8,0,0.11735359430313111
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,8192,2,1,128,1,float16,fp8,0,0.11809760332107544
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,8192,2,1,128,1,fp8,fp8,0,0.11734559535980224
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,8192,2,2,128,1,float16,float16,0,0.10676640272140503
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,8192,2,2,128,1,float16,fp8,0,0.1068511962890625
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,8192,2,2,128,1,fp8,fp8,0,0.10679359436035156
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,8192,2,1,128,1,float16,float16,0,0.10474720001220703
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,8192,2,1,128,1,fp8,fp8,0,0.10667680501937866
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,8192,2,1,128,1,float16,fp8,0,0.10677920579910279
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,6144,2,1,128,1,float16,float16,0,0.3140399932861328
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,6144,2,1,128,1,float16,fp8,0,0.3587039947509766
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,6144,2,1,128,1,fp8,fp8,0,0.3623759984970093
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,6144,2,2,128,1,float16,float16,0,0.18760000467300414
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,6144,2,2,128,1,float16,fp8,0,0.2045423984527588
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,6144,2,2,128,1,fp8,fp8,0,0.2059648036956787
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,6144,2,1,128,1,float16,float16,0,0.1844640016555786
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,6144,2,2,128,1,float16,float16,0,0.11627360582351684
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,6144,2,1,128,1,float16,fp8,0,0.20552639961242675
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,6144,2,1,128,1,fp8,fp8,0,0.20586879253387452
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,6144,2,2,128,1,float16,fp8,0,0.12549279928207396
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,6144,2,1,128,1,float16,float16,0,0.11645920276641845
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,6144,2,2,128,1,fp8,fp8,0,0.12652959823608398
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,6144,2,2,128,1,float16,float16,0,0.08511999845504761
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,6144,2,1,128,1,float16,fp8,0,0.12569279670715333
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,6144,2,1,128,1,fp8,fp8,0,0.12690240144729614
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,6144,2,2,128,1,float16,fp8,0,0.09039679765701295
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,6144,2,2,128,1,fp8,fp8,0,0.09039679765701295
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,6144,2,1,128,1,float16,float16,0,0.08544639945030212
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,6144,2,1,128,1,float16,fp8,0,0.09033120274543763
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,6144,2,1,128,1,fp8,fp8,0,0.09039679765701295
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,6144,2,2,128,1,float16,float16,0,0.08218560218811036
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,6144,2,2,128,1,float16,fp8,0,0.0841871976852417
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,6144,2,2,128,1,fp8,fp8,0,0.08313440084457398
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,6144,2,1,128,1,float16,float16,0,0.08215199708938599
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,6144,2,1,128,1,float16,fp8,0,0.08388959765434265
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,6144,2,1,128,1,fp8,fp8,0,0.08239840269088745
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,4096,2,1,128,1,float16,float16,0,0.2976783990859985
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,4096,2,1,128,1,float16,fp8,0,0.3727871894836426
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,4096,2,1,128,1,fp8,fp8,0,0.3709376096725464
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,4096,2,2,128,1,float16,float16,0,0.1717087984085083
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,4096,2,2,128,1,float16,fp8,0,0.2043071985244751
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,4096,2,2,128,1,fp8,fp8,0,0.20565760135650635
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,4096,2,1,128,1,float16,float16,0,0.170360004901886
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,4096,2,2,128,1,float16,float16,0,0.10581920146942139
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,4096,2,1,128,1,float16,fp8,0,0.20393118858337403
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,4096,2,1,128,1,fp8,fp8,0,0.20461440086364746
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,4096,2,2,128,1,float16,fp8,0,0.12223520278930664
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,4096,2,2,128,1,fp8,fp8,0,0.12104480266571045
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,4096,2,1,128,1,float16,float16,0,0.10384960174560547
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,4096,2,1,128,1,float16,fp8,0,0.12289279699325562
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,4096,2,2,128,1,float16,float16,0,0.06428639888763428
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,4096,2,1,128,1,fp8,fp8,0,0.12208479642868042
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,4096,2,2,128,1,float16,fp8,0,0.07400959730148315
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,4096,2,2,128,1,fp8,fp8,0,0.07348960041999816
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,4096,2,1,128,1,float16,float16,0,0.0639743983745575
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,4096,2,1,128,1,float16,fp8,0,0.0739952027797699
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,4096,2,1,128,1,fp8,fp8,0,0.0738207995891571
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,4096,2,2,128,1,float16,float16,0,0.060134398937225345
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,4096,2,2,128,1,float16,fp8,0,0.06364799737930298
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,4096,2,2,128,1,fp8,fp8,0,0.06367999911308289
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,4096,2,1,128,1,float16,float16,0,0.05974400043487549
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,4096,2,1,128,1,float16,fp8,0,0.06382079720497132
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,4096,2,2,128,1,float16,float16,0,0.057550400495529175
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,4096,2,1,128,1,fp8,fp8,0,0.06369600296020508
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,4096,2,2,128,1,float16,fp8,0,0.05955520272254944
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,4096,2,1,128,1,float16,float16,0,0.05764319896697998
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,4096,2,2,128,1,fp8,fp8,0,0.059545600414276124
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,4096,2,1,128,1,float16,fp8,0,0.059592002630233766
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,4096,2,1,128,1,fp8,fp8,0,0.05956799983978271
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,3072,2,1,128,1,float16,float16,0,0.19039360284805298
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,3072,2,1,128,1,float16,fp8,0,0.2527519941329956
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,3072,2,2,128,1,float16,float16,0,0.11160160303115844
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,3072,2,1,128,1,fp8,fp8,0,0.2525615930557251
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,3072,2,1,128,1,float16,float16,0,0.11080479621887207
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,3072,2,2,128,1,float16,fp8,0,0.14049119949340821
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,3072,2,2,128,1,fp8,fp8,0,0.1403231978416443
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,3072,2,2,128,1,float16,float16,0,0.06847519874572754
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,3072,2,1,128,1,float16,fp8,0,0.14055360555648805
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,3072,2,1,128,1,fp8,fp8,0,0.13988800048828126
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,3072,2,2,128,1,float16,fp8,0,0.08248800039291382
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,3072,2,2,128,1,fp8,fp8,0,0.08333280086517333
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,3072,2,1,128,1,float16,float16,0,0.06923999786376953
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,3072,2,1,128,1,float16,fp8,0,0.08238880038261413
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,3072,2,2,128,1,float16,float16,0,0.04991840124130249
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,3072,2,1,128,1,fp8,fp8,0,0.08364319801330566
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,3072,2,2,128,1,float16,fp8,0,0.057608002424240114
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,3072,2,2,128,1,fp8,fp8,0,0.05759040117263794
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,3072,2,1,128,1,float16,float16,0,0.049595201015472413
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,3072,2,1,128,1,float16,fp8,0,0.05756319761276245
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,3072,2,1,128,1,fp8,fp8,0,0.05762079954147339
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,3072,2,2,128,1,float16,float16,0,0.047286400198936464
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,3072,2,2,128,1,float16,fp8,0,0.05138720273971557
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,3072,2,2,128,1,fp8,fp8,0,0.05135840177536011
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,3072,2,1,128,1,float16,float16,0,0.04733439981937408
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,3072,2,1,128,1,fp8,fp8,0,0.05111039876937866
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,3072,2,2,128,1,float16,float16,0,0.04721280038356781
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,3072,2,1,128,1,float16,fp8,0,0.049369600415229795
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,3072,2,1,128,1,float16,float16,0,0.04722239971160889
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,3072,2,2,128,1,fp8,fp8,0,0.047367998957633974
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,3072,2,1,128,1,float16,fp8,0,0.04723039865493774
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,3072,2,2,128,1,float16,fp8,0,0.04728800058364868
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,3072,2,1,128,1,fp8,fp8,0,0.04726719856262207
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,2048,2,1,128,1,float16,float16,0,0.19020960330963135
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,2048,2,2,128,1,float16,float16,0,0.10903840065002442
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,2048,2,1,128,1,fp8,fp8,0,0.2802448034286499
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,2048,2,1,128,1,float16,fp8,0,0.2801728010177612
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,2048,2,2,128,1,float16,fp8,0,0.1513216018676758
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,2048,2,2,128,1,fp8,fp8,0,0.15209280252456664
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,2048,2,1,128,1,float16,float16,0,0.10744479894638062
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,2048,2,1,128,1,float16,fp8,0,0.14869439601898193
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,2048,2,2,128,1,float16,float16,0,0.06623200178146363
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,2048,2,2,128,1,float16,fp8,0,0.0864687979221344
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,2048,2,1,128,1,fp8,fp8,0,0.14942560195922852
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,2048,2,2,128,1,fp8,fp8,0,0.08638719916343689
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,2048,2,1,128,1,float16,float16,0,0.06406400203704835
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,2048,2,1,128,1,float16,fp8,0,0.08749120235443116
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,2048,2,1,128,1,fp8,fp8,0,0.08630399703979492
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,2048,2,2,128,1,float16,float16,0,0.039190399646759036
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,2048,2,2,128,1,float16,fp8,0,0.051520001888275144
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,2048,2,1,128,1,float16,float16,0,0.03935199975967407
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,2048,2,2,128,1,fp8,fp8,0,0.05137280225753784
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,2048,2,1,128,1,float16,fp8,0,0.051451200246810914
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,2048,2,2,128,1,float16,float16,0,0.03707360029220581
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,2048,2,1,128,1,fp8,fp8,0,0.05138239860534668
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,2048,2,2,128,1,float16,fp8,0,0.041257598996162416
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,2048,2,1,128,1,float16,float16,0,0.037110400199890134
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,2048,2,2,128,1,fp8,fp8,0,0.04124000072479248
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,2048,2,1,128,1,float16,fp8,0,0.0412416011095047
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,2048,2,2,128,1,float16,float16,0,0.03503519892692566
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,2048,2,1,128,1,fp8,fp8,0,0.041231998801231386
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,2048,2,2,128,1,float16,fp8,0,0.037064000964164734
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,2048,2,2,128,1,fp8,fp8,0,0.03705280125141144
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,2048,2,1,128,1,float16,float16,0,0.03508319854736328
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,2048,2,1,128,1,float16,fp8,0,0.03700479865074158
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,2048,2,1,128,1,fp8,fp8,0,0.03697119951248169
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,2048,2,2,128,1,float16,float16,0,0.034948799014091494
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,2048,2,2,128,1,float16,fp8,0,0.034999999403953555
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,2048,2,2,128,1,fp8,fp8,0,0.03495840132236481
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,2048,2,1,128,1,float16,float16,0,0.034980800747871396
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,2048,2,1,128,1,float16,fp8,0,0.035062399506568906
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,2048,2,1,128,1,fp8,fp8,0,0.03499839901924133
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1536,2,1,128,1,float16,float16,0,0.12819679975509643
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1536,2,1,128,1,float16,fp8,0,0.1968191981315613
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1536,2,2,128,1,float16,float16,0,0.07571359872817993
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1536,2,1,128,1,fp8,fp8,0,0.19496480226516724
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1536,2,2,128,1,float16,fp8,0,0.10711040496826171
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1536,2,2,128,1,fp8,fp8,0,0.10695199966430664
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1536,2,1,128,1,float16,float16,0,0.07398560047149658
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1536,2,1,128,1,float16,fp8,0,0.1075808048248291
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1536,2,2,128,1,float16,float16,0,0.04626719951629639
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1536,2,1,128,1,fp8,fp8,0,0.10689120292663574
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1536,2,2,128,1,float16,fp8,0,0.063673597574234
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1536,2,1,128,1,float16,float16,0,0.0455375999212265
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1536,2,2,128,1,fp8,fp8,0,0.06357600092887879
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1536,2,1,128,1,float16,fp8,0,0.06370239853858947
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1536,2,2,128,1,float16,float16,0,0.03295679986476898
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1536,2,1,128,1,fp8,fp8,0,0.06196640133857727
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1536,2,2,128,1,float16,fp8,0,0.04121119976043701
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1536,2,2,128,1,fp8,fp8,0,0.04117439985275269
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1536,2,1,128,1,float16,float16,0,0.03246879875659943
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1536,2,1,128,1,float16,fp8,0,0.041201600432395936
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1536,2,1,128,1,fp8,fp8,0,0.04111840128898621
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1536,2,2,128,1,float16,float16,0,0.02934719920158386
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1536,2,2,128,1,float16,fp8,0,0.03482399880886078
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1536,2,2,128,1,fp8,fp8,0,0.033055999875068666
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1536,2,1,128,1,float16,float16,0,0.02914080023765564
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1536,2,1,128,1,float16,fp8,0,0.03490720093250275
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1536,2,1,128,1,fp8,fp8,0,0.03306080102920532
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1536,2,2,128,1,float16,float16,0,0.028911998867988585
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1536,2,2,128,1,float16,fp8,0,0.030929601192474364
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1536,2,1,128,1,float16,float16,0,0.028799998760223388
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1536,2,2,128,1,fp8,fp8,0,0.030811199545860292
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1536,2,1,128,1,fp8,fp8,0,0.030814400315284728
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1536,2,1,128,1,float16,fp8,0,0.0308896005153656
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1536,2,2,128,1,float16,float16,0,0.028896000981330872
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1536,2,2,128,1,fp8,fp8,0,0.028907200694084166
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1536,2,2,128,1,float16,fp8,0,0.02882080078125
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1536,2,1,128,1,float16,float16,0,0.028905600309371948
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1536,2,1,128,1,float16,fp8,0,0.02887519896030426
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1536,2,1,128,1,fp8,fp8,0,0.02876960039138794
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,1024,2,1,128,1,float16,float16,0,0.13759360313415528
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1024,2,2,128,1,float16,float16,0,0.07869120240211487
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,1024,2,1,128,1,float16,fp8,0,0.23171520233154297
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,1024,2,1,128,1,fp8,fp8,0,0.23163039684295655
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1024,2,2,128,1,float16,fp8,0,0.12471200227737426
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1024,2,2,128,1,fp8,fp8,0,0.12454559803009033
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1024,2,1,128,1,float16,float16,0,0.07635520100593567
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1024,2,1,128,1,float16,fp8,0,0.12301759719848633
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1024,2,2,128,1,float16,float16,0,0.04706400036811829
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1024,2,1,128,1,fp8,fp8,0,0.12312480211257934
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1024,2,2,128,1,float16,fp8,0,0.06970400214195252
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1024,2,2,128,1,fp8,fp8,0,0.06899999976158142
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1024,2,1,128,1,float16,float16,0,0.04539520144462585
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1024,2,2,128,1,float16,float16,0,0.028324800729751586
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1024,2,1,128,1,float16,fp8,0,0.06980159878730774
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1024,2,1,128,1,fp8,fp8,0,0.06929919719696045
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1024,2,2,128,1,fp8,fp8,0,0.03914879858493805
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1024,2,1,128,1,float16,float16,0,0.028809601068496705
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1024,2,2,128,1,float16,fp8,0,0.03915199935436249
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1024,2,2,128,1,float16,float16,0,0.02489600032567978
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1024,2,1,128,1,fp8,fp8,0,0.04079680144786835
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1024,2,1,128,1,float16,fp8,0,0.03916960060596466
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1024,2,2,128,1,float16,fp8,0,0.030878400802612303
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1024,2,2,128,1,fp8,fp8,0,0.030819201469421388
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1024,2,1,128,1,float16,float16,0,0.024742400646209715
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1024,2,1,128,1,float16,fp8,0,0.030900800228118898
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1024,2,2,128,1,float16,float16,0,0.022711999714374542
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1024,2,1,128,1,fp8,fp8,0,0.03089120090007782
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1024,2,2,128,1,float16,fp8,0,0.02680160105228424
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1024,2,2,128,1,fp8,fp8,0,0.0268528014421463
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1024,2,1,128,1,float16,float16,0,0.022806400060653688
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1024,2,1,128,1,float16,fp8,0,0.026745599508285523
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1024,2,1,128,1,fp8,fp8,0,0.02671839892864227
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1024,2,2,128,1,float16,float16,0,0.022856000065803527
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1024,2,2,128,1,float16,fp8,0,0.02337439954280853
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1024,2,2,128,1,fp8,fp8,0,0.02388000041246414
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1024,2,1,128,1,float16,float16,0,0.022763200104236603
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1024,2,1,128,1,fp8,fp8,0,0.02284640073776245
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1024,2,1,128,1,float16,fp8,0,0.022873599827289582
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1024,2,2,128,1,float16,float16,0,0.022710399329662324
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1024,2,2,128,1,float16,fp8,0,0.02267040014266968
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1024,2,2,128,1,fp8,fp8,0,0.022864000499248506
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1024,2,1,128,1,float16,float16,0,0.022681599855422972
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1024,2,1,128,1,fp8,fp8,0,0.022779199481010436
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1024,2,1,128,1,float16,fp8,0,0.022705599665641785
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,512,2,1,128,1,float16,float16,0,0.10899200439453124
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,512,2,1,128,1,float16,fp8,0,0.20728800296783448
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,512,2,1,128,1,fp8,fp8,0,0.20623679161071778
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,512,2,2,128,1,float16,float16,0,0.06189759969711304
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,512,2,1,128,1,float16,float16,0,0.06004480123519897
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,512,2,2,128,1,float16,fp8,0,0.1102255940437317
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,512,2,2,128,1,fp8,fp8,0,0.10887199640274048
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,512,2,2,128,1,float16,float16,0,0.03702239990234375
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,512,2,1,128,1,float16,fp8,0,0.10845439434051514
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,512,2,1,128,1,fp8,fp8,0,0.10868799686431885
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,512,2,2,128,1,fp8,fp8,0,0.059724801778793336
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,512,2,2,128,1,float16,fp8,0,0.05969600081443786
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,512,2,1,128,1,float16,float16,0,0.036180800199508666
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,512,2,2,128,1,float16,float16,0,0.022623999416828154
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,512,2,1,128,1,float16,fp8,0,0.059640002250671384
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,512,2,1,128,1,fp8,fp8,0,0.059683197736740114
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,512,2,2,128,1,float16,fp8,0,0.03505440056324005
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,512,2,2,128,1,fp8,fp8,0,0.03503200113773346
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,512,2,1,128,1,float16,float16,0,0.022672000527381896
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,512,2,2,128,1,float16,float16,0,0.018695999681949616
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,512,2,1,128,1,fp8,fp8,0,0.03499360084533691
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,512,2,1,128,1,float16,fp8,0,0.03500320017337799
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,512,2,2,128,1,float16,fp8,0,0.024809600412845613
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,512,2,1,128,1,float16,float16,0,0.01879200041294098
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,512,2,2,128,1,fp8,fp8,0,0.024792000651359558
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,512,2,1,128,1,float16,fp8,0,0.024854399263858795
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,512,2,1,128,1,fp8,fp8,0,0.024851199984550477
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,512,2,2,128,1,float16,float16,0,0.016755199432373045
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,512,2,2,128,1,float16,fp8,0,0.020772799849510193
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,512,2,2,128,1,fp8,fp8,0,0.020660799741744996
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,512,2,1,128,1,float16,float16,0,0.017972800135612487
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,512,2,1,128,1,float16,fp8,0,0.020734399557113647
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,512,2,2,128,1,float16,float16,0,0.016550399363040924
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,512,2,1,128,1,fp8,fp8,0,0.020750400424003602
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,512,2,2,128,1,float16,fp8,0,0.018542400002479552
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,512,2,1,128,1,float16,float16,0,0.016631999611854555
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,512,2,2,128,1,fp8,fp8,0,0.01866080015897751
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,512,2,1,128,1,float16,fp8,0,0.018555200099945067
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,512,2,2,128,1,float16,float16,0,0.016564799845218657
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,512,2,1,128,1,fp8,fp8,0,0.01858399957418442
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,512,2,2,128,1,float16,fp8,0,0.016625599563121797
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,512,2,2,128,1,fp8,fp8,0,0.016547200083732606
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,512,2,1,128,1,float16,float16,0,0.016673600673675536
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,512,2,1,128,1,float16,fp8,0,0.016752000153064727
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,512,2,1,128,1,fp8,fp8,0,0.01653600037097931
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,512,2,2,128,1,float16,float16,0,0.01656319946050644
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,512,2,2,128,1,float16,fp8,0,0.01658719927072525
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,512,2,2,128,1,fp8,fp8,0,0.016551999747753142
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,512,2,1,128,1,float16,float16,0,0.01653439998626709
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,512,2,1,128,1,fp8,fp8,0,0.016590400040149687
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,512,2,1,128,1,float16,fp8,0,0.016616000235080718
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,256,2,1,128,1,float16,float16,0,0.05349439978599548
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,256,2,2,128,1,float16,float16,0,0.030932798981666565
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,256,2,1,128,1,float16,fp8,0,0.10068000555038452
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,256,2,1,128,1,fp8,fp8,0,0.10080959796905517
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,256,2,2,128,1,float16,fp8,0,0.055435198545455935
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,256,2,1,128,1,float16,float16,0,0.029281601309776306
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,256,2,2,128,1,fp8,fp8,0,0.05550240278244019
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,256,2,2,128,1,float16,float16,0,0.018646399676799773
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,256,2,1,128,1,float16,fp8,0,0.05551360249519348
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,256,2,1,128,1,fp8,fp8,0,0.05550559759140015
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,256,2,2,128,1,float16,fp8,0,0.030937600135803222
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,256,2,2,128,1,fp8,fp8,0,0.03099679946899414
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,256,2,1,128,1,float16,float16,0,0.01873439997434616
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,256,2,1,128,1,float16,fp8,0,0.030950400233268737
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,256,2,2,128,1,float16,float16,0,0.016441600024700166
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,256,2,1,128,1,fp8,fp8,0,0.031040000915527343
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,256,2,2,128,1,float16,fp8,0,0.02314720004796982
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,256,2,2,128,1,fp8,fp8,0,0.022673599421977997
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,256,2,1,128,1,float16,float16,0,0.016515199840068818
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,256,2,2,128,1,float16,float16,0,0.014612799882888794
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,256,2,1,128,1,fp8,fp8,0,0.022785599529743194
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,256,2,1,128,1,float16,fp8,0,0.022700800001621245
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,256,2,2,128,1,fp8,fp8,0,0.018580800294876097
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,256,2,2,128,1,float16,fp8,0,0.018542400002479552
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,256,2,1,128,1,float16,float16,0,0.014499199390411378
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,256,2,1,128,1,fp8,fp8,0,0.018619200587272643
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,256,2,1,128,1,float16,fp8,0,0.018535999953746794
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,256,2,2,128,1,float16,float16,0,0.014532800018787383
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,256,2,2,128,1,float16,fp8,0,0.015203200280666351
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,256,2,1,128,1,float16,float16,0,0.014451199769973755
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,256,2,2,128,1,fp8,fp8,0,0.015563200414180755
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,256,2,1,128,1,fp8,fp8,0,0.015307199954986573
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,256,2,1,128,1,float16,fp8,0,0.014641599357128143
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,256,2,2,128,1,float16,float16,0,0.014444799721240997
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,256,2,2,128,1,float16,fp8,0,0.014575999975204468
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,256,2,2,128,1,fp8,fp8,0,0.014555199444293976
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,256,2,1,128,1,float16,float16,0,0.014449599385261535
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,256,2,1,128,1,float16,fp8,0,0.01448799967765808
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,256,2,1,128,1,fp8,fp8,0,0.014564800262451171
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,256,2,2,128,1,float16,float16,0,0.012641599774360657
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,256,2,2,128,1,float16,fp8,0,0.014454400539398194
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,256,2,2,128,1,fp8,fp8,0,0.014542399346828461
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,256,2,1,128,1,float16,float16,0,0.01494400054216385
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,256,2,1,128,1,float16,fp8,0,0.013734400272369385
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,256,2,1,128,1,fp8,fp8,0,0.01451359987258911
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,256,2,2,128,1,float16,float16,0,0.01255200058221817
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,256,2,2,128,1,float16,fp8,0,0.01443839967250824
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,256,2,2,128,1,fp8,fp8,0,0.01257600039243698
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,256,2,1,128,1,float16,float16,0,0.014535999298095703
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,256,2,1,128,1,float16,fp8,0,0.01249760016798973
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,256,2,1,128,1,fp8,fp8,0,0.014470399916172027
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,128,2,1,128,1,float16,float16,0,0.032492798566818235
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,128,2,1,128,1,float16,fp8,0,0.05751680135726929
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,128,2,2,128,1,float16,float16,0,0.02059199959039688
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,128,2,1,128,1,fp8,fp8,0,0.05771999955177307
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,128,2,2,128,1,float16,fp8,0,0.03298240005970001
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,128,2,2,128,1,fp8,fp8,0,0.032979199290275575
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,128,2,1,128,1,float16,float16,0,0.020776000618934632
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,128,2,1,128,1,float16,fp8,0,0.03289600014686585
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,128,2,1,128,1,fp8,fp8,0,0.03300319910049439
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,128,2,2,128,1,float16,fp8,0,0.02067199945449829
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,128,2,2,128,1,float16,float16,0,0.014627200365066529
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,128,2,2,128,1,fp8,fp8,0,0.020686399936676026
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,128,2,1,128,1,float16,float16,0,0.014419199526309967
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,128,2,1,128,1,float16,fp8,0,0.020793600380420683
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,128,2,1,128,1,fp8,fp8,0,0.02064319998025894
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,128,2,2,128,1,float16,float16,0,0.012494400143623352
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,128,2,2,128,1,fp8,fp8,0,0.014528000354766845
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,128,2,2,128,1,float16,fp8,0,0.014761599898338317
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,128,2,1,128,1,float16,float16,0,0.01252480000257492
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,128,2,1,128,1,float16,fp8,0,0.014664000272750855
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,128,2,2,128,1,float16,float16,0,0.011020799726247787
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,128,2,1,128,1,fp8,fp8,0,0.01472959965467453
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,128,2,2,128,1,float16,fp8,0,0.0125231996178627
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,128,2,1,128,1,float16,float16,0,0.01067039966583252
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,128,2,1,128,1,float16,fp8,0,0.012535999715328216
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,128,2,2,128,1,fp8,fp8,0,0.01260959953069687
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,128,2,2,128,1,float16,float16,0,0.010441599786281586
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,128,2,2,128,1,float16,fp8,0,0.012492799758911132
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,128,2,1,128,1,fp8,fp8,0,0.012625600397586822
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,128,2,2,128,1,fp8,fp8,0,0.012412799894809723
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,128,2,1,128,1,float16,float16,0,0.010524799674749374
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,128,2,1,128,1,float16,fp8,0,0.012611199915409089
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,128,2,1,128,1,fp8,fp8,0,0.012486399710178375
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,128,2,2,128,1,float16,float16,0,0.010468800365924836
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,128,2,2,128,1,float16,fp8,0,0.01101439967751503
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,128,2,1,128,1,float16,float16,0,0.010497599840164185
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,128,2,2,128,1,fp8,fp8,0,0.011006399989128113
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,128,2,1,128,1,float16,fp8,0,0.010577599704265594
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,128,2,2,128,1,float16,float16,0,0.010422399640083313
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,128,2,1,128,1,fp8,fp8,0,0.012439999729394913
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,128,2,1,128,1,float16,float16,0,0.010468800365924836
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,128,2,2,128,1,fp8,fp8,0,0.010486400127410889
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,128,2,2,128,1,float16,fp8,0,0.010377600044012069
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,128,2,1,128,1,float16,fp8,0,0.010500799864530563
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,128,2,1,128,1,fp8,fp8,0,0.01048159971833229
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,128,2,2,128,1,float16,float16,0,0.010566399991512298
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,128,2,2,128,1,float16,fp8,0,0.010496000200510025
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,128,2,2,128,1,fp8,fp8,0,0.010470400005578995
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,128,2,1,128,1,float16,float16,0,0.010446400195360184
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,128,2,1,128,1,float16,fp8,0,0.010523200035095215
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,128,2,1,128,1,fp8,fp8,0,0.010411199927330018
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,64,2,1,128,1,float16,float16,0,0.02478879988193512
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,64,2,1,128,1,float16,fp8,0,0.037150400876998904
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,64,2,1,128,1,fp8,fp8,0,0.037084800004959104
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,64,2,2,128,1,float16,float16,0,0.01664319932460785
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,64,2,2,128,1,float16,fp8,0,0.02285439968109131
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,64,2,2,128,1,fp8,fp8,0,0.022699199616909027
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,64,2,1,128,1,float16,float16,0,0.016569599509239197
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,64,2,1,128,1,float16,fp8,0,0.022787199914455415
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,64,2,2,128,1,float16,float16,0,0.012452799826860428
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,64,2,1,128,1,fp8,fp8,0,0.022708800435066224
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,64,2,2,128,1,float16,fp8,0,0.016494399309158324
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,64,2,1,128,1,float16,float16,0,0.012513600289821625
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,64,2,2,128,1,fp8,fp8,0,0.014608000218868256
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,64,2,1,128,1,float16,fp8,0,0.015960000455379486
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,64,2,1,128,1,fp8,fp8,0,0.014575999975204468
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,64,2,2,128,1,float16,float16,0,0.01048320010304451
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,64,2,2,128,1,float16,fp8,0,0.01255200058221817
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,64,2,2,128,1,fp8,fp8,0,0.012518399953842163
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,64,2,1,128,1,float16,float16,0,0.010417599976062775
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,64,2,1,128,1,float16,fp8,0,0.012571200728416443
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,64,2,1,128,1,fp8,fp8,0,0.0125231996178627
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,64,2,2,128,1,float16,float16,0,0.010382399708032609
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,64,2,1,128,1,float16,float16,0,0.010422399640083313
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,64,2,2,128,1,fp8,fp8,0,0.012411200255155564
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,64,2,2,128,1,float16,fp8,0,0.01138240024447441
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,64,2,1,128,1,float16,fp8,0,0.010496000200510025
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,64,2,1,128,1,fp8,fp8,0,0.011307200044393539
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,64,2,2,128,1,float16,float16,0,0.010475199669599533
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,64,2,2,128,1,float16,fp8,0,0.010382399708032609
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,64,2,1,128,1,float16,float16,0,0.010460799932479859
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,64,2,2,128,1,fp8,fp8,0,0.01053759977221489
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,64,2,1,128,1,float16,fp8,0,0.010598400235176086
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,64,2,1,128,1,fp8,fp8,0,0.010425599664449692
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,64,2,2,128,1,float16,float16,0,0.010486400127410889
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,64,2,2,128,1,float16,fp8,0,0.010420800000429154
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,64,2,1,128,1,float16,float16,0,0.010417599976062775
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,64,2,2,128,1,fp8,fp8,0,0.010520000010728836
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,64,2,1,128,1,fp8,fp8,0,0.010393600165843963
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,64,2,1,128,1,float16,fp8,0,0.010465600341558457
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,64,2,2,128,1,float16,float16,0,0.010470400005578995
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,64,2,2,128,1,float16,fp8,0,0.010416000336408614
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,64,2,2,128,1,fp8,fp8,0,0.010567999631166457
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,64,2,1,128,1,float16,float16,0,0.01040000021457672
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,64,2,1,128,1,float16,fp8,0,0.010463999956846238
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,64,2,1,128,1,fp8,fp8,0,0.010777600109577179
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,64,2,2,128,1,float16,float16,0,0.010444799810647965
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,64,2,2,128,1,float16,fp8,0,0.010353600233793258
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,64,2,2,128,1,fp8,fp8,0,0.010496000200510025
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,64,2,1,128,1,float16,float16,0,0.010384000092744827
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,64,2,1,128,1,float16,fp8,0,0.01048159971833229
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,64,2,1,128,1,fp8,fp8,0,0.010428799688816071
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,32,2,1,128,1,float16,fp8,0,0.026825600862503053
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,32,2,1,128,1,float16,float16,0,0.022563199698925018
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,32,2,2,128,1,float16,float16,0,0.014638400077819825
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,32,2,1,128,1,fp8,fp8,0,0.026796799898147584
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,32,2,2,128,1,fp8,fp8,0,0.018563200533390046
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,32,2,1,128,1,float16,float16,0,0.014534400403499603
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,32,2,2,128,1,float16,fp8,0,0.018566399812698364
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,32,2,1,128,1,float16,fp8,0,0.018532800674438476
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,32,2,1,128,1,fp8,fp8,0,0.01732639968395233
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,32,2,2,128,1,float16,float16,0,0.012385600060224534
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,32,2,2,128,1,float16,fp8,0,0.012561599910259246
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,32,2,2,128,1,fp8,fp8,0,0.012476799637079239
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,32,2,1,128,1,float16,float16,0,0.012415999919176102
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,32,2,1,128,1,float16,fp8,0,0.012779200077056884
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,32,2,1,128,1,fp8,fp8,0,0.012503999471664428
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,32,2,2,128,1,float16,float16,0,0.010496000200510025
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,32,2,2,128,1,float16,fp8,0,0.011023999750614166
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,32,2,2,128,1,fp8,fp8,0,0.01043199971318245
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,32,2,1,128,1,float16,float16,0,0.01058719977736473
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,32,2,1,128,1,float16,fp8,0,0.010539200156927109
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,32,2,1,128,1,fp8,fp8,0,0.010497599840164185
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,32,2,2,128,1,float16,float16,0,0.010475199669599533
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,32,2,2,128,1,float16,fp8,0,0.010494399815797806
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,32,2,1,128,1,float16,float16,0,0.010356800258159637
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,32,2,2,128,1,fp8,fp8,0,0.010377600044012069
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,32,2,1,128,1,float16,fp8,0,0.01048320010304451
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,32,2,2,128,1,float16,float16,0,0.010422399640083313
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,32,2,1,128,1,fp8,fp8,0,0.010467199981212616
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,32,2,2,128,1,float16,fp8,0,0.010407999902963639
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,32,2,1,128,1,float16,float16,0,0.010467199981212616
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,32,2,1,128,1,float16,fp8,0,0.010500799864530563
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,32,2,2,128,1,fp8,fp8,0,0.010391999781131745
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,32,2,1,128,1,fp8,fp8,0,0.010480000078678131
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,32,2,2,128,1,float16,float16,0,0.010487999767065048
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,32,2,2,128,1,float16,fp8,0,0.010468800365924836
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,32,2,2,128,1,fp8,fp8,0,0.010558400303125381
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,32,2,1,128,1,float16,float16,0,0.010556799918413162
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,32,2,1,128,1,float16,fp8,0,0.010393600165843963
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,32,2,1,128,1,fp8,fp8,0,0.010356800258159637
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,32,2,2,128,1,float16,float16,0,0.010345599800348281
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,32,2,2,128,1,float16,fp8,0,0.010387200117111205
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,32,2,1,128,1,float16,float16,0,0.010335999727249145
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,32,2,2,128,1,fp8,fp8,0,0.010531199723482132
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,32,2,1,128,1,float16,fp8,0,0.010380800068378448
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,32,2,2,128,1,float16,float16,0,0.010377600044012069
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,32,2,2,128,1,float16,fp8,0,0.010443200170993806
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,32,2,1,128,1,float16,float16,0,0.010444799810647965
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,32,2,2,128,1,fp8,fp8,0,0.010388799756765366
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,32,2,1,128,1,fp8,fp8,0,0.010335999727249145
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,32,2,1,128,1,float16,fp8,0,0.010409600287675857
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,32,2,1,128,1,fp8,fp8,0,0.010417599976062775
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,16,2,1,128,1,float16,float16,0,0.020664000511169435
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,16,2,1,128,1,float16,fp8,0,0.022716799378395082
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,16,2,1,128,1,fp8,fp8,0,0.02279839962720871
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,16,2,2,128,1,float16,float16,0,0.014585599303245544
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,16,2,2,128,1,float16,fp8,0,0.014519999921321868
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,16,2,1,128,1,float16,float16,0,0.014696000516414643
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,16,2,2,128,1,fp8,fp8,0,0.015364800393581391
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,16,2,1,128,1,float16,fp8,0,0.014558400213718414
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,16,2,2,128,1,float16,float16,0,0.010590399801731109
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,16,2,1,128,1,fp8,fp8,0,0.016414399445056915
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,16,2,2,128,1,float16,fp8,0,0.012459199875593185
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,16,2,2,128,1,fp8,fp8,0,0.01241919994354248
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,16,2,1,128,1,float16,float16,0,0.011800000071525573
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,16,2,1,128,1,float16,fp8,0,0.01144160032272339
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,16,2,2,128,1,float16,float16,0,0.010486400127410889
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,16,2,1,128,1,fp8,fp8,0,0.012318400293588638
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,16,2,2,128,1,float16,fp8,0,0.01040479987859726
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,16,2,2,128,1,fp8,fp8,0,0.01048479974269867
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,16,2,1,128,1,float16,float16,0,0.010540799796581268
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,16,2,1,128,1,float16,fp8,0,0.010377600044012069
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,16,2,1,128,1,fp8,fp8,0,0.010489600151777268
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,16,2,2,128,1,float16,float16,0,0.010527999699115753
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,16,2,2,128,1,float16,fp8,0,0.010363200306892395
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,16,2,2,128,1,fp8,fp8,0,0.010462400317192078
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,16,2,1,128,1,float16,float16,0,0.010419200360774993
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,16,2,1,128,1,float16,fp8,0,0.01050880029797554
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,16,2,1,128,1,fp8,fp8,0,0.010443200170993806
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,16,2,2,128,1,float16,float16,0,0.010527999699115753
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,16,2,2,128,1,fp8,fp8,0,0.010393600165843963
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,16,2,2,128,1,float16,fp8,0,0.010425599664449692
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,16,2,1,128,1,float16,float16,0,0.010475199669599533
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,16,2,1,128,1,float16,fp8,0,0.010416000336408614
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16,2,2,128,1,float16,float16,0,0.010443200170993806
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,16,2,1,128,1,fp8,fp8,0,0.010371199995279311
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16,2,2,128,1,float16,fp8,0,0.010332799702882766
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16,2,2,128,1,fp8,fp8,0,0.01035040020942688
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16,2,1,128,1,float16,float16,0,0.010443200170993806
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16,2,1,128,1,float16,fp8,0,0.010435199737548828
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16,2,1,128,1,fp8,fp8,0,0.010491199791431427
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16,2,2,128,1,float16,float16,0,0.010390400141477584
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16,2,2,128,1,float16,fp8,0,0.010401599854230881
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16,2,1,128,1,float16,float16,0,0.010407999902963639
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16,2,2,128,1,fp8,fp8,0,0.010361599922180175
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16,2,1,128,1,float16,fp8,0,0.010366400331258773
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16,2,1,128,1,fp8,fp8,0,0.010465600341558457
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16,2,2,128,1,float16,float16,0,0.010443200170993806
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16,2,2,128,1,float16,fp8,0,0.010494399815797806
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16,2,2,128,1,fp8,fp8,0,0.010367999970912933
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16,2,1,128,1,float16,float16,0,0.010470400005578995
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16,2,1,128,1,float16,fp8,0,0.010416000336408614
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16,2,1,128,1,fp8,fp8,0,0.010390400141477584
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,1,2,1,128,1,float16,fp8,0,0.018739199638366698
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,1,2,1,128,1,float16,float16,0,0.018855999410152435
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,256,1,2,1,128,1,fp8,fp8,0,0.018699200451374055
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,1,2,2,128,1,float16,float16,0,0.014547200500965118
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,1,2,2,128,1,float16,fp8,0,0.014454400539398194
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,1,2,2,128,1,fp8,fp8,0,0.012494400143623352
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,1,2,1,128,1,float16,float16,0,0.014153599739074707
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,1,2,1,128,1,float16,fp8,0,0.014519999921321868
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,1,2,1,128,1,fp8,fp8,0,0.012559999525547028
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1,2,2,128,1,float16,fp8,0,0.010531199723482132
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1,2,2,128,1,float16,float16,0,0.010590399801731109
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1,2,2,128,1,fp8,fp8,0,0.010395199805498124
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1,2,1,128,1,float16,float16,0,0.010648000240325927
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1,2,1,128,1,fp8,fp8,0,0.010462400317192078
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1,2,1,128,1,float16,fp8,0,0.010574399679899215
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1,2,2,128,1,float16,float16,0,0.010454399883747101
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1,2,2,128,1,float16,fp8,0,0.01043360009789467
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1,2,1,128,1,float16,float16,0,0.010523200035095215
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1,2,2,128,1,fp8,fp8,0,0.010556799918413162
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1,2,1,128,1,float16,fp8,0,0.010449600219726563
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1,2,2,128,1,float16,float16,0,0.010331200063228607
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1,2,2,128,1,float16,fp8,0,0.010444799810647965
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1,2,1,128,1,fp8,fp8,0,0.010355199873447418
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1,2,2,128,1,fp8,fp8,0,0.010420800000429154
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1,2,1,128,1,float16,float16,0,0.010454399883747101
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1,2,1,128,1,float16,fp8,0,0.011033599823713302
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1,2,1,128,1,fp8,fp8,0,0.01043199971318245
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1,2,2,128,1,float16,float16,0,0.010449600219726563
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1,2,2,128,1,float16,fp8,0,0.010420800000429154
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1,2,2,128,1,fp8,fp8,0,0.010384000092744827
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1,2,1,128,1,float16,float16,0,0.010420800000429154
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1,2,1,128,1,float16,fp8,0,0.010369600355625152
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1,2,1,128,1,fp8,fp8,0,0.010375999659299851
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1,2,2,128,1,float16,float16,0,0.010470400005578995
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1,2,2,128,1,float16,fp8,0,0.01048159971833229
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1,2,2,128,1,fp8,fp8,0,0.010436800122261048
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1,2,1,128,1,float16,float16,0,0.010419200360774993
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1,2,1,128,1,float16,fp8,0,0.010438399761915207
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1,2,1,128,1,fp8,fp8,0,0.010454399883747101
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1,2,2,128,1,float16,float16,0,0.010385599732398988
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1,2,2,128,1,float16,fp8,0,0.010471999645233154
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1,2,1,128,1,float16,float16,0,0.010462400317192078
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1,2,2,128,1,fp8,fp8,0,0.01045759990811348
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1,2,1,128,1,float16,fp8,0,0.010449600219726563
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1,2,1,128,1,fp8,fp8,0,0.010452800244092942
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1,2,2,128,1,float16,float16,0,0.010447999835014344
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1,2,2,128,1,float16,fp8,0,0.010332799702882766
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1,2,1,128,1,float16,float16,0,0.010337600111961364
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1,2,1,128,1,float16,fp8,0,0.010382399708032609
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1,2,2,128,1,fp8,fp8,0,0.010360000282526016
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1,2,1,128,1,fp8,fp8,0,0.008766400068998337
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16384,1,1,128,1,float16,float16,0,0.332096004486084
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16384,1,1,128,1,float16,fp8,0,0.3528320074081421
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16384,1,1,128,1,fp8,fp8,0,0.3541327953338623
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16384,1,1,128,1,float16,float16,0,0.21125760078430175
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16384,1,1,128,1,float16,fp8,0,0.22275679111480712
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16384,1,1,128,1,fp8,fp8,0,0.22203359603881836
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16384,1,1,128,1,float16,float16,0,0.19684319496154784
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16384,1,1,128,1,float16,fp8,0,0.2011120080947876
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16384,1,1,128,1,fp8,fp8,0,0.20106720924377441
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,12288,1,1,128,1,float16,float16,0,0.21296160221099852
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,12288,1,1,128,1,float16,fp8,0,0.23505918979644774
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,12288,1,1,128,1,fp8,fp8,0,0.2333904027938843
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,12288,1,1,128,1,float16,float16,0,0.15779680013656616
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,12288,1,1,128,1,float16,fp8,0,0.16882400512695311
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,12288,1,1,128,1,fp8,fp8,0,0.16808320283889772
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,12288,1,1,128,1,float16,float16,0,0.1502128005027771
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,12288,1,1,128,1,float16,fp8,0,0.15394400358200072
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,12288,1,1,128,1,fp8,fp8,0,0.15397599935531617
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,10240,1,1,128,1,float16,float16,0,0.15622400045394896
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,10240,1,1,128,1,float16,fp8,0,0.17665120363235473
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,10240,1,1,128,1,fp8,fp8,0,0.1755936026573181
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,10240,1,1,128,1,float16,float16,0,0.13146400451660156
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,10240,1,1,128,1,float16,fp8,0,0.14164799451828003
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,10240,1,1,128,1,fp8,fp8,0,0.14300639629364015
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,10240,1,1,128,1,float16,float16,0,0.1273743987083435
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,10240,1,1,128,1,float16,fp8,0,0.13118560314178468
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,10240,1,1,128,1,fp8,fp8,0,0.13121440410614013
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,8192,1,1,128,1,float16,float16,0,0.19097119569778442
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,8192,1,1,128,1,fp8,fp8,0,0.2246880054473877
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,8192,1,1,128,1,float16,fp8,0,0.22517919540405273
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,8192,1,1,128,1,float16,float16,0,0.11698559522628785
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,8192,1,1,128,1,float16,fp8,0,0.1354159951210022
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,8192,1,1,128,1,float16,float16,0,0.10882719755172729
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,8192,1,1,128,1,fp8,fp8,0,0.13559839725494385
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,8192,1,1,128,1,float16,fp8,0,0.11677119731903077
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,8192,1,1,128,1,fp8,fp8,0,0.11697920560836791
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,8192,1,1,128,1,float16,float16,0,0.10511519908905029
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,8192,1,1,128,1,fp8,fp8,0,0.10671679973602295
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,8192,1,1,128,1,float16,fp8,0,0.10673120021820068
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,6144,1,1,128,1,float16,float16,0,0.12390240430831909
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,6144,1,1,128,1,float16,fp8,0,0.15134400129318237
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,6144,1,1,128,1,fp8,fp8,0,0.15189919471740723
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,6144,1,1,128,1,float16,float16,0,0.0888368010520935
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,6144,1,1,128,1,fp8,fp8,0,0.10464639663696289
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,6144,1,1,128,1,float16,float16,0,0.08437920212745667
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,6144,1,1,128,1,float16,fp8,0,0.10262880325317383
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,6144,1,1,128,1,float16,fp8,0,0.09031519889831544
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,6144,1,1,128,1,float16,float16,0,0.08214399814605713
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,6144,1,1,128,1,fp8,fp8,0,0.09039520025253296
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,6144,1,1,128,1,float16,fp8,0,0.08415679931640625
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,6144,1,1,128,1,fp8,fp8,0,0.08426079750061036
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,4096,1,1,128,1,float16,float16,0,0.11458400487899781
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,4096,1,1,128,1,float16,fp8,0,0.1566864013671875
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,4096,1,1,128,1,float16,float16,0,0.06944479942321777
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,4096,1,1,128,1,fp8,fp8,0,0.15741759538650513
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,4096,1,1,128,1,float16,fp8,0,0.09183520078659058
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,4096,1,1,128,1,float16,float16,0,0.0618287980556488
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,4096,1,1,128,1,fp8,fp8,0,0.09188799858093262
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,4096,1,1,128,1,float16,fp8,0,0.07217599749565125
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,4096,1,1,128,1,fp8,fp8,0,0.07243040204048157
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,4096,1,1,128,1,float16,float16,0,0.05982080101966858
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,4096,1,1,128,1,float16,fp8,0,0.06366879940032959
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,4096,1,1,128,1,fp8,fp8,0,0.06367840170860291
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,4096,1,1,128,1,float16,fp8,0,0.059596800804138185
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,4096,1,1,128,1,fp8,fp8,0,0.059620797634124756
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,4096,1,1,128,1,float16,float16,0,0.05812320113182068
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,3072,1,1,128,1,float16,float16,0,0.07530879974365234
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,3072,1,1,128,1,float16,fp8,0,0.11028640270233155
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,3072,1,1,128,1,fp8,fp8,0,0.1107151985168457
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,3072,1,1,128,1,float16,float16,0,0.053420799970626834
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,3072,1,1,128,1,float16,fp8,0,0.0698527991771698
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,3072,1,1,128,1,fp8,fp8,0,0.07175999879837036
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,3072,1,1,128,1,float16,float16,0,0.04936800003051758
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,3072,1,1,128,1,float16,fp8,0,0.05754240155220032
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,3072,1,1,128,1,float16,float16,0,0.04727360010147095
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,3072,1,1,128,1,fp8,fp8,0,0.05769439935684204
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,3072,1,1,128,1,float16,fp8,0,0.04945279955863953
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,3072,1,1,128,1,fp8,fp8,0,0.049351999163627626
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,3072,1,1,128,1,float16,float16,0,0.047444799542427064
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,3072,1,1,128,1,float16,fp8,0,0.04733439981937408
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,3072,1,1,128,1,fp8,fp8,0,0.04719040095806122
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,2048,1,1,128,1,float16,float16,0,0.07514240145683289
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,2048,1,1,128,1,float16,fp8,0,0.12103519439697266
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,2048,1,1,128,1,float16,float16,0,0.04543200135231018
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,2048,1,1,128,1,fp8,fp8,0,0.1212656021118164
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,2048,1,1,128,1,float16,fp8,0,0.0678160011768341
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,2048,1,1,128,1,fp8,fp8,0,0.06819679737091064
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,2048,1,1,128,1,float16,float16,0,0.03907040059566498
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,2048,1,1,128,1,float16,fp8,0,0.04940159916877747
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,2048,1,1,128,1,float16,float16,0,0.03707520067691803
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,2048,1,1,128,1,fp8,fp8,0,0.05036960244178772
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,2048,1,1,128,1,float16,fp8,0,0.04118239879608154
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,2048,1,1,128,1,fp8,fp8,0,0.04121600091457367
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,2048,1,1,128,1,float16,float16,0,0.03524639904499054
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,2048,1,1,128,1,float16,fp8,0,0.037064000964164734
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,2048,1,1,128,1,fp8,fp8,0,0.03704800009727478
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,2048,1,1,128,1,float16,float16,0,0.035123199224472046
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,2048,1,1,128,1,float16,fp8,0,0.03510879874229431
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,2048,1,1,128,1,fp8,fp8,0,0.03554719984531403
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1536,1,1,128,1,float16,float16,0,0.05362719893455505
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1536,1,1,128,1,float16,fp8,0,0.08876799941062927
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1536,1,1,128,1,fp8,fp8,0,0.08862720131874084
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1536,1,1,128,1,float16,float16,0,0.03525600135326386
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1536,1,1,128,1,float16,fp8,0,0.05356320142745972
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1536,1,1,128,1,fp8,fp8,0,0.05354239940643311
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1536,1,1,128,1,float16,float16,0,0.03110400140285492
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1536,1,1,128,1,float16,fp8,0,0.0397487998008728
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1536,1,1,128,1,fp8,fp8,0,0.041142401099205014
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1536,1,1,128,1,float16,float16,0,0.02922239899635315
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1536,1,1,128,1,float16,fp8,0,0.03308480083942413
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1536,1,1,128,1,fp8,fp8,0,0.03299840092658997
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1536,1,1,128,1,float16,float16,0,0.028859201073646545
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1536,1,1,128,1,float16,fp8,0,0.030910399556159974
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1536,1,1,128,1,fp8,fp8,0,0.0308896005153656
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1536,1,1,128,1,float16,float16,0,0.028777599334716797
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1536,1,1,128,1,float16,fp8,0,0.02884800136089325
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1536,1,1,128,1,fp8,fp8,0,0.028798401355743408
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1024,1,1,128,1,float16,float16,0,0.05562559962272644
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1024,1,1,128,1,float16,fp8,0,0.10473439693450928
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1024,1,1,128,1,fp8,fp8,0,0.10432640314102173
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1024,1,1,128,1,float16,float16,0,0.03295679986476898
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1024,1,1,128,1,float16,fp8,0,0.057627201080322266
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1024,1,1,128,1,fp8,fp8,0,0.05748159885406494
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1024,1,1,128,1,float16,float16,0,0.02688640058040619
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1024,1,1,128,1,float16,fp8,0,0.03917919993400574
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1024,1,1,128,1,fp8,fp8,0,0.03915840089321136
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1024,1,1,128,1,float16,float16,0,0.02476319968700409
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1024,1,1,128,1,float16,fp8,0,0.030876800417900085
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1024,1,1,128,1,fp8,fp8,0,0.030921599268913268
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1024,1,1,128,1,float16,float16,0,0.022710399329662324
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1024,1,1,128,1,float16,fp8,0,0.026817598938941957
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1024,1,1,128,1,fp8,fp8,0,0.026867198944091796
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1024,1,1,128,1,float16,float16,0,0.022745600342750548
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1024,1,1,128,1,float16,fp8,0,0.024702399969100952
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1024,1,1,128,1,fp8,fp8,0,0.02423200011253357
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1024,1,1,128,1,float16,fp8,0,0.022771200537681578
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1024,1,1,128,1,float16,float16,0,0.022702400386333466
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1024,1,1,128,1,fp8,fp8,0,0.022726400196552275
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,512,1,1,128,1,float16,float16,0,0.04534400105476379
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,512,1,1,128,1,float16,fp8,0,0.09441919922828675
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,512,1,1,128,1,float16,float16,0,0.02680639922618866
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,512,1,1,128,1,fp8,fp8,0,0.09444640278816223
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,512,1,1,128,1,fp8,fp8,0,0.05146719813346863
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,512,1,1,128,1,float16,fp8,0,0.05143679976463318
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,512,1,1,128,1,float16,float16,0,0.020691199600696562
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,512,1,1,128,1,fp8,fp8,0,0.03296479880809784
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,512,1,1,128,1,float16,fp8,0,0.03385440111160278
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,512,1,1,128,1,float16,float16,0,0.018612800538539885
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,512,1,1,128,1,float16,fp8,0,0.024715200066566467
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,512,1,1,128,1,float16,float16,0,0.016628800332546233
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,512,1,1,128,1,fp8,fp8,0,0.024857600033283234
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,512,1,1,128,1,float16,fp8,0,0.020703999698162077
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,512,1,1,128,1,fp8,fp8,0,0.020652799308300017
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,512,1,1,128,1,float16,float16,0,0.016622400283813475
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,512,1,1,128,1,float16,fp8,0,0.018628799915313722
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,512,1,1,128,1,float16,float16,0,0.01659200042486191
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,512,1,1,128,1,fp8,fp8,0,0.018595199286937713
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,512,1,1,128,1,float16,fp8,0,0.016575999557971954
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,512,1,1,128,1,float16,float16,0,0.016545599699020384
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,512,1,1,128,1,fp8,fp8,0,0.016651199758052827
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,512,1,1,128,1,float16,fp8,0,0.016620799899101257
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,512,1,1,128,1,fp8,fp8,0,0.016494399309158324
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,256,1,1,128,1,float16,float16,0,0.02281759977340698
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,256,1,1,128,1,float16,fp8,0,0.0493120014667511
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,256,1,1,128,1,fp8,fp8,0,0.04933759868144989
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,256,1,1,128,1,float16,float16,0,0.018651199340820313
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,256,1,1,128,1,float16,fp8,0,0.030881598591804504
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,256,1,1,128,1,fp8,fp8,0,0.03089759945869446
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,256,1,1,128,1,float16,float16,0,0.016484799981117248
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,256,1,1,128,1,float16,fp8,0,0.022700800001621245
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,256,1,1,128,1,fp8,fp8,0,0.022729599475860597
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,256,1,1,128,1,float16,float16,0,0.01465120017528534
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,256,1,1,128,1,float16,float16,0,0.014505599439144135
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,256,1,1,128,1,float16,fp8,0,0.018572799861431122
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,256,1,1,128,1,fp8,fp8,0,0.018572799861431122
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,256,1,1,128,1,float16,fp8,0,0.01653279960155487
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,256,1,1,128,1,fp8,fp8,0,0.01499200016260147
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,256,1,1,128,1,float16,float16,0,0.01451359987258911
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,256,1,1,128,1,float16,fp8,0,0.01467359960079193
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,256,1,1,128,1,float16,float16,0,0.014486399292945863
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,256,1,1,128,1,fp8,fp8,0,0.014483200013637542
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,256,1,1,128,1,float16,fp8,0,0.014457599818706512
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,256,1,1,128,1,fp8,fp8,0,0.01451359987258911
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,256,1,1,128,1,float16,float16,0,0.013275200128555298
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,256,1,1,128,1,float16,fp8,0,0.01430879980325699
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,256,1,1,128,1,fp8,fp8,0,0.013247999548912048
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,128,1,1,128,1,float16,float16,0,0.016575999557971954
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,128,1,1,128,1,float16,fp8,0,0.028841599822044373
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,128,1,1,128,1,fp8,fp8,0,0.028863999247550964
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,128,1,1,128,1,float16,float16,0,0.01249760016798973
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,128,1,1,128,1,float16,fp8,0,0.02067999988794327
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,128,1,1,128,1,float16,float16,0,0.012387199699878693
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,128,1,1,128,1,fp8,fp8,0,0.01863519996404648
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,128,1,1,128,1,float16,fp8,0,0.014636799693107605
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,128,1,1,128,1,float16,float16,0,0.010451199859380722
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,128,1,1,128,1,fp8,fp8,0,0.014590400457382201
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,128,1,1,128,1,float16,fp8,0,0.012615999579429627
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,128,1,1,128,1,fp8,fp8,0,0.012464000284671784
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,128,1,1,128,1,float16,float16,0,0.01048479974269867
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,128,1,1,128,1,float16,fp8,0,0.012513600289821625
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,128,1,1,128,1,fp8,fp8,0,0.012486399710178375
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,128,1,1,128,1,float16,float16,0,0.010436800122261048
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,128,1,1,128,1,float16,fp8,0,0.011574400216341018
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,128,1,1,128,1,fp8,fp8,0,0.010435199737548828
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,128,1,1,128,1,float16,float16,0,0.010409600287675857
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,128,1,1,128,1,float16,fp8,0,0.010518400371074677
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,128,1,1,128,1,fp8,fp8,0,0.010411199927330018
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,128,1,1,128,1,float16,float16,0,0.01037919968366623
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,128,1,1,128,1,float16,fp8,0,0.010556799918413162
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,128,1,1,128,1,fp8,fp8,0,0.010452800244092942
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,64,1,1,128,1,float16,float16,0,0.0125231996178627
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,64,1,1,128,1,fp8,fp8,0,0.020483200252056123
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,64,1,1,128,1,float16,fp8,0,0.020628799498081208
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,64,1,1,128,1,float16,float16,0,0.01236959993839264
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,64,1,1,128,1,float16,fp8,0,0.014532800018787383
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,64,1,1,128,1,float16,float16,0,0.010487999767065048
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,64,1,1,128,1,fp8,fp8,0,0.0146479994058609
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,64,1,1,128,1,float16,fp8,0,0.012439999729394913
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,64,1,1,128,1,fp8,fp8,0,0.012415999919176102
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,64,1,1,128,1,float16,float16,0,0.010527999699115753
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,64,1,1,128,1,float16,fp8,0,0.010424000024795533
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,64,1,1,128,1,float16,float16,0,0.01064319983124733
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,64,1,1,128,1,fp8,fp8,0,0.010598400235176086
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,64,1,1,128,1,float16,fp8,0,0.010424000024795533
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,64,1,1,128,1,float16,float16,0,0.01040479987859726
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,64,1,1,128,1,fp8,fp8,0,0.010478399693965912
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,64,1,1,128,1,float16,fp8,0,0.010401599854230881
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,64,1,1,128,1,fp8,fp8,0,0.010451199859380722
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,64,1,1,128,1,float16,float16,0,0.010371199995279311
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,64,1,1,128,1,float16,fp8,0,0.010351999849081039
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,64,1,1,128,1,fp8,fp8,0,0.010345599800348281
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,64,1,1,128,1,float16,float16,0,0.010384000092744827
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,64,1,1,128,1,fp8,fp8,0,0.010436800122261048
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,64,1,1,128,1,float16,fp8,0,0.0104592002928257
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,32,1,1,128,1,float16,float16,0,0.01242239996790886
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,32,1,1,128,1,float16,fp8,0,0.014521600306034088
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,32,1,1,128,1,fp8,fp8,0,0.014628799259662628
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,32,1,1,128,1,float16,float16,0,0.010502400249242783
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,32,1,1,128,1,float16,fp8,0,0.012409599870443344
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,32,1,1,128,1,fp8,fp8,0,0.012617599964141846
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,32,1,1,128,1,float16,float16,0,0.010532800108194351
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,32,1,1,128,1,float16,fp8,0,0.01048320010304451
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,32,1,1,128,1,float16,float16,0,0.010468800365924836
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,32,1,1,128,1,fp8,fp8,0,0.011164800077676774
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,32,1,1,128,1,float16,fp8,0,0.010444799810647965
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,32,1,1,128,1,fp8,fp8,0,0.010475199669599533
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,32,1,1,128,1,float16,fp8,0,0.010449600219726563
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,32,1,1,128,1,float16,float16,0,0.010516799986362457
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,32,1,1,128,1,fp8,fp8,0,0.010468800365924836
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,32,1,1,128,1,float16,float16,0,0.010447999835014344
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,32,1,1,128,1,float16,fp8,0,0.010662399977445603
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,32,1,1,128,1,float16,float16,0,0.010436800122261048
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,32,1,1,128,1,fp8,fp8,0,0.010425599664449692
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,32,1,1,128,1,float16,fp8,0,0.010622400045394897
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,32,1,1,128,1,fp8,fp8,0,0.010388799756765366
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,32,1,1,128,1,float16,float16,0,0.010372799634933472
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,32,1,1,128,1,fp8,fp8,0,0.010366400331258773
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,32,1,1,128,1,float16,fp8,0,0.010452800244092942
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,16,1,1,128,1,float16,float16,0,0.0125231996178627
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,16,1,1,128,1,float16,fp8,0,0.012459199875593185
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,16,1,1,128,1,float16,float16,0,0.010503999888896942
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,16,1,1,128,1,fp8,fp8,0,0.012675200402736665
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,16,1,1,128,1,float16,fp8,0,0.010435199737548828
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,16,1,1,128,1,fp8,fp8,0,0.01051200032234192
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,16,1,1,128,1,float16,fp8,0,0.010398399829864503
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,16,1,1,128,1,float16,float16,0,0.010619200021028518
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,16,1,1,128,1,fp8,fp8,0,0.010604800283908844
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,16,1,1,128,1,float16,float16,0,0.010438399761915207
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,16,1,1,128,1,float16,fp8,0,0.01045600026845932
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,16,1,1,128,1,fp8,fp8,0,0.010416000336408614
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,16,1,1,128,1,float16,float16,0,0.010428799688816071
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,16,1,1,128,1,float16,fp8,0,0.010387200117111205
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,16,1,1,128,1,fp8,fp8,0,0.01043360009789467
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16,1,1,128,1,float16,fp8,0,0.010393600165843963
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16,1,1,128,1,float16,float16,0,0.010424000024795533
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,16,1,1,128,1,fp8,fp8,0,0.010420800000429154
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16,1,1,128,1,float16,float16,0,0.010412800312042236
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16,1,1,128,1,float16,fp8,0,0.01043199971318245
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,16,1,1,128,1,fp8,fp8,0,0.010427200049161912
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16,1,1,128,1,float16,fp8,0,0.010351999849081039
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16,1,1,128,1,float16,float16,0,0.010388799756765366
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,16,1,1,128,1,fp8,fp8,0,0.01045600026845932
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,1,1,1,128,1,float16,float16,0,0.010428799688816071
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,1,1,1,128,1,float16,fp8,0,0.010556799918413162
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,128,1,1,1,128,1,fp8,fp8,0,0.01053759977221489
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1,1,1,128,1,float16,float16,0,0.010486400127410889
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1,1,1,128,1,float16,fp8,0,0.01043040007352829
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,64,1,1,1,128,1,fp8,fp8,0,0.010427200049161912
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1,1,1,128,1,float16,float16,0,0.010446400195360184
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1,1,1,128,1,float16,fp8,0,0.010363200306892395
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,32,1,1,1,128,1,fp8,fp8,0,0.010401599854230881
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1,1,1,128,1,float16,float16,0,0.010407999902963639
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1,1,1,128,1,float16,fp8,0,0.01048159971833229
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,16,1,1,1,128,1,fp8,fp8,0,0.010401599854230881
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1,1,1,128,1,float16,float16,0,0.010427200049161912
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1,1,1,128,1,float16,fp8,0,0.010396800190210342
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,8,1,1,1,128,1,fp8,fp8,0,0.01040000021457672
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1,1,1,128,1,float16,float16,0,0.010364799946546554
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1,1,1,128,1,float16,fp8,0,0.010313600301742554
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1,1,1,128,1,float16,float16,0,0.010416000336408614
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,4,1,1,1,128,1,fp8,fp8,0,0.010377600044012069
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1,1,1,128,1,float16,fp8,0,0.010382399708032609
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,2,1,1,1,128,1,fp8,fp8,0,0.01040000021457672
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1,1,1,128,1,float16,float16,0,0.010471999645233154
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1,1,1,128,1,float16,fp8,0,0.010342399775981902
SGLang,0.5.8.post1,NVIDIA GB200,context_attention,trtllm_mha,1,1,1,1,128,1,fp8,fp8,0,0.010463999956846238
