framework,version,device,op_name,kernel_source,mla_dtype,kv_cache_dtype,num_heads,batch_size,isl,tp_size,step,latency
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,1,1,1,0,0.012603199481964112
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,1,1,8,0,0.01255359947681427
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,1,1,16,0,0.01252480000257492
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,1,1,4,0,0.012595200538635254
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,1,1,2,0,0.01263359934091568
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,1,1,32,0,0.013769599795341491
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,1,1,1,64,0,0.012529599666595458
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,1,1,1,0,0.018833599984645844
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,1,1,2,0,0.020713600516319274
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,1,1,4,0,0.018745599687099455
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,1,1,16,0,0.020755200088024138
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,1,1,32,0,0.01934880018234253
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,1,1,8,0,0.01870719939470291
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,1,1,1,64,0,0.01871040016412735
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,1,16,1,0,0.014655999839305878
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,1,16,2,0,0.013937599956989288
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,1,16,4,0,0.013583999872207642
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,1,16,8,0,0.014633600413799287
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,1,16,32,0,0.01361439973115921
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,1,16,16,0,0.014606399834156037
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,1,1,16,64,0,0.012564800679683685
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,1,16,1,0,0.022777600586414336
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,1,16,2,0,0.02083200067281723
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,1,16,4,0,0.020660799741744996
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,1,16,8,0,0.020755200088024138
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,1,16,16,0,0.020768000185489653
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,1,1,16,64,0,0.019687999784946442
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,1,32,1,0,0.014681600034236908
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,1,16,32,0,0.02082560062408447
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,1,32,8,0,0.014684799313545226
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,1,32,2,0,0.014569599926471711
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,1,32,4,0,0.014587199687957764
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,1,32,32,0,0.01250080019235611
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,1,32,16,0,0.014593599736690522
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,1,1,32,64,0,0.014601600170135499
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,1,32,2,0,0.020798400044441223
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,1,32,4,0,0.020750400424003602
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,1,32,1,0,0.022703999280929567
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,1,32,32,0,0.020720000565052032
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,1,32,8,0,0.022129599750041962
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,1,32,16,0,0.020739200711250304
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,1,64,2,0,0.014688000082969666
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,1,1,32,64,0,0.018673600256443025
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,1,64,1,0,0.016470399498939515
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,1,64,4,0,0.014537599682807923
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,1,64,8,0,0.014519999921321868
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,1,64,16,0,0.014428800344467163
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,1,1,64,64,0,0.014550399780273438
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,1,64,32,0,0.014508800208568573
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,1,64,1,0,0.024747200310230255
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,1,64,4,0,0.02276480048894882
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,1,64,2,0,0.022784000635147093
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,1,64,8,0,0.022617599368095397
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,1,64,16,0,0.022536000609397887
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,1,64,32,0,0.020715199410915375
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,1,1,64,64,0,0.02067199945449829
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,1,128,1,0,0.01656160056591034
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,1,128,4,0,0.016436800360679626
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,1,128,2,0,0.016599999368190767
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,1,128,8,0,0.014455999433994293
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,1,128,16,0,0.0146479994058609
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,1,128,32,0,0.014519999921321868
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,1,1,128,64,0,0.014604799449443817
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,1,128,1,0,0.026873600482940675
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,1,128,2,0,0.022755199670791627
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,1,128,4,0,0.024716800451278685
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,1,128,8,0,0.022899200022220612
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,1,128,16,0,0.022787199914455415
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,1,128,32,0,0.02255360037088394
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,1,1,128,64,0,0.022672000527381896
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,1,256,1,0,0.02473759949207306
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,1,256,2,0,0.02072480022907257
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,1,256,4,0,0.018705600500106813
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,1,256,8,0,0.018646399676799773
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,1,256,16,0,0.018641600012779237
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,1,256,32,0,0.01842560023069382
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,1,1,256,64,0,0.016571199893951415
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,1,256,1,0,0.03296639919281006
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,1,256,2,0,0.030180799961090087
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,1,256,4,0,0.026836800575256347
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,1,256,8,0,0.02667199969291687
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,1,256,16,0,0.024857600033283234
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,1,256,32,0,0.024798400700092316
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,1,1,256,64,0,0.02680639922618866
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,1,512,2,0,0.0268095999956131
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,1,512,1,0,0.030937600135803222
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,1,512,4,0,0.0227743998169899
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,1,512,16,0,0.0208064004778862
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,1,512,8,0,0.02263039946556091
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,1,512,1,0,0.04322879910469055
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,1,1,512,64,0,0.020750400424003602
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,1,512,32,0,0.020729599893093108
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,1,512,2,0,0.03516159951686859
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,1,512,4,0,0.030959999561309813
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,1,512,8,0,0.02887200117111206
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,1,512,16,0,0.02886880040168762
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,1,1,512,64,0,0.02882240116596222
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,1,512,32,0,0.028937599062919615
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,1,1024,1,0,0.061617600917816165
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,1,1024,2,0,0.03903520107269287
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,1,1024,4,0,0.03490079939365387
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,1,1024,8,0,0.030902400612831116
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,1,1024,16,0,0.02903040051460266
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,1,1024,32,0,0.02880159914493561
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,1,1,1024,64,0,0.02717440128326416
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,1,1024,1,0,0.07597919702529907
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,1,1024,2,0,0.0494159996509552
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,1,1024,4,0,0.04309599995613098
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,1,1024,8,0,0.037108799815177916
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,1,1024,16,0,0.03704639971256256
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,1,1024,32,0,0.035068801045417784
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,1,1,1024,64,0,0.035011199116706845
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,1,1536,1,0,0.09862880110740661
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,1,1536,2,0,0.06119999885559082
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,1,1536,4,0,0.04312480092048645
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,1,1536,8,0,0.039129599928855896
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,1,1536,16,0,0.037108799815177916
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,1,1536,32,0,0.03698720037937164
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,1,1,1536,64,0,0.035673600435256955
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,1,1536,1,0,0.11329599618911743
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,1,1536,2,0,0.07219039797782897
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,1,1536,4,0,0.05139039754867554
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,1,1536,8,0,0.045326399803161624
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,1,1536,16,0,0.04319039881229401
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,1,1536,32,0,0.043145599961280826
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,1,1,1536,64,0,0.04113759994506836
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,1,2048,1,0,0.14008320569992067
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,1,2048,2,0,0.08510400056838989
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,1,2048,8,0,0.04917440116405487
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,1,2048,4,0,0.05362880229949951
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,1,2048,16,0,0.04522559940814972
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,1,2048,32,0,0.04318400025367737
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,1,1,2048,64,0,0.0420879989862442
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,1,2048,1,0,0.15453280210494996
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,1,2048,2,0,0.09441279768943786
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,1,2048,4,0,0.06166399717330932
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,1,2048,8,0,0.055580800771713255
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,1,2048,16,0,0.05135999917984009
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,1,2048,32,0,0.04935519993305206
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,1,1,2048,64,0,0.049342399835586546
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,1,3072,1,0,0.24040639400482178
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,1,3072,2,0,0.14052640199661254
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,1,3072,8,0,0.06393920183181763
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,1,3072,4,0,0.09008960127830505
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,1,3072,16,0,0.059724801778793336
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,1,1,3072,64,0,0.05760480165481567
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,1,3072,32,0,0.05760319828987122
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,1,3072,1,0,0.2530848026275635
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,1,3072,8,0,0.0718671977519989
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,1,3072,4,0,0.09483519792556763
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,1,3072,2,0,0.1500831961631775
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,1,3072,16,0,0.06581119894981384
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,1,3072,32,0,0.06362879872322083
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,1,1,3072,64,0,0.06177759766578674
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,1,4096,2,0,0.2113663911819458
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,1,4096,4,0,0.1312608003616333
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,1,4096,1,0,0.3616688013076782
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,1,4096,8,0,0.0840831995010376
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,1,4096,16,0,0.07606880068778991
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,1,4096,32,0,0.07200800180435181
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,1,1,4096,64,0,0.07185440063476563
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,1,4096,8,0,0.0889952003955841
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,1,4096,4,0,0.1345952033996582
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,1,4096,2,0,0.21316320896148683
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,1,4096,32,0,0.07598879933357239
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,1,4096,16,0,0.08065279722213745
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,1,1,4096,64,0,0.07502080202102661
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,1,6144,4,0,0.2273263931274414
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,1,6144,2,0,0.3819375991821289
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,1,6144,8,0,0.14579360485076903
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,1,6144,16,0,0.10811840295791626
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,1,6144,1,0,0.6723792076110839
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,1,6144,32,0,0.10258239507675171
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,1,1,6144,64,0,0.1006127953529358
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,1,6144,4,0,0.2224560022354126
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,1,6144,2,0,0.36953439712524416
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,1,6144,8,0,0.14364320039749146
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,1,6144,1,0,0.6516111850738525
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,1,6144,16,0,0.11045119762420655
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,1,6144,32,0,0.10462080240249634
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,1,1,6144,64,0,0.10184320211410522
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,1,8192,4,0,0.34916799068450927
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,1,8192,2,0,0.5964960098266602
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,1,8192,8,0,0.22121601104736327
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,1,8192,16,0,0.14151999950408936
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,1,8192,1,0,1.100926399230957
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,1,8192,32,0,0.13344000577926635
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,1,1,8192,64,0,0.1274415969848633
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,1,8192,4,0,0.3287807941436768
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,1,8192,2,0,0.5559535980224609
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,1,8192,8,0,0.21098558902740477
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,1,8192,16,0,0.13971199989318847
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,1,8192,32,0,0.13137760162353515
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,1,1,8192,64,0,0.12724640369415283
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,1,8192,1,0,1.1392064094543457
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,1,10240,4,0,0.49233598709106446
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,1,10240,2,0,0.8431247711181641
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,1,10240,8,0,0.3030927896499634
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,1,10240,16,0,0.1868224024772644
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,1,10240,32,0,0.16215840578079224
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,1,1,10240,64,0,0.15784800052642822
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,1,10240,1,0,1.666329574584961
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,1,10240,2,0,0.9475744247436524
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,1,10240,1,0,1.4272895812988282
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,1,10240,4,0,0.5399424076080322
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,1,10240,8,0,0.28171839714050295
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,1,10240,16,0,0.21560161113739013
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,1,10240,32,0,0.15972000360488892
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,1,1,10240,64,0,0.152401602268219
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,1,12288,2,0,1.1513312339782715
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,1,12288,1,0,2.129136085510254
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,1,12288,4,0,0.6512127876281738
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,1,12288,16,0,0.2528496026992798
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,1,12288,8,0,0.5070064067840576
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,1,12288,32,0,0.19308639764785768
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,1,1,12288,64,0,0.18899680376052858
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,1,4096,1,0,0.3623023986816406
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,1,12288,4,0,0.5857183933258057
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,1,12288,2,0,1.0353872299194335
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,1,12288,16,0,0.23718879222869874
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,1,12288,8,0,0.35697760581970217
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,1,12288,32,0,0.1864367961883545
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,1,1,12288,64,0,0.17984800338745116
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,1,12288,1,0,1.9261056900024414
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,1,16384,8,0,0.6149919986724853
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,1,16384,16,0,0.3981760025024414
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,1,16384,4,0,1.0453311920166015
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,1,16384,32,0,0.2636831998825073
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,1,1,16384,64,0,0.2415424108505249
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,1,16384,2,0,2.0729551315307617
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,1,16384,4,0,1.3560784339904786
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,1,16384,2,0,1.7365087509155273
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,1,16384,8,0,0.5432703971862793
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,1,16384,16,0,0.4183568000793457
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,1,16384,1,0,4.889257431030273
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,1,16384,32,0,0.24401440620422363
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,1,1,16384,64,0,0.2735775947570801
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,1,16384,1,0,3.224030303955078
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,1,32768,8,0,2.02270393371582
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,1,32768,16,0,1.3270303726196289
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,1,32768,32,0,0.8150143623352051
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,1,32768,4,0,4.721984100341797
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,1,1,32768,64,0,0.5735487937927246
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,1,32768,2,0,10.246441650390626
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,1,32768,2,0,6.314499282836914
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,1,32768,4,0,3.1673904418945313
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,1,32768,8,0,1.795172882080078
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,1,32768,16,0,1.022326374053955
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,1,32768,32,0,0.6593776226043702
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,2,1,1,0,0.014556799829006196
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,2,1,2,0,0.014504000544548035
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,1,1,32768,64,0,0.44298720359802246
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,2,1,4,0,0.012532800436019897
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,2,1,8,0,0.012457600235939026
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,2,1,16,0,0.012457600235939026
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,2,1,32,0,0.012470400333404541
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,1,2,1,64,0,0.012483199685811996
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,2,1,1,0,0.02266719937324524
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,2,1,2,0,0.022599999606609345
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,2,1,4,0,0.02064799964427948
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,2,1,8,0,0.020715199410915375
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,2,1,16,0,0.020662400126457214
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,2,1,32,0,0.02072319984436035
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,1,2,1,64,0,0.018774400651454925
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,2,16,1,0,0.014532800018787383
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,2,16,2,0,0.014486399292945863
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,2,16,4,0,0.012521600723266602
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,2,16,8,0,0.013568000495433807
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,2,16,16,0,0.014511999487876893
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,2,16,32,0,0.014507199823856353
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,1,2,16,64,0,0.012620800733566284
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,2,16,1,0,0.022724799811840057
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,2,16,2,0,0.02072319984436035
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,2,16,4,0,0.020718400180339814
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,2,16,16,0,0.02080159932374954
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,2,16,8,0,0.02065120041370392
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,2,16,32,0,0.02067199945449829
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,1,2,16,64,0,0.020764799416065217
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,2,32,2,0,0.01446560025215149
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,2,32,1,0,0.01658399999141693
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,2,32,4,0,0.014537599682807923
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,2,32,8,0,0.01449279934167862
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,2,32,16,0,0.012887999415397644
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,2,32,32,0,0.014552000164985656
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,1,2,32,64,0,0.014524799585342408
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,2,32,1,0,0.024694399535655977
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,2,32,2,0,0.022732800245285033
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,2,32,4,0,0.02067359983921051
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,2,32,8,0,0.022708800435066224
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,2,32,16,0,0.02258719950914383
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,2,32,32,0,0.020681600272655486
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,1,2,32,64,0,0.020683200657367708
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,2,64,1,0,0.016569599509239197
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,2,64,2,0,0.01652639955282211
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,2,64,4,0,0.014519999921321868
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,2,64,8,0,0.014632000029087067
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,2,64,16,0,0.014519999921321868
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,2,64,32,0,0.01451520025730133
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,1,2,64,64,0,0.014502400159835815
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,2,64,1,0,0.024803200364112855
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,2,64,2,0,0.024822400510311128
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,2,64,4,0,0.022631999850273133
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,2,64,8,0,0.022697600722312927
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,2,64,16,0,0.02072480022907257
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,2,64,32,0,0.02268480062484741
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,1,2,64,64,0,0.022644799947738648
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,2,128,1,0,0.020641599595546723
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,2,128,2,0,0.016598400473594666
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,1,32768,1,0,13.878810119628906
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,2,128,4,0,0.016550399363040924
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,2,128,8,0,0.016487999260425566
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,2,128,32,0,0.014577600359916686
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,2,128,16,0,0.018699200451374055
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,1,2,128,64,0,0.014521600306034088
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,2,128,2,0,0.026830399036407472
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,2,128,1,0,0.039263999462127684
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,2,128,4,0,0.024817599356174468
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,2,128,8,0,0.029607999324798583
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,2,128,32,0,0.026924800872802735
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,2,128,16,0,0.02280000001192093
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,2,256,1,0,0.028784000873565675
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,1,2,128,64,0,0.02709439992904663
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,2,256,2,0,0.02279680073261261
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,2,256,8,0,0.018639999628067016
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,2,256,4,0,0.026892799139022826
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,2,256,16,0,0.01863359957933426
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,2,256,32,0,0.01876160055398941
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,1,2,256,64,0,0.018636800348758698
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,1,32768,1,0,21.613566589355468
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,2,256,2,0,0.03296479880809784
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,2,256,1,0,0.039315199851989745
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,2,256,4,0,0.04737440049648285
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,2,256,8,0,0.02884800136089325
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,2,256,16,0,0.027003198862075806
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,1,2,256,64,0,0.024792000651359558
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,2,256,32,0,0.03527039885520935
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,2,512,2,0,0.030955201387405394
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,2,512,1,0,0.05140799880027771
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,2,512,4,0,0.04119040071964264
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,2,512,8,0,0.02476000040769577
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,2,512,16,0,0.022878399491310118
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,2,512,32,0,0.033107200264930726
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,1,2,512,64,0,0.022703999280929567
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,2,512,1,0,0.06699519753456115
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,2,512,4,0,0.0370608001947403
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,2,512,2,0,0.06391839981079102
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,2,512,8,0,0.03298560082912445
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,2,512,16,0,0.030947199463844298
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,2,512,32,0,0.04126079976558685
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,1,2,512,64,0,0.02890239953994751
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,2,1024,4,0,0.039668801426887515
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,2,1024,1,0,0.10628319978713989
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,2,1024,2,0,0.06370400190353394
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,2,1024,16,0,0.032918399572372435
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,2,1024,32,0,0.03102239966392517
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,2,1024,8,0,0.035158398747444156
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,1,2,1024,64,0,0.028836798667907716
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,2,1024,2,0,0.07774080038070678
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,2,1024,1,0,0.12719520330429077
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,2,1024,4,0,0.04943360090255737
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,2,1024,8,0,0.04527519941329956
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,2,1024,16,0,0.04056479930877686
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,2,1024,32,0,0.0391184002161026
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,1,2,1024,64,0,0.037294399738311765
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,2,1536,2,0,0.10171680450439453
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,2,1536,1,0,0.17142560482025146
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,2,1536,4,0,0.06327040195465088
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,2,1536,8,0,0.0453247994184494
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,2,1536,16,0,0.041254401206970215
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,2,1536,32,0,0.0392192006111145
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,1,2,1536,64,0,0.037041598558425905
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,2,1536,1,0,0.19597920179367065
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,2,1536,4,0,0.07432960271835327
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,2,1536,2,0,0.11433759927749634
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,2,1536,16,0,0.049414399266242984
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,2,1536,8,0,0.05554080009460449
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,2,1536,32,0,0.04540640115737915
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,1,2,1536,64,0,0.04522239863872528
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,2,2048,2,0,0.14301600456237792
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,2,2048,1,0,0.2503567934036255
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,2,2048,4,0,0.08854399919509888
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,2,2048,8,0,0.055561602115631104
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,2,2048,16,0,0.05133280158042908
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,2,2048,32,0,0.04740320146083832
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,1,2,2048,64,0,0.04535039961338043
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,2,2048,2,0,0.15714080333709718
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,2,2048,1,0,0.27481439113616946
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,2,2048,4,0,0.09793599843978881
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,2,2048,8,0,0.06579359769821166
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,2,2048,16,0,0.05956799983978271
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,2,2048,32,0,0.05548959970474243
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,1,2,2048,64,0,0.053492802381515506
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,2,3072,8,0,0.09385120272636413
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,2,3072,4,0,0.14537279605865477
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,2,3072,2,0,0.24579041004180907
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,2,3072,16,0,0.06979359984397888
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,2,3072,32,0,0.0657423973083496
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,1,2,3072,64,0,0.06364639997482299
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,2,3072,1,0,0.43829917907714844
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,2,3072,4,0,0.1560431957244873
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,2,3072,2,0,0.2572511911392212
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,2,3072,8,0,0.10021439790725709
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,2,3072,1,0,0.4575376033782959
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,2,3072,16,0,0.07598239779472352
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,1,2,3072,64,0,0.06778720021247864
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,2,3072,32,0,0.06987360119819641
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,2,4096,4,0,0.2157696008682251
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,2,4096,2,0,0.3699552059173584
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,2,4096,1,0,0.6685872077941895
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,2,4096,16,0,0.08837760090827942
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,2,4096,8,0,0.1369920015335083
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,1,2,4096,64,0,0.07802720069885254
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,2,4096,32,0,0.08212000131607056
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,2,4096,4,0,0.22008960247039794
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,2,4096,2,0,0.3757215976715088
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,2,4096,8,0,0.1422927975654602
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,2,4096,16,0,0.0962992012500763
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,2,4096,1,0,0.6713615894317627
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,2,4096,32,0,0.08818079829216004
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,1,2,4096,64,0,0.08420159816741943
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,2,6144,4,0,0.3904880046844482
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,2,6144,2,0,0.680079984664917
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,2,6144,8,0,0.23513119220733641
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,2,6144,16,0,0.15287519693374635
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,2,6144,32,0,0.11724319458007812
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,1,2,6144,64,0,0.11058559417724609
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,2,6144,1,0,1.2748895645141602
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,2,6144,2,0,0.6581759929656983
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,2,6144,4,0,0.37883360385894777
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,2,6144,16,0,0.15424319505691528
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,2,6144,8,0,0.23153600692749024
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,2,6144,1,0,1.2181391716003418
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,2,6144,32,0,0.12133599519729614
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,1,2,6144,64,0,0.11496800184249878
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,2,8192,4,0,0.600867223739624
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,2,8192,2,0,1.0922639846801758
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,2,8192,8,0,0.3566335916519165
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,2,8192,32,0,0.15363839864730836
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,2,8192,16,0,0.23156960010528566
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,1,2,8192,64,0,0.1436576008796692
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,2,8192,1,0,2.4041263580322267
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,2,8192,4,0,0.5735519886016845
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,2,8192,2,0,1.0146575927734376
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,2,8192,8,0,0.47569122314453127
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,2,8192,16,0,0.22683999538421631
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,2,8192,32,0,0.15584800243377686
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,1,2,8192,64,0,0.21332800388336182
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,2,8192,1,0,2.017185592651367
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,2,10240,8,0,0.5170544147491455
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,2,10240,4,0,1.1712608337402344
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,2,10240,16,0,0.33352160453796387
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,2,10240,32,0,0.20945279598236083
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,2,10240,2,0,1.6416912078857422
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,1,2,10240,64,0,0.19933760166168213
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,2,10240,4,0,0.9103808403015137
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,2,10240,1,0,3.5886096954345703
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,2,10240,8,0,0.4702847957611084
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,2,10240,2,0,1.7690256118774415
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,2,10240,32,0,0.19866080284118653
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,1,2,10240,64,0,0.17819679975509645
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,2,10240,16,0,0.47624478340148924
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,2,10240,1,0,2.8654512405395507
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,2,12288,8,0,0.6803999900817871
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,2,12288,4,0,1.2775952339172363
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,2,12288,32,0,0.29676640033721924
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,2,12288,16,0,0.4271391868591309
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,2,12288,2,0,2.148931121826172
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,1,2,12288,64,0,0.24037280082702636
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,2,12288,4,0,1.063003158569336
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,2,12288,8,0,0.6158783912658692
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,2,12288,16,0,0.3843456029891968
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,2,12288,2,0,2.4391855239868163
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,1,2,12288,64,0,0.20789599418640137
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,2,12288,32,0,0.2610368013381958
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,2,12288,1,0,5.876523208618164
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,2,12288,1,0,4.054264068603516
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,2,16384,8,0,1.0819808006286622
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,2,16384,16,0,0.7679776191711426
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,2,16384,4,0,2.888092803955078
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,2,16384,32,0,0.4761551856994629
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,1,2,16384,64,0,0.4029856204986572
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,2,16384,2,0,4.982993698120117
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,2,16384,4,0,1.9713264465332032
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,2,16384,2,0,3.4029537200927735
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,2,16384,8,0,0.9636367797851563
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,2,16384,16,0,0.6664144039154053
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,2,16384,32,0,0.3904416084289551
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,1,2,16384,64,0,0.27324481010437013
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,2,16384,1,0,10.640147399902343
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,2,16384,1,0,7.01466064453125
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,2,32768,8,0,5.027041625976563
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,2,32768,16,0,2.882681655883789
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,2,32768,32,0,1.531107234954834
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,1,2,32768,64,0,0.8671695709228515
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,2,32768,4,0,11.404009246826172
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,2,32768,2,0,24.32832489013672
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,2,32768,2,0,13.438035583496093
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,2,32768,8,0,3.507555389404297
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,2,32768,4,0,7.065020751953125
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,2,32768,32,0,1.281439971923828
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,2,32768,16,0,1.8016271591186523
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,1,2,32768,64,0,0.686846399307251
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,4,1,1,0,0.021966400742530822
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,4,1,2,0,0.014526399970054626
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,4,1,4,0,0.014476799964904785
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,4,1,8,0,0.014539200067520141
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,4,1,16,0,0.014505599439144135
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,4,1,32,0,0.014467200636863709
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,1,4,1,64,0,0.012545600533485413
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,4,1,1,0,0.025358399748802184
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,4,1,2,0,0.022758400440216063
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,4,1,4,0,0.020768000185489653
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,4,1,8,0,0.02130720019340515
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,4,1,16,0,0.02070080041885376
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,4,1,32,0,0.021396799385547637
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,1,4,1,64,0,0.02075680047273636
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,4,16,1,0,0.018649600446224213
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,4,16,2,0,0.015747199952602386
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,4,16,4,0,0.014537599682807923
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,4,16,8,0,0.014507199823856353
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,4,16,16,0,0.014537599682807923
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,4,16,32,0,0.012566399574279786
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,1,4,16,64,0,0.01446399986743927
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,4,16,1,0,0.02686080038547516
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,4,16,2,0,0.022742399573326112
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,4,16,4,0,0.02268960028886795
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,4,16,8,0,0.02268799990415573
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,4,16,16,0,0.02065120041370392
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,4,16,32,0,0.02152319997549057
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,1,4,16,64,0,0.020691199600696562
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,4,32,1,0,0.020656000077724456
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,4,32,2,0,0.016607999801635742
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,4,32,4,0,0.014552000164985656
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,4,32,8,0,0.01454080045223236
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,4,32,16,0,0.014511999487876893
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,4,32,32,0,0.014507199823856353
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,1,4,32,64,0,0.014588800072669984
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,4,32,1,0,0.028908801078796387
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,4,32,2,0,0.022750400006771088
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,4,32,4,0,0.02271520048379898
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,4,32,8,0,0.020735999941825865
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,4,32,16,0,0.022729599475860597
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,1,4,32,64,0,0.02075359970331192
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,4,32,32,0,0.02115679979324341
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,4,64,1,0,0.0247856006026268
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,4,64,2,0,0.016991999745368958
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,4,64,4,0,0.01659359931945801
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,4,64,16,0,0.014524799585342408
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,4,64,8,0,0.016305600106716157
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,4,64,32,0,0.014542399346828461
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,1,4,64,64,0,0.015859200060367583
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,4,64,1,0,0.03300159871578216
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,4,64,2,0,0.02704800069332123
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,4,64,4,0,0.022767999768257143
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,4,64,16,0,0.022729599475860597
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,4,64,8,0,0.0249439999461174
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,4,64,32,0,0.02272160053253174
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,2,32768,1,0,28.063412475585938
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,1,4,64,64,0,0.023095999658107758
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,4,128,1,0,0.028857600688934327
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,4,128,2,0,0.026811200380325317
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,4,128,8,0,0.01648319959640503
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,4,128,4,0,0.01807519942522049
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,4,128,16,0,0.018606400489807128
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,4,128,32,0,0.01454080045223236
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,1,4,128,64,0,0.014905600249767304
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,4,128,1,0,0.053534400463104245
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,4,128,2,0,0.030935999751091004
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,4,128,4,0,0.027063998579978942
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,4,128,8,0,0.03094240128993988
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,4,128,16,0,0.02276639938354492
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,1,4,128,64,0,0.022657600045204163
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,4,128,32,0,0.022976000607013703
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,4,256,1,0,0.04526239931583405
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,4,256,2,0,0.028857600688934327
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,4,256,8,0,0.020721599459648132
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,4,256,4,0,0.025563201308250426
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,4,256,16,0,0.020638400316238405
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,4,256,32,0,0.0186831995844841
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,1,4,256,64,0,0.01882079988718033
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,4,256,1,0,0.05966399908065796
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,4,256,2,0,0.03908959925174713
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,4,256,4,0,0.03330079913139343
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,4,256,8,0,0.030158400535583496
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,4,256,16,0,0.026843199133872987
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,4,256,32,0,0.026759999990463256
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,1,4,256,64,0,0.026817598938941957
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,4,512,1,0,0.08825439810752869
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,4,512,4,0,0.03299199938774109
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,4,512,2,0,0.053504002094268796
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,4,512,8,0,0.02884480059146881
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,4,512,16,0,0.024775999784469604
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,4,512,32,0,0.025028800964355467
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,1,4,512,64,0,0.022720000147819518
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,4,512,4,0,0.04532800018787384
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,4,512,1,0,0.1124608039855957
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,4,512,2,0,0.06799039840698243
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,4,512,16,0,0.035020801424980166
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,4,512,32,0,0.03239200115203857
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,1,4,512,64,0,0.030876800417900085
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,4,512,8,0,0.03715200126171112
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,4,1024,2,0,0.1074895977973938
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,4,1024,1,0,0.19071999788284302
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,4,1024,4,0,0.06583520174026489
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,4,1024,16,0,0.039087998867034915
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,4,1024,8,0,0.04343039989471435
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,4,1024,32,0,0.03504480123519897
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,2,32768,1,0,45.45478820800781
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,1,4,1024,64,0,0.03314560055732727
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,4,1024,1,0,0.22547519207000732
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,4,1024,4,0,0.0812287986278534
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,4,1024,2,0,0.23978719711303711
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,4,1024,8,0,0.053439998626708986
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,4,1024,16,0,0.0474368005990982
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,4,1024,32,0,0.07403039932250977
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,1,4,1024,64,0,0.04197440147399902
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,4,1536,4,0,0.10470080375671387
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,4,1536,1,0,0.3141999959945679
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,4,1536,8,0,0.06770719885826111
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,4,1536,2,0,0.3154416084289551
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,4,1536,16,0,0.04958080053329468
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,4,1536,32,0,0.04537599980831146
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,1,4,1536,64,0,0.043188801407814024
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,4,1536,4,0,0.12178560495376586
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,4,1536,2,0,0.2010960102081299
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,4,1536,1,0,0.35665440559387207
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,4,1536,16,0,0.059592002630233766
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,4,1536,8,0,0.07931039929389953
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,4,1536,32,0,0.05568320155143738
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,1,4,1536,64,0,0.051374399662017824
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,4,2048,4,0,0.14766240119934082
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,4,2048,2,0,0.25439839363098143
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,4,2048,1,0,0.46580958366394043
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,4,2048,8,0,0.0928655982017517
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,4,2048,16,0,0.0617904007434845
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,4,2048,32,0,0.057518398761749266
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,1,4,2048,64,0,0.05251200199127197
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,4,2048,4,0,0.16410080194473267
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,4,2048,2,0,0.282039999961853
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,4,2048,1,0,0.5034624099731445
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,4,2048,8,0,0.10443359613418579
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,4,2048,16,0,0.07190719842910767
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,4,2048,32,0,0.06404640078544617
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,1,4,2048,64,0,0.06099200248718262
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,4,3072,4,0,0.25212960243225097
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,4,3072,2,0,0.447544002532959
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,4,3072,8,0,0.15326240062713622
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,4,3072,16,0,0.10291520357131959
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,4,3072,1,0,0.8358559608459473
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,4,3072,32,0,0.0779919981956482
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,1,4,3072,64,0,0.0723039984703064
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,4,3072,4,0,0.26909599304199217
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,4,3072,2,0,0.4635151863098145
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,4,3072,16,0,0.11085280179977416
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,4,3072,8,0,0.1653872013092041
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,4,3072,32,0,0.08625919818878174
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,1,4,3072,64,0,0.08227840065956116
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,4,3072,1,0,1.1636256217956542
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,4,4096,4,0,0.42481122016906736
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,4,4096,2,0,0.684007978439331
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,4,4096,16,0,0.14823039770126342
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,4,4096,8,0,0.2294111967086792
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,4,4096,32,0,0.10012320280075074
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,1,4,4096,64,0,0.09446560144424439
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,4,4096,1,0,1.272385597229004
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,4,4096,4,0,0.39252960681915283
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,4,4096,2,0,0.6872672080993653
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,4,4096,8,0,0.2421504020690918
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,4,4096,16,0,0.1583232045173645
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,1,4,4096,64,0,0.10222400426864624
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,4,4096,32,0,0.11096320152282715
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,4,4096,1,0,1.2782095909118651
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,4,6144,4,0,0.7069136142730713
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,4,6144,8,0,0.4065408229827881
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,4,6144,32,0,0.17204480171203612
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,4,6144,16,0,0.2518255949020386
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,4,6144,2,0,1.27893123626709
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,1,4,6144,64,0,0.134825599193573
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,4,6144,4,0,0.6845392227172852
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,4,6144,1,0,2.832601547241211
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,4,6144,2,0,1.258668804168701
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,4,6144,8,0,0.40078558921813967
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,4,6144,32,0,0.17634400129318237
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,4,6144,16,0,0.3790640115737915
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,1,4,6144,64,0,0.14185279607772827
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,4,6144,1,0,2.3441152572631836
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,4,8192,4,0,1.1178144454956054
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,4,8192,8,0,0.6478400230407715
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,4,8192,16,0,0.3889807939529419
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,4,8192,32,0,0.2658688068389893
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,1,4,8192,64,0,0.188646399974823
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,4,8192,2,0,2.5103952407836916
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,4,8192,2,0,1.9368127822875976
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,4,8192,4,0,1.4353856086730956
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,4,8192,16,0,0.3910831928253174
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,4,8192,8,0,0.5993648052215577
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,4,8192,1,0,5.436240005493164
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,1,4,8192,64,0,0.1843135952949524
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,4,8192,32,0,0.26846239566802976
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,4,8192,1,0,3.946104049682617
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,4,10240,4,0,1.6425184249877929
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,4,10240,8,0,1.2460512161254882
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,4,10240,2,0,3.8143550872802736
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,4,10240,16,0,0.5920239925384522
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,1,4,10240,64,0,0.24542880058288574
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,4,10240,32,0,0.5085103988647461
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,4,10240,2,0,2.812139129638672
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,4,10240,8,0,0.921332836151123
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,4,10240,1,0,9.257073974609375
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,4,10240,4,0,1.481816005706787
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,4,10240,16,0,0.5050655841827393
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,4,10240,32,0,0.5124288082122803
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,1,4,10240,64,0,0.23471200466156006
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,4,10240,1,0,5.9886737823486325
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,4,12288,8,0,1.2583248138427734
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,4,12288,4,0,2.315888023376465
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,4,12288,16,0,0.727564811706543
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,4,12288,32,0,0.6549248218536377
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,1,4,12288,64,0,0.3767008066177368
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,4,12288,2,0,6.381644821166992
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,4,12288,2,0,4.117884826660156
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,4,12288,4,0,2.4999200820922853
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,4,12288,8,0,1.208847999572754
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,4,12288,32,0,0.43250718116760256
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,4,12288,16,0,0.692739200592041
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,1,4,12288,64,0,0.30530080795288084
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,4,12288,1,0,13.006224060058594
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,4,12288,1,0,8.800070190429688
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,4,16384,8,0,2.1752479553222654
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,4,16384,16,0,1.5694144248962403
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,4,16384,32,0,0.9461008071899414
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,4,16384,4,0,5.884606552124024
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,1,4,16384,64,0,0.520411205291748
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,4,16384,2,0,11.559060668945312
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,4,16384,4,0,3.739393615722656
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,4,16384,2,0,7.749747467041016
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,4,16384,8,0,1.9451375961303712
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,4,16384,16,0,1.328003215789795
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,4,16384,32,0,0.6592576026916503
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,8,1,1,0,0.0295087993144989
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,1,4,16384,64,0,0.45528321266174315
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,8,1,2,0,0.020745599269866945
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,8,1,4,0,0.016657599806785585
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,8,1,8,0,0.014633600413799287
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,8,1,16,0,0.014552000164985656
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,8,1,32,0,0.0135903999209404
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,1,8,1,64,0,0.01449120044708252
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,8,1,1,0,0.03185440003871918
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,8,1,2,0,0.027024000883102417
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,8,1,4,0,0.022651199996471406
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,8,1,16,0,0.020576000213623047
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,8,1,8,0,0.022801600396633148
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,8,1,32,0,0.020735999941825865
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,1,8,1,64,0,0.02475520074367523
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,8,16,1,0,0.024846400320529937
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,8,16,2,0,0.02056799978017807
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,8,16,4,0,0.014684799313545226
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,8,16,8,0,0.014688000082969666
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,8,16,16,0,0.014603200554847717
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,8,16,32,0,0.014665600657463074
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,1,8,16,64,0,0.01451520025730133
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,8,16,1,0,0.037041598558425905
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,8,16,2,0,0.026793599128723145
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,8,16,4,0,0.024878400564193725
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,8,16,8,0,0.022588799893856048
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,8,16,16,0,0.02285120040178299
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,8,16,32,0,0.02285760045051575
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,1,8,16,64,0,0.021771200001239777
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,8,32,1,0,0.029979199171066284
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,8,32,2,0,0.020747199654579163
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,8,32,4,0,0.016465599834918975
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,8,32,8,0,0.016571199893951415
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,8,32,16,0,0.014636799693107605
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,8,32,32,0,0.01478080004453659
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,1,8,32,64,0,0.014617599546909332
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,8,32,1,0,0.037227201461791995
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,8,32,2,0,0.028812798857688903
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,8,32,4,0,0.02470400035381317
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,8,32,8,0,0.022815999388694764
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,8,32,16,0,0.02282399982213974
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,8,32,32,0,0.02268480062484741
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,1,8,32,64,0,0.021756799519062044
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,8,64,1,0,0.035094401240348815
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,8,64,4,0,0.018459199368953703
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,8,64,2,0,0.024879999458789825
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,8,64,8,0,0.01669600009918213
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,8,64,16,0,0.015401600301265717
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,8,64,32,0,0.0147024005651474
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,1,8,64,64,0,0.014529600739479065
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,8,64,1,0,0.047188800573348996
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,8,64,2,0,0.03311200141906738
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,8,64,4,0,0.026705598831176756
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,8,64,8,0,0.024831999838352204
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,8,64,16,0,0.02271520048379898
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,8,64,32,0,0.022865599393844603
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,4,16384,1,0,23.41107177734375
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,1,8,64,64,0,0.022814400494098663
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,8,128,1,0,0.047259199619293216
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,8,128,4,0,0.02266400009393692
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,8,128,2,0,0.05342239737510681
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,8,128,8,0,0.018644799292087556
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,8,128,16,0,0.016788800060749055
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,8,128,32,0,0.016708800196647645
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,1,8,128,64,0,0.016475200653076172
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,8,128,1,0,0.06366879940032959
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,8,128,2,0,0.04127680063247681
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,8,128,8,0,0.026892799139022826
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,8,128,4,0,0.03293760120868683
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,8,128,32,0,0.024883200228214265
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,8,128,16,0,0.025419199466705324
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,1,8,128,64,0,0.02287199944257736
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,8,256,1,0,0.0807424008846283
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,8,256,2,0,0.04529919922351837
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,8,256,4,0,0.03099679946899414
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,4,16384,1,0,15.025283813476562
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,8,256,8,0,0.02699359953403473
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,8,256,16,0,0.022700800001621245
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,8,256,32,0,0.02484000027179718
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,1,8,256,64,0,0.02077919989824295
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,8,256,1,0,0.10481439828872681
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,8,256,2,0,0.07407680153846741
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,8,256,4,0,0.04311839938163757
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,8,256,8,0,0.03706560134887695
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,8,256,16,0,0.031006398797035217
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,8,256,32,0,0.030873599648475646
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,1,8,256,64,0,0.0289792001247406
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,8,512,2,0,0.0904911994934082
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,8,512,1,0,0.16030720472335816
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,8,512,4,0,0.05546560287475586
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,8,512,8,0,0.037278398871421814
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,8,512,16,0,0.03097440004348755
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,8,512,32,0,0.028984001278877257
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,1,8,512,64,0,0.02701280117034912
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,8,512,4,0,0.07189120054244995
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,8,512,1,0,0.20286240577697753
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,8,512,2,0,0.11502079963684082
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,8,512,16,0,0.04118880033493042
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,8,512,32,0,0.037124800682067874
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,8,512,8,0,0.04748159945011139
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,1,8,512,64,0,0.03622080087661743
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,8,1024,2,0,0.19725919961929322
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,8,1024,4,0,0.11418399810791016
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,8,1024,1,0,0.35782880783081056
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,8,1024,8,0,0.07194079756736756
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,8,1024,16,0,0.047328001260757445
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,8,1024,32,0,0.045193600654602054
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,1,8,1024,64,0,0.0411296010017395
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,8,1024,2,0,0.23374080657958984
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,8,1024,4,0,0.136518394947052
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,8,1024,1,0,0.4171472072601318
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,8,1024,16,0,0.061622399091720584
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,8,1024,8,0,0.08820480108261108
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,8,1024,32,0,0.053487998247146604
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,1,8,1024,64,0,0.04936479926109314
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,8,1536,4,0,0.18625439405441285
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,8,1536,2,0,0.32349119186401365
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,8,1536,1,0,0.602836799621582
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,8,1536,16,0,0.07773439884185791
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,8,1536,8,0,0.11426559686660767
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,8,1536,32,0,0.05954239964485168
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,1,8,1536,64,0,0.05543199777603149
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,8,1536,4,0,0.21127519607543946
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,8,1536,2,0,0.3671600103378296
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,8,1536,8,0,0.13096799850463867
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,8,1536,1,0,0.670033597946167
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,8,1536,16,0,0.09079520106315613
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,8,1536,32,0,0.07187359929084777
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,1,8,1536,64,0,0.06375679969787598
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,8,2048,4,0,0.2666719913482666
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,8,2048,2,0,0.4750864028930664
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,8,2048,8,0,0.16072800159454345
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,8,2048,32,0,0.07311999797821045
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,8,2048,1,0,0.8901087760925293
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,1,8,2048,64,0,0.06784160137176513
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,8,2048,16,0,0.10481280088424683
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,8,2048,8,0,0.17840319871902466
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,8,2048,4,0,0.29632480144500734
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,8,2048,2,0,0.519217586517334
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,8,2048,32,0,0.0883023977279663
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,8,2048,16,0,0.12070239782333374
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,1,8,2048,64,0,0.0802191972732544
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,8,2048,1,0,0.9591823577880859
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,8,3072,8,0,0.2714848041534424
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,8,3072,4,0,0.4658639907836914
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,8,3072,32,0,0.1205456018447876
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,8,3072,16,0,0.17165919542312622
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,8,3072,2,0,0.8403727531433105
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,1,8,3072,64,0,0.09446240067481995
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,8,3072,1,0,1.5997103691101073
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,8,3072,4,0,0.4896704196929932
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,8,3072,16,0,0.18661760091781615
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,8,3072,2,0,0.875489616394043
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,8,3072,8,0,0.2901695966720581
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,8,3072,32,0,0.13477280139923095
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,1,8,3072,64,0,0.10887199640274048
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,8,3072,1,0,1.6487455368041992
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,8,4096,4,0,0.7024447917938232
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,8,4096,8,0,0.40524959564208984
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,8,4096,16,0,0.2507839918136597
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,8,4096,32,0,0.17281440496444703
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,8,4096,2,0,1.3420656204223633
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,1,8,4096,64,0,0.12536799907684326
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,8,4096,1,0,2.5925056457519533
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,8,4096,2,0,1.3319519996643066
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,8,4096,8,0,0.41452798843383787
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,8,4096,4,0,0.9935104370117187
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,8,4096,16,0,0.2636735916137695
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,8,4096,32,0,0.18948639631271363
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,8,4096,1,0,2.606671905517578
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,1,8,4096,64,0,0.1376736044883728
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,8,6144,4,0,1.3115967750549316
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,8,6144,8,0,0.76462721824646
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,8,6144,16,0,0.44276962280273435
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,8,6144,2,0,3.247614288330078
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,8,6144,32,0,0.29047839641571044
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,1,8,6144,64,0,0.28903679847717284
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,8,6144,4,0,1.3219375610351562
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,8,6144,2,0,2.403539276123047
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,8,6144,8,0,0.7288335800170899
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,8,6144,1,0,6.712872314453125
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,8,6144,32,0,0.29810080528259275
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,8,6144,16,0,0.46072959899902344
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,1,8,6144,64,0,0.33178880214691164
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,8,6144,1,0,4.820982360839844
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,8,8192,4,0,2.4862895965576173
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,8,8192,8,0,1.1526783943176269
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,8,8192,32,0,0.42527041435241697
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,8,8192,16,0,0.9021007537841796
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,1,8,8192,64,0,0.3009104013442993
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,8,8192,2,0,5.555209732055664
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,8,8192,4,0,2.0925935745239257
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,8,8192,2,0,3.9118606567382814
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,8,8192,16,0,0.6671520233154297
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,8,8192,8,0,1.1307680130004882
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,8,8192,32,0,0.4379631996154785
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,16,1,1,0,0.048390400409698484
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,1,8,8192,64,0,0.31026558876037597
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,16,1,2,0,0.026526400446891786
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,16,1,8,0,0.014670400321483612
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,16,1,4,0,0.01913439929485321
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,16,1,16,0,0.014612799882888794
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,16,1,32,0,0.01266079992055893
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,1,16,1,64,0,0.014468799531459808
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,16,1,1,0,0.03933280110359192
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,16,1,2,0,0.030937600135803222
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,16,1,8,0,0.022681599855422972
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,16,1,4,0,0.026867198944091796
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,16,1,16,0,0.02077919989824295
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,16,1,32,0,0.020851199328899384
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,1,16,1,64,0,0.020638400316238405
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,16,16,1,0,0.037520000338554384
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,16,16,2,0,0.024985599517822265
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,16,16,8,0,0.01658719927072525
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,16,16,4,0,0.018729600310325622
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,16,16,16,0,0.014636799693107605
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,16,16,32,0,0.014572800695896148
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,1,16,16,64,0,0.014558400213718414
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,16,16,1,0,0.047331199049949646
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,16,16,2,0,0.03296639919281006
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,8,8192,1,0,12.264310455322265
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,16,16,8,0,0.022737599909305573
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,16,16,4,0,0.027003198862075806
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,16,16,32,0,0.020793600380420683
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,16,16,16,0,0.030958399176597595
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,1,16,16,64,0,0.020878399908542632
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,16,32,2,0,0.04529440104961395
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,16,32,1,0,0.043227198719978335
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,16,32,4,0,0.020688000321388244
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,16,32,8,0,0.02280000001192093
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,16,32,16,0,0.015440000593662262
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,16,32,32,0,0.014632000029087067
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,1,16,32,64,0,0.02077440023422241
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,16,32,1,0,0.053457599878311154
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,16,32,4,0,0.043201598525047305
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,16,32,2,0,0.039068800210952756
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,16,32,8,0,0.022843199968338012
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,16,32,16,0,0.032902398705482484
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,16,32,32,0,0.02284799963235855
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,1,16,32,64,0,0.022761599719524385
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,16,64,1,0,0.05560960173606873
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,8,8192,1,0,8.78296127319336
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,16,64,2,0,0.035838401317596434
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,16,64,4,0,0.024745599925518037
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,16,64,8,0,0.018478399515151976
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,16,64,32,0,0.016648000478744505
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,1,16,64,64,0,0.014593599736690522
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,16,64,16,0,0.020582400262355804
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,16,64,1,0,0.06996960043907166
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,16,64,4,0,0.03501279950141907
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,16,64,2,0,0.04546720087528229
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,16,64,8,0,0.030854400992393494
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,16,64,16,0,0.026627200841903686
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,16,64,32,0,0.02279839962720871
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,1,16,64,64,0,0.022836799919605254
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,16,128,2,0,0.04941120147705078
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,16,128,1,0,0.09852160215377807
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,16,128,4,0,0.030929601192474364
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,16,128,8,0,0.022940799593925476
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,16,128,16,0,0.020577600598335265
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,16,128,32,0,0.022708800435066224
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,1,16,128,64,0,0.016595199704170227
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,16,128,2,0,0.06506400108337403
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,16,128,4,0,0.04527519941329956
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,16,128,1,0,0.10879520177841187
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,16,128,8,0,0.033102399110794066
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,16,128,16,0,0.02884480059146881
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,16,128,32,0,0.02691200077533722
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,1,16,128,64,0,0.02484479993581772
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,16,256,2,0,0.08296959996223449
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,16,256,4,0,0.04940800070762634
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,16,256,1,0,0.1432960033416748
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,16,256,8,0,0.03296799957752228
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,16,256,16,0,0.029044800996780397
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,16,256,32,0,0.026807999610900878
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,1,16,256,64,0,0.022814400494098663
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,16,256,2,0,0.10870879888534546
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,16,256,1,0,0.18739999532699586
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,16,256,4,0,0.06573119759559631
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,16,256,8,0,0.045256000757217404
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,16,256,16,0,0.039241600036621097
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,16,256,32,0,0.03501920104026794
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,1,16,256,64,0,0.03303520083427429
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,16,512,2,0,0.16599520444869995
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,16,512,4,0,0.09667360186576843
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,16,512,1,0,0.2996959924697876
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,16,512,8,0,0.06059359908103943
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,16,512,16,0,0.04118880033493042
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,16,512,32,0,0.037134400010108946
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,1,16,512,64,0,0.03302719891071319
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,16,512,4,0,0.12203999757766723
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,16,512,2,0,0.2092479944229126
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,16,512,1,0,0.3703023910522461
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,16,512,8,0,0.07801759839057923
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,16,512,16,0,0.05552160143852234
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,16,512,32,0,0.04933120012283325
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,1,16,512,64,0,0.045307201147079465
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,16,1024,4,0,0.2076256036758423
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,16,1024,2,0,0.36471679210662844
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,16,1024,16,0,0.08488799929618836
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,16,1024,8,0,0.12631839513778687
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,16,1024,1,0,0.6840943813323974
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,1,16,1024,64,0,0.055478399991989134
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,16,1024,32,0,0.05976639986038208
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,16,1024,4,0,0.2471343994140625
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,16,1024,2,0,0.4315360069274902
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,16,1024,8,0,0.14977600574493408
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,16,1024,1,0,0.7965968132019043
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,16,1024,16,0,0.10290720462799072
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,16,1024,32,0,0.07607359886169433
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,1,16,1024,64,0,0.06978560090065003
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,16,1536,4,0,0.33974881172180177
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,16,1536,2,0,0.6106991767883301
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,16,1536,16,0,0.13112159967422485
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,16,1536,8,0,0.2024240016937256
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,16,1536,32,0,0.0924351990222931
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,1,16,1536,64,0,0.07612959742546081
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,16,1536,1,0,1.1562416076660156
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,16,1536,4,0,0.3888943910598755
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,16,1536,2,0,0.6928991794586181
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,16,1536,8,0,0.2329024076461792
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,16,1536,16,0,0.15396159887313843
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,16,1536,32,0,0.11369600296020507
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,1,16,1536,64,0,0.09243360161781311
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,16,1536,1,0,1.2897104263305663
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,16,2048,4,0,0.4988431930541992
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,16,2048,8,0,0.28668320178985596
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,16,2048,32,0,0.12873920202255248
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,16,2048,16,0,0.1836192011833191
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,1,16,2048,64,0,0.09481440186500549
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,16,2048,2,0,0.9130191802978516
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,16,2048,1,0,1.7199615478515624
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,16,2048,4,0,0.5458687782287598
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,16,2048,2,0,0.9851679801940918
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,16,2048,8,0,0.3259504079818726
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,16,2048,16,0,0.20984160900115967
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,1,16,2048,64,0,0.11856000423431397
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,16,2048,32,0,0.14874720573425293
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,16,2048,1,0,1.8648704528808593
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,16,3072,4,0,0.8706864356994629
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,16,3072,8,0,0.49969120025634767
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,16,3072,32,0,0.20788800716400146
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,16,3072,16,0,0.30492639541625977
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,1,16,3072,64,0,0.15393279790878295
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,16,3072,2,0,1.6281440734863282
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,16,3072,4,0,1.0060784339904785
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,16,3072,2,0,1.6917024612426759
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,16,3072,8,0,0.5304143905639649
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,16,3072,1,0,3.7548286437988283
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,16,3072,32,0,0.22949280738830566
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,16,3072,16,0,0.3354320049285889
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,1,16,3072,64,0,0.2398655891418457
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,16,3072,1,0,3.2353679656982424
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,16,4096,4,0,1.3560799598693847
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,16,4096,8,0,0.7451663970947265
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,16,4096,32,0,0.2955535888671875
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,16,4096,16,0,0.48392958641052247
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,1,16,4096,64,0,0.21833438873291017
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,16,4096,2,0,2.970327949523926
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,16,4096,4,0,1.6892160415649413
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,16,4096,2,0,2.5399711608886717
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,16,4096,8,0,0.7738272190093994
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,16,4096,1,0,6.308512115478516
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,16,4096,16,0,0.4744272232055664
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,16,4096,32,0,0.4257184028625488
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,1,16,4096,64,0,0.24366400241851807
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,32,1,1,0,0.07187359929084777
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,32,1,2,0,0.03305439949035645
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,32,1,4,0,0.022710399329662324
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,32,1,8,0,0.018692800402641298
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,32,1,16,0,0.014484800398349762
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,32,1,32,0,0.01451839953660965
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,1,32,1,64,0,0.014455999433994293
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,32,1,1,0,0.059515202045440675
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,32,1,2,0,0.039087998867034915
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,32,1,4,0,0.028988799452781676
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,32,1,8,0,0.026849600672721862
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,32,1,16,0,0.022649599611759184
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,32,1,32,0,0.022737599909305573
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,1,32,1,64,0,0.022784000635147093
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,16,4096,1,0,5.368641662597656
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,32,16,1,0,0.062003201246261595
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,32,16,2,0,0.03811199963092804
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,32,16,8,0,0.018641600012779237
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,32,16,4,0,0.027158400416374205
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,32,16,32,0,0.014552000164985656
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,32,16,16,0,0.01472959965467453
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,1,32,16,64,0,0.016748799383640288
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,32,16,2,0,0.047275200486183167
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,32,16,1,0,0.07391200065612794
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,32,16,4,0,0.037334400415420535
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,32,16,8,0,0.02690559923648834
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,32,16,16,0,0.022728000581264497
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,1,32,16,64,0,0.02282879948616028
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,32,16,32,0,0.02484000027179718
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,32,32,1,0,0.07419520020484924
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,32,32,2,0,0.04530560076236725
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,32,32,8,0,0.02067680060863495
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,32,32,4,0,0.033134400844573975
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,32,32,16,0,0.016676799952983858
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,1,32,32,64,0,0.01676799952983856
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,32,32,32,0,0.014580799639225006
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,32,32,1,0,0.0870959997177124
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,32,32,4,0,0.0390639990568161
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,32,32,8,0,0.030931198596954347
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,32,32,2,0,0.0597536027431488
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,32,32,16,0,0.024728000164031982
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,32,32,32,0,0.024873599410057068
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,1,32,32,64,0,0.024873599410057068
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,32,64,2,0,0.05652639865875244
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,32,64,1,0,0.10249919891357422
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,32,64,4,0,0.041259199380874634
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,32,64,8,0,0.026787200570106508
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,32,64,16,0,0.02064799964427948
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,1,32,64,64,0,0.01852640062570572
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,32,64,32,0,0.01889120042324066
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,32,64,1,0,0.12633440494537354
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,32,64,2,0,0.07380319833755493
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,32,64,4,0,0.049532800912857056
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,32,64,8,0,0.035020801424980166
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,32,64,16,0,0.02874560058116913
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,1,32,64,64,0,0.024779200553894043
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,32,64,32,0,0.028142398595809935
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,32,128,1,0,0.15360159873962403
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,32,128,2,0,0.08421599864959717
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,32,128,4,0,0.051577597856521606
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,32,128,8,0,0.03506560027599335
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,32,128,16,0,0.026793599128723145
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,32,128,32,0,0.022745600342750548
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,1,32,128,64,0,0.022548800706863402
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,32,128,4,0,0.06986399888992309
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,32,128,2,0,0.11095520257949829
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,32,128,1,0,0.20012478828430175
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,32,128,16,0,0.03716639876365661
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,32,128,32,0,0.03091840147972107
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,32,128,8,0,0.04732640087604523
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,1,32,128,64,0,0.031036800146102904
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,32,256,2,0,0.15012799501419066
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,32,256,1,0,0.2709343910217285
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,32,256,4,0,0.08839679956436157
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,32,256,16,0,0.039024001359939574
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,32,256,8,0,0.05575199723243714
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,32,256,32,0,0.035124799609184264
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,1,32,256,64,0,0.03097119927406311
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,32,256,8,0,0.07203519940376282
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,32,256,2,0,0.19478559494018555
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,32,256,4,0,0.11631840467453003
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,32,256,1,0,0.3500591993331909
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,32,256,16,0,0.05152159929275513
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,32,256,32,0,0.045203199982643126
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,1,32,256,64,0,0.041331198811531064
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,32,512,4,0,0.1801408052444458
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,32,512,8,0,0.10734080076217652
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,32,512,2,0,0.31214079856872556
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,32,512,16,0,0.07209759950637817
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,32,512,1,0,0.5801392078399659
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,32,512,32,0,0.053590399026870725
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,1,32,512,64,0,0.049379199743270874
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,32,512,8,0,0.1366768002510071
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,32,512,4,0,0.22236480712890624
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,32,512,2,0,0.38688480854034424
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,32,512,16,0,0.09317439794540405
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,32,512,32,0,0.0700767993927002
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,32,512,1,0,0.70938720703125
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,1,32,512,64,0,0.06376799941062927
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,32,1024,4,0,0.3881295919418335
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,32,1024,2,0,0.7030784130096436
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,32,1024,8,0,0.22982559204101563
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,32,1024,16,0,0.14994720220565796
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,32,1024,32,0,0.10718560218811035
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,1,32,1024,64,0,0.0821071982383728
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,32,1024,1,0,1.3190367698669434
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,32,1024,4,0,0.46002559661865233
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,32,1024,2,0,0.8224783897399902
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,32,1024,8,0,0.27845919132232666
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,32,1024,16,0,0.18042559623718263
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,32,1024,32,0,0.1328943967819214
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,1,32,1024,64,0,0.10331679582595825
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,32,1024,1,0,1.5418512344360351
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,32,1536,4,0,0.647870397567749
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,32,1536,2,0,1.1877344131469727
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,32,1536,8,0,0.37379040718078616
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,32,1536,32,0,0.16648319959640503
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,32,1536,16,0,0.23757600784301758
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,1,32,1536,64,0,0.12885600328445435
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,32,1536,1,0,2.3226640701293944
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,32,1536,2,0,1.330470371246338
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,32,1536,4,0,0.8218704223632812
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,32,1536,32,0,0.23797600269317626
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,32,1536,16,0,0.2780368089675903
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,32,1536,8,0,0.43116321563720705
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,1,32,1536,64,0,0.15410239696502687
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,32,1536,1,0,2.5407072067260743
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,32,2048,8,0,0.5401936054229737
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,32,2048,4,0,0.9463680267333985
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,32,2048,16,0,0.33420000076293943
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,32,2048,32,0,0.23031039237976075
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,1,32,2048,64,0,0.1758671998977661
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,32,2048,2,0,1.7727872848510742
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,32,2048,4,0,1.1278063774108886
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,32,2048,1,0,3.9499263763427734
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,32,2048,2,0,1.9196464538574218
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,32,2048,8,0,0.6150271892547607
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,32,2048,32,0,0.26658880710601807
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,1,32,2048,64,0,0.20795838832855223
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,64,1,1,0,0.09845920205116272
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,32,2048,16,0,0.5307968139648438
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,64,1,2,0,0.05150880217552185
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,64,1,4,0,0.03296639919281006
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,64,1,16,0,0.018726399540901183
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,64,1,8,0,0.02470400035381317
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,64,1,32,0,0.014611199498176575
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,1,64,1,64,0,0.016448000073432924
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,64,1,1,0,0.09659680128097534
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,64,1,2,0,0.05749120116233826
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,64,1,4,0,0.039294400811195375
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,64,1,8,0,0.030950400233268737
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,64,1,16,0,0.024798400700092316
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,64,1,32,0,0.0227743998169899
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,1,64,1,64,0,0.022862400114536285
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,64,16,2,0,0.06386399865150452
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,32,2048,1,0,3.669716644287109
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,64,16,1,0,0.11288800239562988
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,64,16,4,0,0.03911519944667816
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,64,16,8,0,0.026921600103378296
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,64,16,32,0,0.016612799465656282
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,1,64,16,64,0,0.014627200365066529
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,64,16,16,0,0.01881600022315979
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,64,16,1,0,0.1252079963684082
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,64,16,2,0,0.0718608021736145
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,64,16,4,0,0.04738079905509949
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,64,16,8,0,0.033051198720932005
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,64,16,16,0,0.028705599904060363
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,64,16,32,0,0.02343199998140335
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,1,64,16,64,0,0.022793599963188173
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,64,32,4,0,0.04525440037250519
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,64,32,1,0,0.1393072009086609
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,64,32,8,0,0.031033599376678468
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,64,32,2,0,0.07607839703559875
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,64,32,16,0,0.022734400629997254
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,64,32,32,0,0.018638400733470915
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,1,64,32,64,0,0.016731199622154237
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,64,32,1,0,0.1593583941459656
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,64,32,4,0,0.05545439720153809
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,64,32,8,0,0.041196799278259276
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,64,32,2,0,0.09037759900093079
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,64,32,16,0,0.03289119899272919
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,64,32,32,0,0.026903998851776124
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,1,64,32,64,0,0.024823999404907225
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,64,64,1,0,0.19164639711380005
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,64,64,4,0,0.059552001953125
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,64,64,2,0,0.10497759580612183
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,64,64,8,0,0.039155200123786926
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,64,64,16,0,0.028835201263427736
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,64,64,32,0,0.022655999660491942
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,1,64,64,64,0,0.020695999264717102
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,64,64,4,0,0.0760047972202301
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,64,64,8,0,0.0514303982257843
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,64,64,1,0,0.23458399772644042
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,64,64,2,0,0.13044480085372925
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,64,64,16,0,0.040863999724388124
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,64,64,32,0,0.0327567994594574
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,1,64,64,64,0,0.02951039969921112
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,64,128,1,0,0.28883359432220457
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,64,128,4,0,0.09112640023231507
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,64,128,2,0,0.15851999521255494
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,64,128,8,0,0.05554559826850891
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,64,128,32,0,0.032876798510551454
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,64,128,16,0,0.041064000129699706
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,1,64,128,64,0,0.026844799518585205
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,64,128,4,0,0.11889599561691284
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,64,128,2,0,0.2061903953552246
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,64,128,1,0,0.37059199810028076
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,64,128,16,0,0.053420799970626834
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,64,128,8,0,0.07401760220527649
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,64,128,32,0,0.043137601017951964
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,1,64,128,64,0,0.03915840089321136
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,64,256,4,0,0.16183359622955323
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,64,256,2,0,0.2836672067642212
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,64,256,1,0,0.521068811416626
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,64,256,8,0,0.10054080486297608
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,64,256,16,0,0.0679423987865448
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,1,64,256,64,0,0.04543519914150238
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,64,256,32,0,0.051342397928237915
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,64,256,4,0,0.20854721069335938
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,64,256,2,0,0.3638159990310669
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,64,256,1,0,0.6679327964782715
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,64,256,8,0,0.12924799919128419
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,1,64,256,64,0,0.05964159965515137
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,64,256,32,0,0.0677727997303009
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,64,256,16,0,0.08833919763565064
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,64,512,4,0,0.33309919834136964
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,64,512,2,0,0.6016816139221192
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,64,512,8,0,0.20055038928985597
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,64,512,16,0,0.1319424033164978
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,64,512,32,0,0.0971664011478424
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,1,64,512,64,0,0.0759775996208191
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,64,512,1,0,1.1266400337219238
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,64,512,4,0,0.4135280132293701
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,64,512,2,0,0.7391039848327636
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,64,512,8,0,0.251308798789978
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,64,512,16,0,0.1673792004585266
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,64,512,32,0,0.12108319997787476
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,64,512,1,0,1.3819791793823242
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,1,64,512,64,0,0.0985759973526001
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,64,1024,4,0,0.751251220703125
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,64,1024,16,0,0.2766752004623413
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,64,1024,8,0,0.43247199058532715
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,64,1024,2,0,1.3704192161560058
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,1,64,1024,64,0,0.15274239778518678
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,64,1024,32,0,0.1976415991783142
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,64,1024,1,0,2.6020368576049804
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,64,1024,4,0,0.881982421875
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,64,1024,16,0,0.33512799739837645
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,64,1024,8,0,0.5469232082366944
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,64,1024,2,0,1.6014223098754883
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,1,64,1024,64,0,0.2008687973022461
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,64,1024,32,0,0.24066081047058105
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,128,1,1,0,0.16941440105438232
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,128,1,2,0,0.09855039715766907
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,128,1,4,0,0.05533120036125183
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,128,1,8,0,0.030964800715446474
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,128,1,16,0,0.022694399952888487
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,128,1,32,0,0.01852799952030182
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,1,128,1,64,0,0.014556799829006196
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,128,1,2,0,0.09790239930152893
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,128,1,1,0,0.17390559911727904
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,128,1,4,0,0.05963519811630249
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,128,1,8,0,0.04107840061187744
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,128,1,16,0,0.030939200520515443
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,128,1,32,0,0.026824000477790832
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,1,128,1,64,0,0.02271360009908676
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,64,1024,1,0,3.0442399978637695
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,128,16,2,0,0.1129472017288208
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,128,16,1,0,0.21750400066375733
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,128,16,4,0,0.06570559740066528
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,128,16,8,0,0.04113759994506836
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,128,16,16,0,0.026836800575256347
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,128,16,32,0,0.020766399800777435
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,1,128,16,64,0,0.016684800386428833
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,128,16,2,0,0.12602399587631224
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,128,16,8,0,0.049323201179504395
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,128,16,4,0,0.07437919974327087
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,128,16,1,0,0.2307584047317505
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,128,16,16,0,0.03503040075302124
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,1,128,16,64,0,0.02476159930229187
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,128,16,32,0,0.030921599268913268
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,128,32,8,0,0.049348801374435425
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,128,32,1,0,0.26632959842681886
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,128,32,4,0,0.07872800230979919
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,128,32,2,0,0.1427440047264099
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,128,32,16,0,0.0348688006401062
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,128,32,32,0,0.026824000477790832
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,1,128,32,64,0,0.020686399936676026
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,128,32,8,0,0.05966079831123352
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,128,32,4,0,0.09276000261306763
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,128,32,1,0,0.2949280023574829
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,128,32,2,0,0.1616528034210205
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,128,32,16,0,0.04336319863796234
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,128,32,32,0,0.033748799562454225
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,1,128,32,64,0,0.02884320020675659
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,128,64,4,0,0.1117967963218689
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,128,64,1,0,0.36758079528808596
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,128,64,2,0,0.19665119647979737
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,128,64,8,0,0.06590880155563354
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,128,64,16,0,0.0452672004699707
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,128,64,32,0,0.03499839901924133
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,1,128,64,64,0,0.02895680069923401
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,128,64,2,0,0.24052000045776367
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,128,64,4,0,0.1370303988456726
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,128,64,1,0,0.44169921875
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,128,64,8,0,0.08442720174789428
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,128,64,16,0,0.059592002630233766
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,128,64,32,0,0.047328001260757445
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,1,128,64,64,0,0.03912160098552704
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,128,128,4,0,0.170251202583313
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,128,128,2,0,0.30034561157226564
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,128,128,1,0,0.5581696033477783
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,128,128,8,0,0.10256479978561402
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,128,128,16,0,0.06969119906425476
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,1,128,128,64,0,0.04318720102310181
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,128,128,32,0,0.05138720273971557
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,128,128,4,0,0.22079999446868898
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,128,128,2,0,0.38385920524597167
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,128,128,1,0,0.7101679801940918
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,128,128,8,0,0.13420159816741944
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,128,128,16,0,0.09136959910392761
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,1,128,128,64,0,0.05761439800262451
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,128,128,32,0,0.06979039907455445
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,128,256,4,0,0.3082688093185425
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,128,256,2,0,0.5447887897491455
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,128,256,8,0,0.18326560258865357
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,128,256,1,0,1.0183856010437011
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,128,256,32,0,0.08939200043678283
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,128,256,16,0,0.12357439994812011
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,1,128,256,64,0,0.07184000015258789
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,128,256,4,0,0.39146080017089846
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,128,256,2,0,0.691918420791626
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,128,256,8,0,0.23588640689849855
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,128,256,16,0,0.16000479459762573
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,128,256,1,0,1.299726390838623
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,128,256,32,0,0.11581759452819824
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,1,128,256,64,0,0.09654880166053773
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,128,512,8,0,0.37770240306854247
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,128,512,4,0,0.64301438331604
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,128,512,16,0,0.2443631887435913
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,128,512,32,0,0.17655199766159058
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,1,128,512,64,0,0.14187040328979492
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,128,512,2,0,1.1730575561523438
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,128,512,1,0,2.196188735961914
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,128,512,4,0,0.7966752052307129
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,128,512,2,0,1.4415231704711915
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,128,512,16,0,0.31058239936828613
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,128,512,8,0,0.47014079093933103
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,1,128,512,64,0,0.18064479827880858
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,128,512,32,0,0.22465920448303223
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,256,1,1,0,0.33043038845062256
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,256,1,2,0,0.1735327959060669
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,256,1,4,0,0.09253600239753723
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,256,1,8,0,0.05279200077056885
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,256,1,16,0,0.03284800052642822
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,256,1,32,0,0.024736000597476958
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,1,256,1,64,0,0.01666239947080612
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,128,512,1,0,2.728260803222656
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,256,1,2,0,0.17283519506454467
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,256,1,4,0,0.09826239943504333
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,256,1,1,0,0.327291202545166
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,256,1,8,0,0.05963839888572693
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,256,1,16,0,0.039212799072265624
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,256,1,32,0,0.030819201469421388
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,1,256,1,64,0,0.024932800233364104
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,256,16,8,0,0.06792479753494263
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,256,16,2,0,0.21892960071563722
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,256,16,4,0,0.11603200435638428
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,256,16,1,0,0.4139440059661865
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,256,16,16,0,0.043351998925209044
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,1,256,16,64,0,0.02479040026664734
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,256,16,32,0,0.030939200520515443
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,256,16,8,0,0.07814559936523438
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,256,16,4,0,0.1305791974067688
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,256,16,2,0,0.23560640811920167
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,256,16,1,0,0.4322383880615234
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,256,16,16,0,0.05342879891395569
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,256,16,32,0,0.039094400405883786
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,1,256,16,64,0,0.032971200346946714
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,256,32,4,0,0.14683680534362792
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,256,32,8,0,0.08479840159416199
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,256,32,2,0,0.2707184076309204
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,256,32,1,0,0.5140687942504882
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,256,32,32,0,0.039155200123786926
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,256,32,16,0,0.05345919728279114
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,1,256,32,64,0,0.031044799089431762
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,256,32,4,0,0.16813119649887084
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,256,32,8,0,0.09998080134391785
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,256,32,2,0,0.3019488096237183
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,256,32,16,0,0.06781119704246522
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,256,32,1,0,0.5599999904632569
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,256,32,32,0,0.051444798707962036
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,1,256,32,64,0,0.041193601489067075
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,256,64,4,0,0.20848639011383058
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,256,64,8,0,0.12378720045089722
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,256,64,2,0,0.37752161026000974
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,256,64,16,0,0.07810400128364563
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,256,64,32,0,0.05586720108985901
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,256,64,1,0,0.7069983959197998
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,1,256,64,64,0,0.04731520116329193
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,256,64,4,0,0.2547616004943848
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,256,64,2,0,0.45525760650634767
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,256,64,8,0,0.15171200037002563
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,256,64,16,0,0.1005519986152649
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,256,64,1,0,0.8528304100036621
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,256,64,32,0,0.07401599884033203
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,1,256,64,64,0,0.05979520082473755
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,256,128,4,0,0.3190783977508545
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,256,128,2,0,0.5776735782623291
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,256,128,8,0,0.19199999570846557
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,256,128,1,0,1.091204833984375
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,256,128,16,0,0.12518399953842163
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,256,128,32,0,0.09233919978141784
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,1,256,128,64,0,0.07219679951667786
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,256,128,4,0,0.41229758262634275
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,256,128,2,0,0.7366911888122558
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,256,128,8,0,0.24972479343414306
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,256,128,16,0,0.16383520364761353
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,256,128,1,0,1.3874560356140138
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,256,128,32,0,0.11931840181350709
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,1,256,128,64,0,0.09646720290184022
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,256,256,4,0,0.585913610458374
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,256,256,2,0,1.0608847618103028
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,256,256,8,0,0.3488415956497192
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,256,256,16,0,0.22801759243011474
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,256,256,32,0,0.16809760332107543
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,1,256,256,64,0,0.1350335955619812
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,256,256,1,0,1.999332809448242
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,256,256,8,0,0.44701600074768066
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,256,256,4,0,0.7493919849395752
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,256,256,16,0,0.2954511880874634
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,256,256,32,0,0.21737918853759766
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,128,1,1,1,0,0.015063999593257904
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,1,1,2,0,0.012542399764060973
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,256,256,2,0,1.3581151962280273
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,1,256,256,64,0,0.17516160011291504
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,1,1,4,0,0.012563200294971466
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,1,1,8,0,0.014431999623775482
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,1,1,32,0,0.013604800403118133
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,1,1,16,0,0.014267200231552124
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,1,1,64,0,0.012617599964141846
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,128,1,1,1,0,0.020678399503231047
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,1,1,2,0,0.022726400196552275
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,1,1,4,0,0.020609599351882935
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,1,1,8,0,0.020659199357032774
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,1,1,16,0,0.020769600570201874
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,1,1,64,0,0.020561599731445314
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,1,1,32,0,0.020710399746894835
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,128,1,16,1,0,0.01658719927072525
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,1,16,2,0,0.014521600306034088
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,1,16,4,0,0.014500799775123595
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,1,16,8,0,0.014580799639225006
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,1,16,16,0,0.014529600739479065
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,1,16,32,0,0.012558400630950928
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,1,16,64,0,0.014547200500965118
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,256,256,1,0,2.5591712951660157
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,128,1,16,1,0,0.022732800245285033
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,1,16,2,0,0.02260479927062988
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,1,16,4,0,0.022652800381183624
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,1,16,32,0,0.020563200116157532
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,1,16,8,0,0.02073120027780533
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,1,16,16,0,0.0205935999751091
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,1,16,64,0,0.020638400316238405
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,1,32,4,0,0.014427199959754944
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,128,1,32,1,0,0.01653600037097931
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,1,32,2,0,0.014537599682807923
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,1,32,8,0,0.014667199552059173
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,1,32,16,0,0.014486399292945863
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,1,32,32,0,0.014467200636863709
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,1,32,64,0,0.012511999905109405
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,1,32,2,0,0.022697600722312927
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,128,1,32,1,0,0.02476000040769577
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,1,32,8,0,0.020660799741744996
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,1,32,4,0,0.022788800299167633
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,1,32,16,0,0.02064639925956726
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,128,1,64,1,0,0.018412800133228303
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,1,32,64,0,0.020737600326538087
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,1,64,2,0,0.016543999314308167
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,1,32,32,0,0.02263839989900589
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,1,64,16,0,0.014476799964904785
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,1,64,8,0,0.014668799936771393
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,1,64,4,0,0.014643199741840363
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,1,64,32,0,0.014603200554847717
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,1,64,64,0,0.012615999579429627
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,128,1,64,1,0,0.024910399317741395
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,1,64,2,0,0.02285120040178299
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,1,64,8,0,0.02077919989824295
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,1,64,32,0,0.022631999850273133
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,1,64,64,0,0.022651199996471406
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,1,64,16,0,0.020657600462436677
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,1,64,4,0,0.022782400250434875
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,128,1,128,1,0,0.020791999995708466
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,1,128,4,0,0.016633599996566772
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,1,128,2,0,0.018539200723171233
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,1,128,8,0,0.014667199552059173
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,1,128,16,0,0.016414399445056915
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,1,128,32,0,0.01459839940071106
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,1,128,64,0,0.014497600495815277
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,128,1,128,1,0,0.031083199381828307
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,1,128,2,0,0.026851201057434083
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,1,128,4,0,0.0227183997631073
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,1,128,16,0,0.02396000027656555
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,1,128,32,0,0.02269120067358017
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,1,128,8,0,0.022649599611759184
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,1,128,64,0,0.020776000618934632
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,128,1,256,1,0,0.026846399903297423
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,1,256,2,0,0.022771200537681578
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,1,256,4,0,0.020695999264717102
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,1,256,8,0,0.018632000684738158
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,1,256,16,0,0.01874080002307892
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,1,256,32,0,0.018643200397491455
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,1,256,64,0,0.01863359957933426
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,128,1,256,1,0,0.04105759859085083
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,1,256,2,0,0.03490560054779053
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,1,256,4,0,0.028870400786399842
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,1,256,16,0,0.024855999648571013
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,1,256,32,0,0.024748800694942473
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,1,256,8,0,0.0268640011548996
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,1,256,64,0,0.024772800505161285
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,1,512,2,0,0.03286879956722259
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,128,1,512,1,0,0.051374399662017824
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,1,512,4,0,0.02683520019054413
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,1,512,8,0,0.02473440021276474
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,1,512,16,0,0.022755199670791627
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,1,512,32,0,0.02072799950838089
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,1,512,64,0,0.02077919989824295
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,128,1,512,1,0,0.0657584011554718
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,1,512,2,0,0.043249601125717164
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,1,512,4,0,0.035104000568389894
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,1,512,8,0,0.03100320100784302
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,1,512,16,0,0.02887200117111206
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,1,512,32,0,0.028971201181411742
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,1,512,64,0,0.028891199827194215
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,1,1024,2,0,0.06171839833259583
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,1,1024,4,0,0.03821440041065216
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,128,1,1024,1,0,0.10498559474945068
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,1,1024,8,0,0.03499679863452911
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,1,1024,16,0,0.030921599268913268
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,1,1024,32,0,0.02929919958114624
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,1,1024,64,0,0.028993600606918336
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,1,1024,2,0,0.07532960176467896
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,1,1024,4,0,0.04935039877891541
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,128,1,1024,1,0,0.12533119916915894
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,1,1024,8,0,0.043198400735855104
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,1,1024,16,0,0.039048001170158386
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,1,1024,32,0,0.03707360029220581
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,1,1024,64,0,0.0350847989320755
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,1,1536,2,0,0.09827039837837219
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,1,1536,4,0,0.06096479892730713
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,128,1,1536,1,0,0.17003680467605592
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,1,1536,8,0,0.04320319890975952
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,1,1536,16,0,0.03912000060081482
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,1,1536,32,0,0.03700799942016601
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,1,1536,64,0,0.03500480055809021
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,1,1536,2,0,0.11311999559402466
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,1,1536,4,0,0.07188799977302551
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,128,1,1536,1,0,0.19426560401916504
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,1,1536,8,0,0.05347679853439331
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,1,1536,16,0,0.047295999526977536
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,1,1536,32,0,0.043201598525047305
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,1,1536,64,0,0.042859199643135074
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,1,2048,2,0,0.1383039951324463
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,128,1,2048,1,0,0.24685280323028563
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,1,2048,4,0,0.0853663980960846
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,1,2048,8,0,0.05156959891319275
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,1,2048,16,0,0.048670399188995364
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,1,2048,32,0,0.04429279863834381
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,1,2048,64,0,0.0432671993970871
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,1,2048,2,0,0.15492000579833984
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,128,1,2048,1,0,0.26986720561981203
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,1,2048,4,0,0.09548320174217224
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,1,2048,8,0,0.0616815984249115
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,1,2048,16,0,0.05548480153083801
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,1,2048,32,0,0.05141119956970215
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,1,2048,64,0,0.04936800003051758
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,1,3072,4,0,0.14051200151443483
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,1,3072,2,0,0.24020159244537354
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,128,1,3072,1,0,0.43404321670532225
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,1,3072,16,0,0.0637615978717804
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,1,3072,32,0,0.05983520150184631
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,1,3072,8,0,0.08852800130844116
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,1,3072,64,0,0.057520002126693726
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,1,3072,4,0,0.1483888030052185
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,1,3072,2,0,0.2528079986572266
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,1,3072,8,0,0.0962544023990631
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,128,1,3072,1,0,0.4504096031188965
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,1,3072,16,0,0.07213919758796691
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,1,3072,32,0,0.06572480201721191
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,1,3072,64,0,0.06369600296020508
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,1,4096,4,0,0.20997118949890137
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,1,4096,2,0,0.3632767915725708
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,1,4096,16,0,0.08233919739723206
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,128,1,4096,1,0,0.6705664157867431
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,1,4096,32,0,0.0761247992515564
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,1,4096,8,0,0.13171679973602296
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,1,4096,64,0,0.07194079756736756
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,1,4096,4,0,0.21567039489746093
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,1,4096,2,0,0.36633920669555664
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,1,4096,8,0,0.1336192011833191
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,1,4096,16,0,0.08826239705085755
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,128,1,4096,1,0,0.6692592144012451
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,1,4096,32,0,0.08005759716033936
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,1,4096,64,0,0.07802559733390808
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,1,6144,4,0,0.3785504102706909
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,1,6144,2,0,0.6790751934051513
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,1,6144,8,0,0.2262160062789917
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,1,6144,16,0,0.14526720046997071
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,1,6144,32,0,0.10743520259857178
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,1,6144,64,0,0.10066080093383789
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,128,1,6144,1,0,1.277847957611084
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,1,6144,4,0,0.4244688034057617
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,1,6144,2,0,0.6524496078491211
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,1,6144,8,0,0.22684800624847412
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,1,6144,16,0,0.14490079879760742
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,128,1,6144,1,0,1.2057456016540526
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,1,6144,32,0,0.11484320163726806
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,1,6144,64,0,0.10268640518188477
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,1,8192,4,0,0.5950399875640869
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,1,8192,2,0,1.0945152282714843
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,1,8192,8,0,0.34846560955047606
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,1,8192,16,0,0.22225120067596435
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,1,8192,32,0,0.1424463987350464
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,1,8192,64,0,0.13351360559463502
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,128,1,8192,1,0,2.2489248275756837
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,1,8192,2,0,1.0220527648925781
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,1,8192,16,0,0.21132640838623046
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,1,8192,4,0,0.8590432167053222
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,1,8192,8,0,0.32959039211273194
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,1,8192,32,0,0.14186400175094604
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,1,8192,64,0,0.20285439491271973
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,128,1,8192,1,0,1.896303939819336
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,1,10240,8,0,0.4900959968566895
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,1,10240,4,0,0.8464367866516114
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,1,10240,16,0,0.30419681072235105
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,1,10240,32,0,0.1882159948348999
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,1,10240,64,0,0.16474239826202391
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,1,10240,2,0,1.5997039794921875
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,1,10240,2,0,1.4303536415100098
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,1,10240,4,0,1.1541456222534179
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,1,10240,16,0,0.28498880863189696
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,1,10240,8,0,0.45602879524230955
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,128,1,10240,1,0,3.7813201904296876
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,1,10240,32,0,0.18184000253677368
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,1,10240,64,0,0.15990240573883058
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,128,1,10240,1,0,2.7716863632202147
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,1,12288,4,0,1.1561984062194823
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,1,12288,8,0,0.6625887870788574
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,1,12288,32,0,0.2586047887802124
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,1,12288,16,0,0.3937616109848022
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,1,12288,64,0,0.20097761154174804
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,1,12288,2,0,2.4987520217895507
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,1,12288,2,0,1.9419120788574218
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,1,12288,4,0,1.4825984001159669
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,1,12288,16,0,0.37380959987640383
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,1,12288,8,0,0.5957488059997559
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,1,12288,32,0,0.23853919506072999
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,1,12288,64,0,0.1875056028366089
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,128,1,12288,1,0,4.060116958618164
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,128,1,12288,1,0,6.621708679199219
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,1,16384,4,0,2.265737533569336
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,1,16384,8,0,1.6132543563842774
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,1,16384,32,0,0.5543776035308838
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,1,16384,16,0,0.8731792449951172
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,1,16384,64,0,0.3547136068344116
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,1,16384,2,0,5.049288177490235
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,1,16384,4,0,1.9828960418701171
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,1,16384,2,0,3.39713134765625
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,1,16384,8,0,0.9423600196838379
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,1,16384,32,0,0.3635967969894409
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,1,16384,16,0,0.6634543895721435
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,1,16384,64,0,0.2439903974533081
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,128,1,16384,1,0,11.12466049194336
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,128,1,16384,1,0,7.076979064941407
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,1,32768,8,0,4.925094223022461
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,1,32768,16,0,2.9243120193481444
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,1,32768,32,0,1.4649007797241211
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,1,32768,4,0,11.398033905029298
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,1,32768,64,0,0.7730095863342286
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,1,32768,2,0,21.77473602294922
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,1,32768,2,0,14.170050048828125
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,1,32768,4,0,6.892215728759766
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,1,32768,8,0,3.1616992950439453
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,1,32768,16,0,1.705855941772461
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,1,32768,32,0,1.127079963684082
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,1,32768,64,0,0.6371615886688232
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,128,2,1,1,0,0.01870719939470291
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,2,1,2,0,0.014519999921321868
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,2,1,4,0,0.01451680064201355
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,2,1,8,0,0.012532800436019897
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,2,1,16,0,0.014511999487876893
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,2,1,32,0,0.012443199753761292
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,2,1,64,0,0.012556800246238708
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,128,2,1,1,0,0.02493920028209686
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,2,1,4,0,0.02067999988794327
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,2,1,2,0,0.020664000511169435
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,2,1,8,0,0.020688000321388244
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,2,1,16,0,0.019276799261569978
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,2,1,32,0,0.020603199303150178
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,2,1,64,0,0.020735999941825865
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,128,2,16,1,0,0.01866080015897751
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,2,16,2,0,0.01454399973154068
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,2,16,4,0,0.014524799585342408
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,2,16,8,0,0.01454080045223236
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,2,16,16,0,0.012489599734544754
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,2,16,32,0,0.0125231996178627
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,2,16,64,0,0.012465599924325943
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,128,2,16,1,0,0.02685759961605072
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,2,16,2,0,0.022735999524593355
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,2,16,4,0,0.02067359983921051
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,2,16,8,0,0.02264000028371811
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,2,16,16,0,0.020707200467586517
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,2,16,32,0,0.02266560047864914
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,2,16,64,0,0.0206496000289917
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,128,2,32,1,0,0.020598399639129638
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,2,32,2,0,0.016103999316692354
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,2,32,4,0,0.014545600116252898
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,2,32,8,0,0.014553600549697876
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,2,32,16,0,0.014574399590492249
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,2,32,32,0,0.014547200500965118
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,2,32,64,0,0.01257600039243698
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,128,2,32,1,0,0.028880000114440918
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,2,32,2,0,0.022728000581264497
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,2,32,4,0,0.022732800245285033
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,2,32,8,0,0.02272160053253174
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,2,32,16,0,0.020664000511169435
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,2,32,32,0,0.02072640061378479
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,2,32,64,0,0.02066880017518997
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,128,2,64,1,0,0.024750399589538574
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,2,64,2,0,0.018667200207710268
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,2,64,4,0,0.01666560024023056
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,2,64,8,0,0.014524799585342408
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,2,64,16,0,0.014535999298095703
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,2,64,32,0,0.014575999975204468
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,2,64,64,0,0.014553600549697876
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,128,2,64,1,0,0.035016000270843506
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,2,64,2,0,0.026815998554229736
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,2,64,4,0,0.024796800315380098
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,2,64,8,0,0.022728000581264497
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,2,64,16,0,0.022734400629997254
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,2,64,32,0,0.022755199670791627
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,2,64,64,0,0.02274080067873001
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,128,2,128,1,0,0.02924799919128418
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,2,128,2,0,0.020684799551963805
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,2,128,8,0,0.01669279932975769
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,2,128,4,0,0.018764799833297728
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,2,128,16,0,0.01454399973154068
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,2,128,32,0,0.01677920073270798
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,2,128,64,0,0.014619199931621552
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,2,128,2,0,0.030943998694419862
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,128,2,128,1,0,0.04132960140705109
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,2,128,4,0,0.024747200310230255
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,2,128,8,0,0.024915200471878052
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,2,128,16,0,0.024585600197315215
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,2,128,32,0,0.022801600396633148
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,2,128,64,0,0.022758400440216063
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,2,256,2,0,0.028143998980522156
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,128,2,256,1,0,0.04348799884319306
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,2,256,4,0,0.022752000391483305
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,2,256,8,0,0.02080000042915344
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,2,256,16,0,0.018639999628067016
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,2,256,32,0,0.018775999546051025
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,2,256,64,0,0.01870400011539459
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,2,256,2,0,0.03911199867725372
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,128,2,256,1,0,0.05996639728546142
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,2,256,4,0,0.03441759943962097
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,2,256,8,0,0.028249600529670717
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,2,256,16,0,0.026787200570106508
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,2,256,32,0,0.026881599426269533
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,2,256,64,0,0.024784000217914583
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,128,1,32768,1,0,28.612118530273438
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,128,2,512,1,0,0.08714720010757446
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,2,512,2,0,0.05154880285263062
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,2,512,8,0,0.026927998661994933
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,2,512,16,0,0.024780799448490144
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,2,512,4,0,0.043244799971580504
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,2,512,32,0,0.022894400358200073
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,2,512,64,0,0.022732800245285033
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,128,2,512,1,0,0.1477679967880249
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,2,512,2,0,0.06944000124931335
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,2,512,8,0,0.047286400198936464
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,2,512,16,0,0.03107840120792389
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,2,512,4,0,0.043244799971580504
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,2,512,64,0,0.02884800136089325
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,2,512,32,0,0.03897919952869415
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,128,2,1024,1,0,0.18750879764556885
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,2,1024,4,0,0.08224959969520569
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,2,1024,2,0,0.10620959997177123
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,2,1024,8,0,0.03928320109844208
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,2,1024,16,0,0.04538559913635254
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,2,1024,64,0,0.030807998776435853
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,2,1024,32,0,0.03292959928512573
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,2,1024,4,0,0.07689440250396729
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,128,2,1024,1,0,0.2240607976913452
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,2,1024,2,0,0.12522239685058595
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,2,1024,16,0,0.04327360093593598
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,2,1024,8,0,0.05146880149841308
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,2,1024,32,0,0.04111199975013733
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,2,1024,64,0,0.037038400769233704
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,128,2,1536,1,0,0.30973598957061765
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,2,1536,2,0,0.17102240324020385
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,2,1536,4,0,0.10021439790725709
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,2,1536,8,0,0.06418399810791016
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,2,1536,16,0,0.045184001326560974
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,2,1536,32,0,0.0411871999502182
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,2,1536,64,0,0.03901279866695404
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,2,1536,2,0,0.19585280418395995
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,128,2,1536,1,0,0.35085439682006836
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,2,1536,4,0,0.1153216004371643
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,2,1536,8,0,0.07383520007133484
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,2,1536,16,0,0.05550720095634461
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,2,1536,32,0,0.049451199173927304
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,2,1536,64,0,0.04524160027503967
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,2,2048,2,0,0.2474287986755371
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,128,2,2048,1,0,0.4587679862976074
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,2,2048,4,0,0.14232640266418456
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,2,2048,8,0,0.08806880116462708
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,2,2048,16,0,0.055576002597808837
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,2,2048,32,0,0.051481598615646364
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,2,2048,64,0,0.047356799244880676
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,2,2048,2,0,0.273635196685791
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,128,2,2048,1,0,0.4990896224975586
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,128,1,32768,1,0,48.16600341796875
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,2,2048,8,0,0.09757120013237
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,2,2048,4,0,0.15795680284500122
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,2,2048,32,0,0.05762879848480225
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,2,2048,64,0,0.05343040227890015
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,2,2048,16,0,0.11974719762802125
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,2,3072,2,0,0.4346479892730713
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,2,3072,4,0,0.44765281677246094
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,2,3072,8,0,0.14540959596633912
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,128,2,3072,1,0,0.8220704078674317
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,2,3072,16,0,0.1401039958000183
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,2,3072,32,0,0.06983360052108764
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,2,3072,64,0,0.06508319973945617
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,2,3072,4,0,0.25814080238342285
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,2,3072,2,0,0.4555952072143555
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,2,3072,16,0,0.10040160417556762
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,2,3072,8,0,0.15496480464935303
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,128,2,3072,1,0,0.8422351837158203
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,2,3072,32,0,0.07743039727210999
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,2,3072,64,0,0.07189760208129883
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,2,4096,4,0,0.3728111982345581
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,2,4096,8,0,0.2194063901901245
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,2,4096,2,0,0.6657919883728027
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,2,4096,16,0,0.13657920360565184
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,2,4096,64,0,0.08245599865913392
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,2,4096,32,0,0.08809599876403809
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,128,2,4096,1,0,1.2707776069641112
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,2,4096,4,0,0.3771984100341797
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,2,4096,8,0,0.22164640426635743
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,2,4096,2,0,0.6719103813171386
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,2,4096,32,0,0.11687999963760376
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,2,4096,16,0,0.14229919910430908
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,2,4096,64,0,0.08829759955406188
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,128,2,4096,1,0,1.2643600463867188
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,2,6144,4,0,0.6818607807159424
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,2,6144,8,0,0.38974719047546386
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,2,6144,16,0,0.23388159275054932
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,2,6144,32,0,0.15398240089416504
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,2,6144,2,0,1.347264003753662
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,2,6144,64,0,0.11676160097122193
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,2,6144,4,0,0.6627439975738525
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,2,6144,8,0,0.3790719985961914
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,2,6144,2,0,1.3651568412780761
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,128,2,6144,1,0,2.9248079299926757
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,2,6144,16,0,0.23099040985107422
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,2,6144,32,0,0.155131196975708
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,2,6144,64,0,0.20288798809051514
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,128,2,6144,1,0,2.3314048767089846
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,2,8192,8,0,0.606006383895874
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,2,8192,4,0,1.328771209716797
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,2,8192,16,0,0.36112639904022215
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,2,8192,64,0,0.15723999738693237
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,2,8192,32,0,0.252126407623291
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,2,8192,2,0,2.199500846862793
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,2,8192,4,0,1.4312416076660157
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,2,8192,2,0,1.9136335372924804
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,2,8192,8,0,0.5869823932647705
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,2,8192,16,0,0.3687968015670776
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,2,8192,32,0,0.2320096015930176
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,2,8192,64,0,0.16820000410079955
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,128,2,8192,1,0,5.449619293212891
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,128,2,8192,1,0,3.832281494140625
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,2,10240,8,0,0.9339664459228516
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,2,10240,4,0,2.038979148864746
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,2,10240,16,0,0.6275407791137695
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,2,10240,32,0,0.42702717781066896
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,2,10240,64,0,0.262441611289978
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,2,10240,2,0,3.8624336242675783
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,2,10240,4,0,1.877672004699707
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,2,10240,2,0,2.796636772155762
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,2,10240,16,0,0.5163263797760009
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,2,10240,8,0,0.855128002166748
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,2,10240,32,0,0.3018719911575317
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,2,10240,64,0,0.211296010017395
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,128,2,10240,1,0,9.011756896972656
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,128,2,10240,1,0,5.69630241394043
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,2,12288,4,0,2.4497936248779295
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,2,12288,8,0,1.9046287536621094
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,2,12288,16,0,0.7928639888763428
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,2,12288,64,0,0.29347360134124756
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,2,12288,32,0,0.4078479766845703
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,2,12288,2,0,6.618355560302734
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,2,12288,4,0,2.2496688842773436
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,2,12288,2,0,4.092062377929688
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,2,12288,8,0,1.1818896293640138
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,128,2,12288,1,0,13.328176879882813
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,2,12288,32,0,0.38382079601287844
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,2,12288,16,0,0.7154592037200928
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,2,12288,64,0,0.4391039848327637
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,128,2,12288,1,0,8.863944244384765
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,2,16384,4,0,5.261569595336914
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,2,16384,8,0,2.2279823303222654
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,2,16384,32,0,1.0374367713928223
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,2,16384,16,0,1.442899227142334
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,2,16384,64,0,0.6764368057250977
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,2,16384,2,0,11.726681518554688
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,2,16384,2,0,7.4385520935058596
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,2,16384,4,0,3.9029422760009767
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,2,16384,16,0,1.1623215675354004
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,2,16384,8,0,1.8099103927612306
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,2,16384,32,0,0.6194672107696533
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,2,16384,64,0,0.4205296039581299
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,128,2,16384,1,0,14.941067504882813
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,128,2,16384,1,0,24.050840759277342
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,2,32768,8,0,11.443292999267578
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,2,32768,16,0,5.730972671508789
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,2,32768,32,0,2.8202768325805665
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,2,32768,4,0,23.149642944335938
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,2,32768,64,0,1.3283167839050294
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,2,32768,2,0,45.047064208984374
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,2,32768,2,0,29.06621398925781
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,2,32768,4,0,13.895770263671874
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,2,32768,16,0,3.1861040115356447
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,2,32768,8,0,7.0843650817871096
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,2,32768,32,0,1.7986799240112306
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,128,4,1,1,0,0.03246400058269501
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,4,1,2,0,0.022643199563026427
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,4,1,4,0,0.016524800658226015
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,4,1,8,0,0.014596800506114959
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,4,1,16,0,0.014532800018787383
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,4,1,32,0,0.014593599736690522
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,2,32768,64,0,1.1769935607910156
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,4,1,64,0,0.014552000164985656
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,128,4,1,1,0,0.031348800659179686
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,4,1,2,0,0.026793599128723145
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,4,1,4,0,0.022835199534893037
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,4,1,8,0,0.022700800001621245
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,4,1,16,0,0.02077919989824295
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,4,1,32,0,0.02064319998025894
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,4,1,64,0,0.020793600380420683
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,128,4,16,1,0,0.024769599735736846
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,4,16,2,0,0.01855680048465729
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,4,16,4,0,0.016499200463294984
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,4,16,8,0,0.014420799911022186
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,4,16,16,0,0.014529600739479065
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,4,16,32,0,0.014392000436782838
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,4,16,64,0,0.014475199580192565
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,128,4,16,1,0,0.034995201230049136
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,4,16,2,0,0.026815998554229736
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,4,16,4,0,0.022868800163269042
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,4,16,8,0,0.020764799416065217
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,4,16,16,0,0.020791999995708466
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,4,16,32,0,0.021134400367736818
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,4,16,64,0,0.021729600429534913
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,128,4,32,1,0,0.028857600688934327
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,4,32,2,0,0.020627200603485107
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,4,32,4,0,0.016599999368190767
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,4,32,8,0,0.014608000218868256
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,4,32,16,0,0.014596800506114959
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,4,32,32,0,0.014587199687957764
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,4,32,64,0,0.014521600306034088
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,4,32,2,0,0.026929599046707154
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,128,4,32,1,0,0.039105600118637084
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,4,32,8,0,0.0226623997092247
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,4,32,4,0,0.02465759962797165
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,4,32,16,0,0.022614400088787078
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,4,32,32,0,0.022732800245285033
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,4,32,64,0,0.022804799675941467
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,128,4,64,1,0,0.03514719903469086
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,4,64,2,0,0.02324319928884506
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,4,64,8,0,0.016575999557971954
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,4,64,4,0,0.016628800332546233
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,4,64,32,0,0.014523200690746307
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,4,64,16,0,0.0165120005607605
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,4,64,64,0,0.01451520025730133
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,4,64,2,0,0.03288640081882477
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,128,4,64,1,0,0.04530879855155945
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,4,64,4,0,0.026795199513435362
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,4,64,8,0,0.02459519952535629
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,4,64,16,0,0.022785599529743194
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,4,64,32,0,0.022672000527381896
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,4,64,64,0,0.020654399693012238
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,128,4,128,1,0,0.047307199239730834
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,4,128,2,0,0.028808000683784484
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,4,128,4,0,0.022617599368095397
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,4,128,8,0,0.01870719939470291
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,4,128,16,0,0.016740800440311433
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,4,128,32,0,0.014633600413799287
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,4,128,64,0,0.015996800363063814
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,128,4,128,1,0,0.06431840062141418
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,4,128,2,0,0.04104639887809754
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,4,128,4,0,0.031228798627853393
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,4,128,8,0,0.02685759961605072
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,4,128,16,0,0.024828800559043886
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,4,128,32,0,0.024668799340724946
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,4,128,64,0,0.02276960015296936
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,128,4,256,1,0,0.08028960227966309
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,4,256,2,0,0.04530560076236725
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,4,256,4,0,0.028935998678207397
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,4,256,8,0,0.024766400456428528
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,4,256,16,0,0.02083519995212555
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,4,256,32,0,0.018751999735832213
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,4,256,64,0,0.018723200261592864
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,128,4,256,1,0,0.10493279695510864
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,4,256,2,0,0.05977439880371094
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,4,256,4,0,0.041310399770736694
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,4,256,8,0,0.033073601126670835
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,4,256,16,0,0.030822399258613586
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,4,256,32,0,0.028935998678207397
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,4,256,64,0,0.026736000180244447
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,128,4,512,1,0,0.15794880390167237
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,4,512,2,0,0.08874239921569824
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,4,512,4,0,0.053457599878311154
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,4,512,8,0,0.033081600069999696
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,4,512,16,0,0.028998398780822755
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,4,512,32,0,0.026732799410820008
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,4,512,64,0,0.024905599653720856
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,128,4,512,1,0,0.19887520074844361
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,4,512,2,0,0.11216800212860108
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,4,512,4,0,0.0706928014755249
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,4,512,8,0,0.04521119892597199
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,4,512,16,0,0.039113599061965945
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,4,512,64,0,0.03099839985370636
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,4,512,32,0,0.03514240086078644
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,4,1024,2,0,0.18897279500961303
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,128,4,1024,1,0,0.35016000270843506
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,4,1024,4,0,0.10883200168609619
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,4,1024,8,0,0.06681600213050842
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,4,1024,32,0,0.03906559944152832
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,4,1024,16,0,0.043433600664138795
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,4,1024,64,0,0.035185599327087404
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,4,1024,2,0,0.2262432098388672
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,128,4,1024,1,0,0.40955681800842286
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,4,1024,4,0,0.12933119535446166
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,4,1024,8,0,0.07987679839134217
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,4,1024,16,0,0.053502398729324344
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,4,1024,64,0,0.043249601125717164
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,4,1024,32,0,0.04742879867553711
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,4,1536,2,0,0.3135711908340454
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,128,4,1536,1,0,0.5926576137542725
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,4,1536,4,0,0.17527999877929687
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,4,1536,8,0,0.10648640394210815
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,4,1536,16,0,0.06624799966812134
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,4,1536,32,0,0.04940159916877747
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,4,1536,64,0,0.04539999961853027
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,4,1536,2,0,0.3548111915588379
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,128,4,1536,1,0,0.6589072227478028
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,4,1536,4,0,0.19963040351867675
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,4,1536,8,0,0.1223904013633728
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,4,1536,16,0,0.08013759851455689
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,4,1536,32,0,0.059699201583862306
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,4,1536,64,0,0.05529119968414307
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,4,2048,2,0,0.45848641395568845
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,128,4,2048,1,0,0.8789119720458984
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,4,2048,4,0,0.25226559638977053
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,4,2048,8,0,0.14952000379562377
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,4,2048,16,0,0.09304640293121338
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,4,2048,32,0,0.06165120005607605
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,4,2048,64,0,0.05757759809494019
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,4,2048,2,0,0.5014768123626709
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,128,4,2048,1,0,0.9462896347045898
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,4,2048,4,0,0.2810944080352783
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,4,2048,8,0,0.16479040384292604
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,4,2048,16,0,0.10571520328521729
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,4,2048,32,0,0.07387359738349915
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,4,2048,64,0,0.06380800008773804
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,4,3072,2,0,0.8195551872253418
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,128,2,32768,1,0,57.66243286132813
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,4,3072,4,0,0.4825632095336914
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,128,4,3072,1,0,1.6589759826660155
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,4,3072,16,0,0.1679744005203247
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,4,3072,32,0,0.12516160011291505
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,4,3072,64,0,0.08831999897956848
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,4,3072,8,0,0.3078639984130859
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,4,3072,4,0,0.549131202697754
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,4,3072,2,0,0.9499600410461426
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,4,3072,8,0,0.2684848070144653
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,4,3072,32,0,0.11115839481353759
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,4,3072,16,0,0.16535040140151977
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,4,3072,64,0,0.08826720118522643
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,128,4,3072,1,0,1.652359962463379
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,4,4096,4,0,0.688318395614624
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,4,4096,2,0,1.2821776390075683
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,4,4096,8,0,0.38447999954223633
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,4,4096,16,0,0.22816159725189208
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,4,4096,32,0,0.148363196849823
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,4,4096,64,0,0.10801440477371216
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,128,4,4096,1,0,2.8114416122436525
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,4,4096,2,0,1.4007840156555176
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,128,4,4096,1,0,2.5072704315185548
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,4,4096,4,0,0.6849071979522705
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,4,4096,8,0,0.4040976047515869
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,4,4096,16,0,0.2350816011428833
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,4,4096,32,0,0.16469919681549072
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,4,4096,64,0,0.10982400178909302
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,4,6144,2,0,2.8605424880981447
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,128,2,32768,1,0,97.54714965820312
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,4,6144,4,0,1.3063152313232422
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,4,6144,8,0,0.9689776420593261
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,4,6144,64,0,0.1907647967338562
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,4,6144,16,0,0.7478015899658204
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,4,6144,32,0,0.29932639598846433
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,4,6144,4,0,1.3077407836914063
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,128,4,6144,1,0,7.0621795654296875
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,4,6144,8,0,0.6879776000976563
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,4,6144,2,0,2.4549280166625977
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,4,6144,32,0,0.25339679718017577
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,4,6144,16,0,0.5943935871124267
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,4,6144,64,0,0.1884112000465393
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,128,4,6144,1,0,4.78302230834961
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,4,8192,4,0,2.223964881896973
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,4,8192,8,0,1.1265520095825194
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,4,8192,16,0,0.9588624000549316
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,4,8192,32,0,0.4756959915161133
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,4,8192,64,0,0.35095040798187255
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,4,8192,2,0,5.499372863769532
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,4,8192,4,0,2.198543930053711
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,4,8192,2,0,4.357585525512695
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,4,8192,8,0,1.065116786956787
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,4,8192,32,0,0.37392001152038573
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,4,8192,16,0,0.7108975887298584
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,4,8192,64,0,0.25312960147857666
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,128,4,8192,1,0,12.207606506347656
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,128,4,8192,1,0,8.124454498291016
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,4,10240,8,0,1.6862895965576172
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,4,10240,16,0,1.0625375747680663
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,4,10240,32,0,0.6148863792419433
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,4,10240,4,0,4.306148910522461
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,4,10240,64,0,0.40239520072937013
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,4,10240,2,0,9.745159912109376
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,4,10240,4,0,3.1149007797241213
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,4,10240,2,0,6.211732864379883
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,4,10240,16,0,0.8687647819519043
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,4,10240,32,0,0.5243824005126954
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,4,10240,8,0,1.7283119201660155
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,4,10240,64,0,0.33683199882507325
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,128,4,10240,1,0,21.59119873046875
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,128,4,10240,1,0,13.319073486328126
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,4,12288,8,0,2.6901744842529296
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,4,12288,16,0,1.7691919326782226
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,4,12288,32,0,0.9362159729003906
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,4,12288,64,0,0.5463071823120117
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,4,12288,4,0,7.549094390869141
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,4,12288,2,0,13.65264434814453
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,4,12288,4,0,4.2294670104980465
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,4,12288,8,0,2.0601408004760744
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,4,12288,16,0,1.208340835571289
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,4,12288,32,0,0.6543824195861816
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,4,12288,2,0,9.082513427734375
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,4,12288,64,0,0.42432317733764646
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,128,4,12288,1,0,28.4567138671875
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,128,4,12288,1,0,18.116110229492186
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,4,16384,8,0,5.904683303833008
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,4,16384,4,0,11.738654327392577
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,4,16384,16,0,2.9792015075683596
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,4,16384,64,0,1.0293824195861816
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,4,16384,32,0,1.573748779296875
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,4,16384,2,0,23.51204833984375
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,4,16384,4,0,7.391671752929687
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,4,16384,2,0,15.229228210449218
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,4,16384,8,0,3.605547332763672
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,4,16384,16,0,1.8909967422485352
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,4,16384,32,0,1.1871279716491698
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,128,8,1,1,0,0.03792159855365753
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,4,16384,64,0,0.6635136127471923
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,8,1,2,0,0.02832320034503937
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,8,1,4,0,0.022438399493694305
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,8,1,8,0,0.014609600603580474
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,8,1,16,0,0.014424000680446625
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,8,1,32,0,0.014563199877738953
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,8,1,64,0,0.012545600533485413
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,128,8,1,1,0,0.039094400405883786
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,8,1,2,0,0.031625598669052124
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,8,1,4,0,0.026804798841476442
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,8,1,8,0,0.022833600640296936
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,8,1,16,0,0.020771199464797975
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,8,1,32,0,0.020708799362182617
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,8,1,64,0,0.02067680060863495
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,128,8,16,1,0,0.03707360029220581
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,8,16,2,0,0.026795199513435362
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,8,16,4,0,0.01852640062570572
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,8,16,8,0,0.014537599682807923
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,8,16,16,0,0.014521600306034088
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,8,16,32,0,0.014528000354766845
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,8,16,64,0,0.014483200013637542
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,128,8,16,1,0,0.047275200486183167
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,8,16,2,0,0.03296639919281006
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,8,16,4,0,0.0267984002828598
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,8,16,8,0,0.022724799811840057
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,8,16,16,0,0.02269279956817627
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,8,16,32,0,0.02271360009908676
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,8,16,64,0,0.020659199357032774
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,128,8,32,1,0,0.042289599776268005
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,8,32,2,0,0.0288239985704422
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,8,32,4,0,0.02077440023422241
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,8,32,8,0,0.01658399999141693
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,8,32,16,0,0.014523200690746307
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,8,32,32,0,0.014510400593280792
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,8,32,64,0,0.014529600739479065
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,128,8,32,1,0,0.0534496009349823
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,8,32,2,0,0.03707360029220581
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,8,32,4,0,0.028896000981330872
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,8,32,8,0,0.024753600358963013
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,8,32,16,0,0.02274720072746277
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,8,32,32,0,0.022710399329662324
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,8,32,64,0,0.022801600396633148
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,128,8,64,1,0,0.053592002391815184
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,8,64,2,0,0.035076799988746646
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,8,64,4,0,0.02476000040769577
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,8,64,8,0,0.01860480010509491
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,8,64,16,0,0.01680160015821457
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,8,64,32,0,0.014575999975204468
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,8,64,64,0,0.014502400159835815
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,128,8,64,1,0,0.06980800032615661
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,8,64,2,0,0.04736160039901734
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,8,64,4,0,0.03306080102920532
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,8,64,8,0,0.0268528014421463
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,8,64,16,0,0.02281759977340698
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,8,64,32,0,0.022678400576114654
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,8,64,64,0,0.022809599339962006
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,128,8,128,1,0,0.08080000281333924
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,8,128,2,0,0.047295999526977536
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,8,128,4,0,0.030859199166297913
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,8,128,8,0,0.022755199670791627
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,8,128,16,0,0.018529599905014037
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,8,128,32,0,0.01655679941177368
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,8,128,64,0,0.015603199601173401
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,8,128,2,0,0.06378399729728698
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,128,8,128,1,0,0.10840480327606201
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,8,128,4,0,0.041257598996162416
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,8,128,8,0,0.031246399879455565
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,8,128,16,0,0.026820799708366393
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,8,128,32,0,0.024940800666809083
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,8,128,64,0,0.02476159930229187
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,8,256,2,0,0.08020640015602112
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,128,8,256,1,0,0.13981759548187256
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,8,256,4,0,0.045865601301193236
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,8,256,8,0,0.03105440139770508
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,8,256,16,0,0.02677280008792877
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,8,256,64,0,0.020788800716400147
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,8,256,32,0,0.022951999306678773
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,8,256,2,0,0.10525120496749878
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,128,8,256,1,0,0.18322399854660035
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,8,256,8,0,0.04312160015106201
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,8,256,4,0,0.06398720145225525
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,8,256,16,0,0.03504000008106232
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,8,256,32,0,0.03102880120277405
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,8,256,64,0,0.02898240089416504
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,8,512,2,0,0.1613119959831238
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,128,8,512,1,0,0.2948240041732788
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,8,512,4,0,0.09071040153503418
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,8,512,8,0,0.0575007975101471
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,8,512,16,0,0.03683840036392212
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,8,512,32,0,0.03299840092658997
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,8,512,64,0,0.028889599442481994
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,8,512,2,0,0.20233280658721925
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,128,8,512,1,0,0.36458559036254884
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,8,512,4,0,0.1148527979850769
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,8,512,8,0,0.07189120054244995
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,8,512,16,0,0.04771040081977844
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,8,512,32,0,0.04112800061702728
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,8,512,64,0,0.03908160030841827
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,128,4,16384,1,0,31.260159301757813
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,8,1024,2,0,0.3547744035720825
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,8,1024,8,0,0.11442719697952271
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,8,1024,4,0,0.23998239040374755
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,128,8,1024,1,0,0.667632007598877
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,8,1024,16,0,0.07201439738273621
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,8,1024,64,0,0.043278399109840396
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,8,1024,32,0,0.059627199172973634
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,8,1024,4,0,0.29512639045715333
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,8,1024,2,0,0.4166224002838135
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,8,1024,8,0,0.16883360147476195
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,8,1024,16,0,0.08803679943084716
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,128,8,1024,1,0,0.7792655944824218
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,128,4,16384,1,0,51.54886474609375
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,8,1024,32,0,0.06175360083580017
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,8,1024,64,0,0.05341759920120239
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,8,1536,4,0,0.32421278953552246
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,8,1536,8,0,0.1849295973777771
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,8,1536,16,0,0.11332319974899292
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,8,1536,32,0,0.07639520168304444
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,8,1536,64,0,0.05761600136756897
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,128,8,1536,1,0,1.1399951934814454
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,8,1536,2,0,1.0342448234558106
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,8,1536,2,0,0.670084810256958
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,8,1536,4,0,0.3918015956878662
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,8,1536,8,0,0.21164000034332275
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,8,1536,16,0,0.14178240299224854
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,8,1536,32,0,0.09036960005760193
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,128,8,1536,1,0,1.2640128135681152
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,8,1536,64,0,0.0760320007801056
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,8,2048,4,0,0.47386879920959474
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,8,2048,2,0,0.8787664413452149
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,8,2048,8,0,0.26658239364624026
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,8,2048,32,0,0.10700160264968872
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,8,2048,16,0,0.15970720052719117
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,8,2048,64,0,0.07391200065612794
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,128,8,2048,1,0,1.7141712188720704
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,8,2048,4,0,0.6268239974975586
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,8,2048,2,0,0.9589872360229492
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,8,2048,16,0,0.1801792025566101
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,8,2048,8,0,0.29625439643859863
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,8,2048,64,0,0.08830879926681519
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,8,2048,32,0,0.12087359428405761
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,128,8,2048,1,0,1.8276607513427734
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,8,3072,4,0,0.8366399765014648
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,8,3072,8,0,0.45961761474609375
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,8,3072,2,0,1.6641584396362306
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,8,3072,16,0,0.2696592092514038
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,8,3072,32,0,0.21341440677642823
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,8,3072,64,0,0.12074879407882691
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,128,8,3072,1,0,3.653452682495117
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,8,3072,4,0,0.8788623809814453
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,8,3072,2,0,1.6474895477294922
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,8,3072,16,0,0.29151999950408936
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,8,3072,32,0,0.18737280368804932
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,8,3072,64,0,0.13402400016784669
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,8,3072,8,0,0.6536064147949219
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,128,8,3072,1,0,3.2223838806152343
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,8,4096,4,0,1.3184399604797363
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,8,4096,16,0,0.41608800888061526
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,8,4096,8,0,0.7294159889221191
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,8,4096,32,0,0.284987211227417
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,8,4096,64,0,0.1745136022567749
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,8,4096,2,0,2.8795520782470705
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,8,4096,4,0,1.643280029296875
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,8,4096,2,0,2.5968399047851562
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,8,4096,8,0,0.7183775901794434
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,8,4096,16,0,0.44274239540100097
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,128,8,4096,1,0,6.295609664916992
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,8,4096,32,0,0.26452159881591797
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,8,4096,64,0,0.19731680154800416
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,128,8,4096,1,0,4.965991973876953
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,8,6144,8,0,1.3471199989318847
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,8,6144,4,0,2.790947151184082
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,8,6144,32,0,0.6061520099639892
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,8,6144,16,0,0.8562848091125488
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,8,6144,64,0,0.3183471918106079
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,8,6144,2,0,6.46978530883789
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,8,6144,4,0,2.542241668701172
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,8,6144,2,0,5.204867172241211
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,8,6144,16,0,0.8175536155700683
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,8,6144,8,0,1.3471055984497071
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,8,6144,64,0,0.3008944034576416
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,8,6144,32,0,0.49585280418395994
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,128,8,6144,1,0,14.8187744140625
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,128,8,6144,1,0,10.27194595336914
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,8,8192,8,0,2.3678783416748046
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,8,8192,16,0,1.6670591354370117
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,8,8192,32,0,0.9018560409545898
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,8,8192,64,0,0.547318410873413
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,8,8192,4,0,6.355148696899414
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,8,8192,2,0,13.543595886230468
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,8,8192,4,0,4.512540817260742
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,8,8192,2,0,8.172531127929688
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,8,8192,16,0,1.2034255981445312
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,8,8192,8,0,1.9959903717041017
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,8,8192,64,0,0.48076801300048827
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,8,8192,32,0,0.6587967872619629
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,128,16,1,1,0,0.0652400016784668
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,16,1,2,0,0.03489120006561279
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,16,1,4,0,0.02686559855937958
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,16,1,8,0,0.01884479969739914
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,16,1,16,0,0.016809600591659545
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,16,1,32,0,0.014499199390411378
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,16,1,64,0,0.016564799845218657
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,128,16,1,1,0,0.05958240032196045
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,16,1,2,0,0.04528320133686066
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,16,1,4,0,0.030921599268913268
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,16,1,8,0,0.02693760097026825
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,16,1,16,0,0.02274720072746277
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,16,1,32,0,0.022603200376033784
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,16,1,64,0,0.020656000077724456
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,128,16,16,1,0,0.061643201112747195
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,16,16,2,0,0.037027201056480406
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,16,16,4,0,0.02609120011329651
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,16,16,8,0,0.018624000251293182
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,16,16,16,0,0.016603200137615202
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,16,16,32,0,0.01465280055999756
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,16,16,64,0,0.014679999649524688
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,128,16,16,1,0,0.07270240187644958
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,16,16,2,0,0.04724160134792328
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,16,16,4,0,0.032979199290275575
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,16,16,8,0,0.02699199914932251
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,16,16,16,0,0.022870400547981264
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,16,16,32,0,0.022711999714374542
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,16,16,64,0,0.022881600260734557
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,128,16,32,1,0,0.0738655984401703
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,16,32,2,0,0.043119999766349795
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,16,32,4,0,0.02893120050430298
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,16,32,8,0,0.020851199328899384
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,16,32,16,0,0.01674720048904419
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,16,32,32,0,0.014657600224018097
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,16,32,64,0,0.014703999459743499
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,128,16,32,1,0,0.08824959993362427
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,16,32,2,0,0.05362719893455505
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,128,8,8192,1,0,26.089633178710937
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,16,32,4,0,0.039211198687553406
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,16,32,16,0,0.024728000164031982
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,16,32,8,0,0.04531520009040833
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,16,32,32,0,0.022932800650596618
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,16,32,64,0,0.03298240005970001
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,128,16,64,1,0,0.10154080390930176
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,16,64,2,0,0.09651039838790894
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,16,64,4,0,0.03516640067100525
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,16,64,16,0,0.018598400056362152
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,16,64,8,0,0.026807999610900878
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,16,64,64,0,0.01664319932460785
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,16,64,32,0,0.016726399958133697
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,16,64,2,0,0.0718895971775055
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,128,16,64,1,0,0.12578560113906861
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,16,64,4,0,0.04729120135307312
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,16,64,8,0,0.03513120114803314
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,16,64,16,0,0.026892799139022826
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,16,64,32,0,0.024792000651359558
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,16,64,64,0,0.024928000569343568
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,128,8,8192,1,0,17.802735900878908
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,128,16,128,1,0,0.15004479885101318
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,16,128,2,0,0.0822160005569458
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,16,128,8,0,0.03096800148487091
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,16,128,4,0,0.05761600136756897
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,16,128,16,0,0.02280319929122925
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,16,128,32,0,0.01860480010509491
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,16,128,64,0,0.02083040028810501
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,16,128,4,0,0.06562880277633668
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,128,16,128,1,0,0.19653279781341554
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,16,128,2,0,0.10738240480422974
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,16,128,8,0,0.04342080056667328
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,16,128,32,0,0.028977599740028382
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,16,128,16,0,0.033055999875068666
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,16,128,64,0,0.028857600688934327
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,16,256,2,0,0.14351359605789185
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,16,256,4,0,0.08352000117301941
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,128,16,256,1,0,0.2678352117538452
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,16,256,16,0,0.03304319977760315
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,16,256,8,0,0.049439999461174014
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,16,256,32,0,0.02892799973487854
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,16,256,64,0,0.026700800657272337
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,16,256,4,0,0.10877920389175415
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,16,256,2,0,0.18656159639358522
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,128,16,256,1,0,0.34253759384155275
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,16,256,8,0,0.06724479794502258
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,16,256,16,0,0.045244801044464114
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,16,256,32,0,0.039017599821090695
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,16,256,64,0,0.035129600763320924
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,16,512,4,0,0.1673743963241577
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,16,512,8,0,0.09697759747505189
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,16,512,2,0,0.2996864080429077
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,16,512,16,0,0.061737602949142455
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,16,512,32,0,0.04126560091972351
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,128,16,512,1,0,0.5676815986633301
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,16,512,64,0,0.039027199149131775
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,16,512,8,0,0.12130719423294067
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,16,512,4,0,0.2091360092163086
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,16,512,2,0,0.37079999446868894
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,16,512,32,0,0.0554751992225647
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,16,512,16,0,0.07828480005264282
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,16,512,64,0,0.04934560060501099
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,128,16,512,1,0,0.6950160026550293
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,16,1024,8,0,0.20842559337615968
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,16,1024,4,0,0.3651263952255249
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,16,1024,16,0,0.12652959823608398
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,16,1024,2,0,0.6849679946899414
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,16,1024,32,0,0.0843999981880188
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,16,1024,64,0,0.059596800804138185
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,16,1024,4,0,0.4321199893951416
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,128,16,1024,1,0,1.3068783760070801
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,16,1024,16,0,0.14970879554748534
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,16,1024,8,0,0.24807360172271728
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,16,1024,2,0,0.7914080142974853
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,16,1024,64,0,0.07477599978446961
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,16,1024,32,0,0.10481280088424683
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,128,16,1024,1,0,1.5142592430114745
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,16,1536,4,0,0.629145622253418
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,16,1536,8,0,0.3398576021194458
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,16,1536,16,0,0.2058624029159546
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,16,1536,64,0,0.09447839856147766
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,16,1536,32,0,0.13155360221862794
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,16,1536,2,0,1.163646411895752
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,16,1536,4,0,0.7087488174438477
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,128,16,1536,1,0,2.2058223724365233
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,16,1536,16,0,0.2537712097167969
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,16,1536,8,0,0.39657599925994874
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,16,1536,2,0,1.2910016059875489
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,16,1536,32,0,0.17395520210266113
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,16,1536,64,0,0.1132207989692688
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,128,16,1536,1,0,2.4885311126708984
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,16,2048,2,0,1.8170528411865234
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,16,2048,4,0,0.9077887535095215
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,16,2048,16,0,0.2938751935958862
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,16,2048,8,0,0.564027214050293
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,16,2048,32,0,0.18951040506362915
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,16,2048,64,0,0.14734079837799072
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,128,16,2048,1,0,3.4098159790039064
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,16,2048,4,0,1.2340543746948243
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,16,2048,2,0,1.8567823410034179
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,16,2048,16,0,0.32356479167938235
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,16,2048,8,0,0.5435711860656738
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,16,2048,32,0,0.2091088056564331
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,16,2048,64,0,0.14975520372390747
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,128,16,2048,1,0,3.613332748413086
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,16,3072,8,0,0.8792464256286621
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,16,3072,16,0,0.49730558395385743
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,16,3072,4,0,1.641444778442383
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,16,3072,32,0,0.30507678985595704
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,16,3072,64,0,0.21611518859863282
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,16,3072,2,0,3.3302783966064453
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,16,3072,4,0,1.969233512878418
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,16,3072,2,0,3.2352542877197266
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,16,3072,8,0,0.9174655914306641
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,16,3072,32,0,0.3326672077178955
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,16,3072,16,0,0.5368175983428956
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,16,3072,64,0,0.2293087959289551
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,128,16,3072,1,0,8.415585327148438
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,128,16,3072,1,0,6.464211273193359
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,16,4096,8,0,1.371014404296875
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,16,4096,16,0,0.7791071891784668
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,16,4096,4,0,3.5600528717041016
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,16,4096,32,0,0.4717120170593262
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,16,4096,64,0,0.4201519966125488
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,16,4096,2,0,6.123796844482422
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,16,4096,4,0,2.5428911209106446
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,16,4096,8,0,1.3625679969787599
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,16,4096,2,0,5.159273529052735
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,16,4096,16,0,0.7706255912780762
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,16,4096,32,0,0.49651198387145995
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,128,32,1,1,0,0.13048800230026245
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,16,4096,64,0,0.32140960693359377
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,32,1,2,0,0.061268800497055055
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,32,1,4,0,0.03305279910564422
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,32,1,8,0,0.02287680059671402
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,32,1,16,0,0.018729600310325622
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,32,1,32,0,0.014718399941921234
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,32,1,64,0,0.014585599303245544
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,128,32,1,1,0,0.09658240079879761
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,32,1,2,0,0.05757759809494019
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,32,1,8,0,0.030907198786735535
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,32,1,4,0,0.04132960140705109
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,32,1,16,0,0.02685439884662628
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,32,1,32,0,0.022655999660491942
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,32,1,64,0,0.020684799551963805
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,128,32,16,1,0,0.11148320436477661
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,32,16,2,0,0.06194080114364624
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,128,16,4096,1,0,13.801847839355469
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,32,16,4,0,0.03864000141620636
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,32,16,8,0,0.02667199969291687
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,32,16,32,0,0.014681600034236908
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,32,16,64,0,0.014431999623775482
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,32,16,16,0,0.02465119957923889
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,32,16,2,0,0.07197440266609192
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,128,32,16,1,0,0.1233024001121521
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,32,16,4,0,0.06405760049819946
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,32,16,8,0,0.03500320017337799
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,32,16,16,0,0.02890399992465973
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,32,16,32,0,0.031057599186897277
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,32,16,64,0,0.022729599475860597
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,128,32,32,1,0,0.13879200220108032
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,32,32,2,0,0.0887935996055603
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,32,32,4,0,0.04322560131549835
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,32,32,16,0,0.021294400095939636
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,32,32,8,0,0.029073598980903625
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,32,32,32,0,0.01658719927072525
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,32,32,64,0,0.016470399498939515
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,32,32,2,0,0.08832160234451295
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,128,32,32,1,0,0.1560927987098694
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,32,32,4,0,0.0534991979598999
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,32,32,8,0,0.039136001467704774
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,32,32,16,0,0.03088639974594116
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,32,32,32,0,0.02481919974088669
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,32,32,64,0,0.024719999730587007
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,128,32,64,1,0,0.1879536032676697
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,32,64,2,0,0.10260319709777832
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,32,64,4,0,0.05702880024909973
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,32,64,8,0,0.037196800112724304
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,32,64,32,0,0.018638400733470915
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,32,64,16,0,0.02691200077533722
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,128,16,4096,1,0,10.075630187988281
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,32,64,64,0,0.018775999546051025
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,32,64,2,0,0.12705279588699342
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,32,64,8,0,0.04934560060501099
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,32,64,4,0,0.07605760097503662
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,128,32,64,1,0,0.23138880729675293
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,32,64,16,0,0.03527039885520935
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,32,64,32,0,0.028067201375961304
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,32,64,64,0,0.02884959876537323
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,32,128,4,0,0.09030240178108215
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,32,128,2,0,0.1520848035812378
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,128,32,128,1,0,0.2814768075942993
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,32,128,8,0,0.05345439910888672
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,32,128,16,0,0.035155200958251955
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,32,128,32,0,0.024747200310230255
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,32,128,64,0,0.02260800004005432
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,32,128,2,0,0.19919519424438475
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,32,128,4,0,0.11260479688644409
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,128,32,128,1,0,0.3633663892745972
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,32,128,8,0,0.06777600049972535
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,32,128,16,0,0.04726400077342987
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,32,128,32,0,0.036985599994659425
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,32,128,64,0,0.032923200726509096
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,32,256,4,0,0.1491711974143982
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,32,256,2,0,0.2726671934127808
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,32,256,8,0,0.0883023977279663
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,32,256,16,0,0.05547040104866028
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,128,32,256,1,0,0.5100880146026612
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,32,256,32,0,0.03816959857940674
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,32,256,64,0,0.03391999900341034
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,32,256,4,0,0.19405440092086793
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,32,256,2,0,0.3472127914428711
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,32,256,8,0,0.11585119962692261
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,32,256,16,0,0.07392799854278564
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,128,32,256,1,0,0.6500048160552978
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,32,256,32,0,0.053446400165557864
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,32,256,64,0,0.045296001434326175
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,32,512,4,0,0.3104991912841797
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,32,512,2,0,0.576913595199585
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,32,512,8,0,0.17796319723129272
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,32,512,16,0,0.10875519514083862
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,32,512,32,0,0.07323520183563233
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,32,512,64,0,0.05351999998092651
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,128,32,512,1,0,1.1073472023010253
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,32,512,4,0,0.38696000576019285
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,32,512,2,0,0.7092127799987793
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,32,512,8,0,0.22167201042175294
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,32,512,16,0,0.13552160263061525
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,32,512,32,0,0.09440799951553344
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,128,32,512,1,0,1.3515199661254882
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,32,512,64,0,0.070033597946167
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,32,1024,4,0,0.703443193435669
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,32,1024,16,0,0.23254079818725587
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,32,1024,8,0,0.39164159297943113
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,32,1024,32,0,0.1496783971786499
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,32,1024,2,0,1.3182736396789552
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,32,1024,64,0,0.10781279802322388
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,128,32,1024,1,0,2.601318359375
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,32,1024,4,0,0.8251248359680176
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,32,1024,16,0,0.27728800773620604
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,32,1024,2,0,1.5430480003356934
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,32,1024,8,0,0.48383197784423826
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,32,1024,64,0,0.13125599622726442
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,32,1024,32,0,0.18058240413665771
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,128,32,1024,1,0,2.984868812561035
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,32,1536,4,0,1.196132755279541
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,32,1536,16,0,0.3761807918548584
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,32,1536,8,0,0.6501455783843995
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,32,1536,64,0,0.16694719791412355
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,32,1536,32,0,0.2508752107620239
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,32,1536,2,0,2.236577606201172
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,32,1536,4,0,1.4810511589050293
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,128,32,1536,1,0,4.927964782714843
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,32,1536,8,0,0.7307375907897949
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,32,1536,2,0,2.534516716003418
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,32,1536,32,0,0.277294397354126
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,32,1536,64,0,0.19814879894256593
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,32,1536,16,0,0.511291217803955
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,128,32,1536,1,0,4.923027038574219
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,32,2048,4,0,1.761742401123047
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,32,2048,16,0,0.5682112216949463
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,32,2048,8,0,0.953377628326416
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,32,2048,32,0,0.33510239124298097
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,32,2048,64,0,0.23208799362182617
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,32,2048,2,0,3.7770736694335936
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,32,2048,4,0,2.027587127685547
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,32,2048,2,0,3.6964752197265627
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,32,2048,8,0,1.045587158203125
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,128,32,2048,1,0,8.26184310913086
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,32,2048,32,0,0.38280160427093507
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,32,2048,16,0,0.6259344100952149
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,128,64,1,1,0,0.1683135986328125
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,64,1,2,0,0.09043999910354614
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,32,2048,64,0,0.3188960075378418
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,64,1,4,0,0.05151680111885071
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,64,1,8,0,0.033020800352096556
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,64,1,16,0,0.022735999524593355
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,64,1,32,0,0.018691200017929076
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,64,1,64,0,0.016475200653076172
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,64,1,4,0,0.05956320166587829
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,128,64,1,1,0,0.1743199944496155
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,64,1,2,0,0.09679999947547913
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,64,1,8,0,0.04108000099658966
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,64,1,16,0,0.030924800038337707
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,64,1,32,0,0.026361599564552307
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,64,1,64,0,0.02263679951429367
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,64,16,2,0,0.11393760442733765
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,128,64,16,1,0,0.21269760131835938
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,64,16,4,0,0.06343200206756591
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,64,16,8,0,0.03904640078544617
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,64,16,16,0,0.026940798759460448
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,64,16,32,0,0.020615999400615693
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,64,16,64,0,0.016756799817085267
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,64,16,2,0,0.1252511978149414
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,128,64,16,1,0,0.22860479354858398
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,64,16,4,0,0.07203360199928284
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,64,16,8,0,0.0472815990447998
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,64,16,16,0,0.03508639931678772
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,64,16,32,0,0.028942400217056276
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,128,32,2048,1,0,7.175244903564453
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,64,16,64,0,0.0248416006565094
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,128,64,32,1,0,0.2626768112182617
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,64,32,4,0,0.07618560194969178
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,64,32,2,0,0.13914560079574584
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,64,32,16,0,0.03289920091629028
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,64,32,8,0,0.04491199851036072
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,64,32,32,0,0.022758400440216063
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,64,32,64,0,0.018592000007629395
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,128,64,32,1,0,0.2909888029098511
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,64,32,4,0,0.09037759900093079
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,64,32,2,0,0.15808000564575195
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,64,32,32,0,0.030904000997543334
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,64,32,16,0,0.041289600729942325
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,64,32,8,0,0.05560320019721985
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,64,32,64,0,0.026897600293159483
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,64,64,2,0,0.19051680564880372
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,64,64,4,0,0.10521440505981446
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,128,64,64,1,0,0.36003680229187013
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,64,64,8,0,0.059854400157928464
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,64,64,16,0,0.03909600079059601
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,64,64,32,0,0.02892000079154968
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,64,64,64,0,0.02284960001707077
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,64,64,4,0,0.1309712052345276
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,64,64,2,0,0.23538560867309571
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,128,64,64,1,0,0.4344816207885742
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,64,64,8,0,0.0759935975074768
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,64,64,16,0,0.05145599842071533
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,64,64,32,0,0.03912160098552704
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,64,64,64,0,0.03300960063934326
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,64,128,4,0,0.1581727981567383
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,64,128,2,0,0.28834240436553954
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,64,128,8,0,0.09021120071411133
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,128,64,128,1,0,0.5441504001617432
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,64,128,16,0,0.05681920051574707
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,64,128,32,0,0.039136001467704774
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,64,128,64,0,0.03287039995193482
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,64,128,4,0,0.2072671890258789
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,64,128,2,0,0.36947999000549314
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,64,128,8,0,0.11887680292129517
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,64,128,16,0,0.07608799934387207
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,128,64,128,1,0,0.6975584030151367
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,64,128,64,0,0.04321439862251282
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,64,128,32,0,0.05348160266876221
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,64,256,4,0,0.2850224018096924
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,64,256,2,0,0.5221807956695557
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,64,256,8,0,0.16053919792175292
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,64,256,32,0,0.06604160070419311
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,64,256,64,0,0.05135359764099121
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,64,256,16,0,0.10097600221633911
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,128,64,256,1,0,0.9992992401123046
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,64,256,8,0,0.20781919956207276
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,64,256,4,0,0.3637712001800537
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,64,256,16,0,0.12961920499801635
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,64,256,2,0,0.6656144142150879
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,64,256,64,0,0.06576640009880066
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,64,256,32,0,0.0905247986316681
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,128,64,256,1,0,1.2653200149536132
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,64,512,4,0,0.6004240036010742
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,64,512,8,0,0.3326672077178955
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,64,512,32,0,0.1312064051628113
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,64,512,16,0,0.19985920190811157
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,64,512,2,0,1.1277440071105957
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,64,512,64,0,0.09644160270690919
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,128,64,512,1,0,2.172108840942383
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,64,512,4,0,0.7368624210357666
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,64,512,16,0,0.25102078914642334
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,64,512,8,0,0.4287568092346191
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,64,512,2,0,1.383078384399414
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,64,512,32,0,0.16562720537185668
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,64,512,64,0,0.12806400060653686
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,128,64,512,1,0,2.6739871978759764
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,64,1024,4,0,1.3717424392700195
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,64,1024,8,0,0.7523056030273437
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,64,1024,16,0,0.43393759727478026
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,64,1024,32,0,0.27702879905700684
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,64,1024,64,0,0.19673919677734375
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,64,1024,2,0,2.6582544326782225
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,64,1024,4,0,1.6130224227905274
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,128,64,1024,1,0,5.3897216796875
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,64,1024,8,0,0.8780415534973145
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,64,1024,2,0,3.0414255142211912
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,64,1024,32,0,0.331987190246582
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,64,1024,16,0,0.561956787109375
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,64,1024,64,0,0.239900803565979
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,128,1,4,0,0.09041759967803956
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,128,1,2,0,0.18832160234451295
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,128,128,1,1,0,0.3270303964614868
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,128,1,8,0,0.051475197076797485
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,128,1,16,0,0.03100000023841858
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,128,1,64,0,0.01676799952983856
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,128,1,32,0,0.024726399779319765
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,128,1,2,0,0.173089599609375
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,128,128,1,1,0,0.3246767997741699
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,128,1,4,0,0.09652159810066223
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,128,1,8,0,0.059671998023986816
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,128,1,16,0,0.039201599359512326
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,128,1,32,0,0.030905601382255555
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,128,1,64,0,0.026833599805831908
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,128,16,2,0,0.21486239433288573
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,128,128,16,1,0,0.41246719360351564
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,128,16,4,0,0.11329599618911743
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,128,16,8,0,0.06496959924697876
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,128,64,1024,1,0,5.939575958251953
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,128,16,32,0,0.026867198944091796
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,128,16,16,0,0.04106079936027527
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,128,16,64,0,0.0226160004734993
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,128,16,2,0,0.2308255910873413
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,128,16,4,0,0.1253600001335144
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,128,128,16,1,0,0.4294559955596924
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,128,16,8,0,0.07480800151824951
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,128,16,16,0,0.04935680031776428
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,128,16,32,0,0.03707680106163025
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,128,16,64,0,0.028934401273727418
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,128,32,4,0,0.1416991949081421
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,128,32,2,0,0.26746559143066406
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,128,128,32,1,0,0.5117263793945312
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,128,32,16,0,0.04740799963474274
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,128,32,8,0,0.07992640137672424
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,128,32,32,0,0.035087999701499936
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,128,32,64,0,0.02479359954595566
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,128,32,4,0,0.16206079721450806
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,128,32,2,0,0.2948319911956787
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,128,128,32,1,0,0.5516464233398437
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,128,32,8,0,0.09242399930953979
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,128,32,16,0,0.05963839888572693
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,128,32,32,0,0.045281600952148435
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,128,32,64,0,0.03501279950141907
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,128,64,4,0,0.19690879583358764
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,128,64,2,0,0.3659424066543579
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,128,64,8,0,0.11050239801406861
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,128,64,16,0,0.06571199893951415
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,128,128,64,1,0,0.7017280101776123
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,128,64,32,0,0.045238399505615236
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,128,64,64,0,0.03493599891662598
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,128,64,4,0,0.24038560390472413
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,128,64,2,0,0.4404463768005371
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,128,64,16,0,0.08446080088615418
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,128,64,8,0,0.1381824016571045
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,128,128,64,1,0,0.8392911911010742
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,128,64,32,0,0.05956159830093384
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,128,64,64,0,0.04727360010147095
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,128,128,4,0,0.2990992069244385
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,128,128,8,0,0.1685328006744385
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,128,128,16,0,0.10265599489212036
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,128,128,2,0,0.5583807945251464
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,128,128,32,0,0.06895359754562377
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,128,128,64,0,0.05140320062637329
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,128,128,128,1,0,1.0738544464111328
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,128,128,4,0,0.3829024076461792
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,128,128,8,0,0.2221247911453247
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,128,128,16,0,0.1345039963722229
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,128,128,2,0,0.7104991912841797
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,128,128,32,0,0.09186400175094604
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,128,128,64,0,0.06983839869499206
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,128,128,128,1,0,1.3617823600769043
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,128,256,8,0,0.30532159805297854
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,128,256,4,0,0.5429920196533203
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,128,256,32,0,0.12295839786529542
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,128,256,16,0,0.1818176031112671
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,128,256,2,0,1.0198111534118652
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,128,256,64,0,0.09029600024223328
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,128,128,256,1,0,1.95372314453125
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,128,256,4,0,0.6928016185760498
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,128,256,16,0,0.2374336004257202
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,128,256,2,0,1.2998928070068358
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,128,256,8,0,0.38946559429168703
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,128,256,32,0,0.16086560487747192
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,128,256,64,0,0.11732319593429566
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,128,128,256,1,0,2.5027952194213867
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,128,512,4,0,1.1727519989013673
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,128,512,8,0,0.6438576221466065
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,128,512,16,0,0.37936480045318605
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,128,512,32,0,0.24434239864349366
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,128,512,64,0,0.1766703963279724
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,128,512,2,0,2.2307855606079103
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,128,512,4,0,1.4607583999633789
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,128,128,512,1,0,4.267465591430664
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,128,512,8,0,0.7965472221374512
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,128,512,2,0,2.731110382080078
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,128,512,16,0,0.4803775787353516
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,128,512,32,0,0.30948638916015625
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,128,512,64,0,0.22476480007171631
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,256,1,4,0,0.17392640113830565
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,256,1,2,0,0.3583055973052979
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,128,256,1,1,0,0.6403920173645019
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,256,1,8,0,0.09315040111541747
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,256,1,16,0,0.0540336012840271
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,256,1,32,0,0.03300159871578216
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,256,1,64,0,0.02276960015296936
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,256,1,2,0,0.3277616024017334
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,256,1,4,0,0.17444319725036622
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,128,256,1,1,0,0.6279456138610839
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,128,128,512,1,0,5.30570068359375
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,256,1,8,0,0.09821919798851013
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,256,1,16,0,0.05769439935684204
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,256,1,64,0,0.03091840147972107
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,256,1,32,0,0.039961600303649904
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,256,16,4,0,0.2182255983352661
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,256,16,2,0,0.41574721336364745
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,128,256,16,1,0,0.7950511932373047
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,256,16,8,0,0.11659519672393799
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,256,16,16,0,0.06802719831466675
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,256,16,64,0,0.030987200140953065
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,256,16,32,0,0.04333600103855133
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,256,16,4,0,0.23500959873199462
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,256,16,2,0,0.4340047836303711
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,128,256,16,1,0,0.8295503616333008
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,256,16,16,0,0.07792320251464843
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,256,16,8,0,0.12887040376663209
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,256,16,32,0,0.05206720232963562
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,256,16,64,0,0.03906719982624054
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,256,32,4,0,0.2719599962234497
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,256,32,2,0,0.5165823936462403
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,256,32,8,0,0.14888960123062134
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,256,32,16,0,0.08421120047569275
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,128,256,32,1,0,0.9857359886169433
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,256,32,32,0,0.05345919728279114
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,256,32,64,0,0.03920960128307342
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,256,32,4,0,0.301580810546875
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,256,32,2,0,0.5600016117095947
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,256,32,8,0,0.1703760027885437
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,256,32,16,0,0.10043840408325196
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,128,256,32,1,0,1.0734432220458985
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,256,32,64,0,0.05138400197029114
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,256,32,32,0,0.06588000059127808
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,256,64,4,0,0.37702720165252684
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,256,64,2,0,0.7104976177215576
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,256,64,8,0,0.2086416006088257
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,256,64,16,0,0.12322720289230346
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,256,64,32,0,0.078056001663208
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,256,64,64,0,0.05761759877204895
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,128,256,64,1,0,1.3637120246887207
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,256,64,4,0,0.4569263935089111
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,256,64,8,0,0.25592479705810545
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,256,64,2,0,0.8516991615295411
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,256,64,16,0,0.1538272023200989
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,256,64,32,0,0.09956160187721252
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,256,64,64,0,0.07192639708518982
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,128,256,64,1,0,1.6444143295288085
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,256,128,4,0,0.5796639919281006
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,256,128,8,0,0.3203007936477661
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,256,128,2,0,1.0875679969787597
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,256,128,16,0,0.19264800548553468
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,256,128,32,0,0.12543679475784303
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,256,128,64,0,0.09050719738006592
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,128,256,128,1,0,2.108572769165039
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,256,128,4,0,0.738102388381958
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,256,128,8,0,0.4121407985687256
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,256,128,16,0,0.2480799913406372
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,256,128,2,0,1.3897007942199706
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,256,128,32,0,0.1631999969482422
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,256,128,64,0,0.12087359428405761
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,128,256,128,1,0,2.6923967361450196
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,32,256,256,4,0,1.0589360237121581
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,16,256,256,8,0,0.5895296096801758
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,8,256,256,16,0,0.34842720031738283
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,4,256,256,32,0,0.22784800529479982
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,2,256,256,64,0,0.1666432023048401
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,64,256,256,2,0,2.010312080383301
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,32,256,256,4,0,1.3551631927490235
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,float16,128,256,256,1,0,3.8747425079345703
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,16,256,256,8,0,0.7527775764465332
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,8,256,256,16,0,0.44725918769836426
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,64,256,256,2,0,2.5578432083129883
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,2,256,256,64,0,0.21873760223388672
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,4,256,256,32,0,0.29521760940551756
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,float16,fp8,128,256,256,1,0,4.970100784301758
