framework,version,device,op_name,kernel_source,mla_dtype,kv_cache_dtype,num_heads,batch_size,isl,tp_size,step,latency
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,128,1,1,1,0,1.1350640133023262
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,128,1,4,1,0,0.9778160005807877
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,128,1,8,1,0,1.0170679986476898
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,128,1,16,1,0,0.9004480019211769
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,128,1,32,1,0,1.0230359956622124
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,128,1,64,1,0,0.9729760065674782
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,128,1,128,1,0,1.649132028222084
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,128,1,256,1,0,1.717531979084015
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,128,1,512,1,0,1.4351239949464798
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,128,1,1024,1,0,1.3202359974384308
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,128,1,2048,1,0,1.7813720107078552
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,128,1,4096,1,0,2.271332025527954
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,128,1,8192,1,0,3.7399080097675323
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,128,1,16384,1,0,12.41215991973877
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,128,2,4,1,0,1.0549160093069077
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,128,2,8,1,0,1.067791998386383
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,128,2,16,1,0,1.0054319873452187
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,128,2,32,1,0,0.9467199966311455
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,128,2,64,1,0,0.9250719994306564
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,128,2,128,1,0,0.9491479992866516
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,128,2,256,1,0,0.953284002840519
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,128,2,512,1,0,0.9714720025658607
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,128,2,1024,1,0,1.087799996137619
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,128,2,2048,1,0,1.496372014284134
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,128,2,4096,1,0,2.684100031852722
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,128,2,8192,1,0,7.775988042354584
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,128,2,16384,1,0,25.693172216415405
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,128,4,4,1,0,0.8894240036606789
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,128,4,8,1,0,0.892812006175518
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,128,4,16,1,0,0.9094239994883537
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,128,4,32,1,0,0.8948320001363754
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,128,4,64,1,0,0.8817680105566978
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,128,4,128,1,0,0.8967879861593246
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,128,4,256,1,0,0.9188319966197014
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,128,4,512,1,0,0.9915600195527077
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,128,4,1024,1,0,1.3240240067243576
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,128,4,2048,1,0,2.200119972229004
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,128,4,4096,1,0,5.04991602897644
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,128,4,8192,1,0,16.110495924949646
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,128,4,16384,1,0,51.85909557342529
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,128,8,4,1,0,0.9124040082097054
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,128,8,8,1,0,0.9032239988446236
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,128,8,16,1,0,0.9099719971418381
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,128,8,32,1,0,0.8989160060882568
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,128,8,64,1,0,0.9266199991106987
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,128,8,128,1,0,0.8808920010924339
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,128,8,256,1,0,0.9820359945297241
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,128,8,512,1,0,1.267304003238678
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,128,8,1024,1,0,2.0096720159053802
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,128,8,2048,1,0,4.031044065952301
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,128,8,4096,1,0,10.985719919204712
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,128,8,8192,1,0,32.83556056022644
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,64,1,1,1,0,1.0151760205626488
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,64,1,4,1,0,1.0168799981474876
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,64,1,8,1,0,1.041603997349739
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,64,1,16,1,0,1.0503240078687668
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,64,1,32,1,0,1.0597200095653534
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,64,1,64,1,0,1.0430000126361847
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,64,1,128,1,0,1.0456960052251816
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,64,1,256,1,0,1.0535960048437119
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,64,1,512,1,0,1.0768279880285263
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,64,1,1024,1,0,1.0466079860925674
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,64,1,2048,1,0,1.0563919991254807
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,64,1,4096,1,0,1.3637559711933136
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,64,1,8192,1,0,2.19936802983284
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,64,1,16384,1,0,5.0734440088272095
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,64,2,4,1,0,0.9140040054917336
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,64,2,8,1,0,0.9064159989356995
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,64,2,16,1,0,0.9968159943819046
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,64,2,32,1,0,0.970720000565052
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,64,2,64,1,0,0.9697679951786995
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,64,2,128,1,0,0.9777040034532547
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,64,2,256,1,0,0.9833800047636032
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,64,2,512,1,0,0.9781640022993088
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,64,2,1024,1,0,1.0007359981536865
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,64,2,2048,1,0,1.1874639838933945
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,64,2,4096,1,0,1.725943997502327
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,64,2,8192,1,0,3.478680044412613
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,64,2,16384,1,0,12.70224380493164
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,64,4,4,1,0,0.9587279930710793
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,64,4,8,1,0,1.0437639951705933
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,64,4,16,1,0,0.9652559980750084
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,64,4,32,1,0,0.9383720010519028
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,64,4,64,1,0,0.9346640035510063
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,64,4,128,1,0,0.9340440034866333
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,64,4,256,1,0,0.8719600066542625
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,64,4,512,1,0,0.9149880036711693
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,64,4,1024,1,0,1.0520799905061722
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,64,4,2048,1,0,1.472104012966156
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,64,4,4096,1,0,2.6532959938049316
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,64,4,8192,1,0,7.04829204082489
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,64,4,16384,1,0,25.7423198223114
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,64,8,4,1,0,1.076984003186226
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,64,8,8,1,0,1.047667995095253
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,64,8,16,1,0,1.0248720049858093
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,64,8,32,1,0,1.020664006471634
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,64,8,64,1,0,1.0146400034427643
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,64,8,128,1,0,1.031943991780281
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,64,8,256,1,0,1.017788015305996
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,64,8,512,1,0,1.0979399979114532
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,64,8,1024,1,0,1.4272039979696274
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,64,8,2048,1,0,2.264912039041519
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,64,8,4096,1,0,5.091107964515686
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,64,8,8192,1,0,16.052576065063477
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,128,8,16384,1,0,109.91735935211182
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,64,8,16384,1,0,51.041356563568115
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,32,1,1,1,0,0.9899040013551712
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,32,1,4,1,0,0.9984000027179718
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,32,1,8,1,0,0.9959719926118851
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,32,1,16,1,0,0.9873439967632294
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,32,1,32,1,0,0.981840007007122
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,32,1,64,1,0,1.0086919963359833
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,32,1,128,1,0,0.9764320030808449
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,32,1,256,1,0,0.9819919988512993
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,32,1,512,1,0,0.9711960032582283
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,32,1,1024,1,0,0.9718960002064705
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,32,1,2048,1,0,0.9629880040884018
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,32,1,4096,1,0,1.0537440031766891
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,32,1,8192,1,0,1.5155719965696335
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,32,1,16384,1,0,2.9616400003433228
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,32,2,4,1,0,0.9321639984846115
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,32,2,8,1,0,0.947843998670578
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,32,2,16,1,0,0.935528002679348
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,32,2,32,1,0,0.9560759961605072
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,32,2,64,1,0,0.9426160007715225
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,32,2,128,1,0,0.9356200024485588
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,32,2,256,1,0,0.9210559949278831
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,32,2,512,1,0,0.9120159968733788
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,32,2,1024,1,0,0.922715999186039
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,32,2,2048,1,0,0.9408360049128532
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,32,2,4096,1,0,1.2864360064268112
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,32,2,8192,1,0,2.3906919956207275
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,32,2,16384,1,0,5.552836000919342
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,32,4,4,1,0,1.3611960113048553
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,32,4,8,1,0,1.3630240112543106
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,32,4,16,1,0,1.3745280057191849
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,32,4,32,1,0,1.2988679707050323
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,32,4,64,1,0,1.276655986905098
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,32,4,128,1,0,1.2634440064430237
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,32,4,256,1,0,1.2516119927167892
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,32,4,512,1,0,1.223688006401062
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,32,4,1024,1,0,1.221876010298729
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,32,4,2048,1,0,1.334079995751381
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,32,4,4096,1,0,1.8357200026512146
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,32,4,8192,1,0,3.6227999925613403
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,32,4,16384,1,0,13.211419939994812
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,32,8,4,1,0,0.9118440076708794
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,32,8,8,1,0,0.9146200120449066
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,32,8,16,1,0,0.8904799968004227
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,32,8,32,1,0,0.9117040038108826
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,32,8,64,1,0,0.9102040007710457
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,32,8,128,1,0,0.9028880000114441
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,32,8,256,1,0,0.9107399955391884
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,32,8,512,1,0,0.9390520080924034
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,32,8,1024,1,0,1.0470639914274216
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,32,8,2048,1,0,1.4396920055150986
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,32,8,4096,1,0,2.756435990333557
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,32,8,8192,1,0,7.615595996379852
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,32,8,16384,1,0,26.75457191467285
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,128,16,4,1,0,0.944423995912075
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,128,16,8,1,0,0.9351759999990463
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,128,16,16,1,0,0.9096959978342056
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,128,16,32,1,0,0.9165719971060753
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,128,16,64,1,0,0.9425920024514198
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,128,16,128,1,0,1.018719993531704
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,128,16,256,1,0,1.2552840113639832
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,128,16,512,1,0,1.903768002986908
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,128,16,1024,1,0,3.6518600285053253
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,128,16,2048,1,0,8.649167895317078
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,128,16,4096,1,0,22.99506378173828
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,64,16,4,1,0,0.9423720017075539
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,64,16,8,1,0,0.9327159970998764
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,64,16,16,1,0,0.9430720061063766
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,64,16,32,1,0,0.9284960031509399
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,64,16,64,1,0,0.9273119941353798
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,64,16,128,1,0,0.9555519968271255
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,64,16,256,1,0,1.0208439901471138
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,64,16,512,1,0,1.285280004143715
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,64,16,1024,1,0,2.044760048389435
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,64,16,2048,1,0,4.095467984676361
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,64,16,4096,1,0,11.34312391281128
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,32,16,4,1,0,1.3014359921216965
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,32,16,8,1,0,1.2583440095186234
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,32,16,16,1,0,1.158824011683464
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,32,16,32,1,0,1.1538319885730743
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,32,16,64,1,0,1.1065919995307922
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,32,16,128,1,0,1.073427990078926
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,32,16,256,1,0,1.0764120221138
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,32,16,512,1,0,1.1042399853467941
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,32,16,1024,1,0,1.4047559946775436
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,32,16,2048,1,0,2.3245560228824615
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,32,16,4096,1,0,5.394848048686981
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,128,32,4,1,0,1.1515520066022873
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,128,32,8,1,0,1.1072800010442734
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,128,32,16,1,0,1.0286840051412582
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,128,32,32,1,0,1.081727996468544
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,128,32,64,1,0,1.1770479828119278
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,128,32,128,1,0,1.3314399868249893
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,128,32,256,1,0,1.9024679958820343
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,128,32,512,1,0,3.4307880103588104
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,128,32,1024,1,0,7.208992004394531
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,128,32,2048,1,0,18.089947938919067
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,128,32,4096,1,0,45.94436454772949
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,64,32,4,1,0,1.5668240040540695
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,64,32,8,1,0,1.563844010233879
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,64,32,16,1,0,1.5606480091810226
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,64,32,32,1,0,1.5412879884243011
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,64,32,64,1,0,1.5512759983539581
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,64,32,128,1,0,1.616004005074501
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,64,32,256,1,0,1.7590879946947098
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,64,32,512,1,0,2.326172024011612
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,64,32,1024,1,0,3.876283973455429
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,64,32,2048,1,0,8.744975924491882
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,64,32,4096,1,0,22.28280019760132
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,16,1,1,1,0,1.057479977607727
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,16,1,4,1,0,1.1735039949417114
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,16,1,8,1,0,1.0911279767751694
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,16,1,16,1,0,1.054940015077591
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,16,1,32,1,0,1.0418999791145325
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,16,1,64,1,0,1.0496719926595688
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,16,1,128,1,0,1.0488040000200272
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,16,1,256,1,0,1.056807979941368
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,16,1,512,1,0,1.050467997789383
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,16,1,1024,1,0,1.0678080022335052
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,16,1,2048,1,0,1.0428760051727295
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,16,1,4096,1,0,1.0187559947371483
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,16,1,8192,1,0,1.245635986328125
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,16,1,16384,1,0,2.022823989391327
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,16,2,4,1,0,1.0307000130414963
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,16,2,8,1,0,1.0243519991636276
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,16,2,16,1,0,1.0064999982714653
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,16,2,32,1,0,1.0158400014042854
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,16,2,64,1,0,1.0154159888625145
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,16,2,128,1,0,0.9946279898285866
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,16,2,256,1,0,0.9944000020623207
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,16,2,512,1,0,0.9605680033564568
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,16,2,1024,1,0,0.9706959873437881
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,16,2,2048,1,0,0.9439520016312599
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,16,2,4096,1,0,1.025504007935524
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,16,2,8192,1,0,1.5255679935216904
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,16,2,16384,1,0,3.0127640068531036
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,16,4,4,1,0,0.9646320044994354
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,16,4,8,1,0,0.9572200030088425
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,16,4,16,1,0,0.9894119948148727
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,16,4,32,1,0,0.9499079883098602
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,16,4,64,1,0,0.9277720004320145
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,16,4,128,1,0,0.9439960047602654
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,16,4,256,1,0,0.9358159974217415
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,16,4,512,1,0,0.9232080057263374
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,16,4,1024,1,0,0.9249839931726456
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,16,4,2048,1,0,0.939751997590065
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,16,4,4096,1,0,1.2705319970846176
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,16,4,8192,1,0,2.116320013999939
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,16,4,16384,1,0,5.869839906692505
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,16,8,4,1,0,0.9073280021548271
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,16,8,8,1,0,0.9142759963870049
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,16,8,16,1,0,0.8949479907751083
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,16,8,32,1,0,0.8985399976372719
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,16,8,64,1,0,0.9005599990487099
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,16,8,128,1,0,0.9804919958114624
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,16,8,256,1,0,0.895224004983902
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,16,8,512,1,0,0.887188009917736
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,16,8,1024,1,0,0.9107199981808662
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,16,8,2048,1,0,1.1210040003061295
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,16,8,4096,1,0,1.6761839985847473
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,128,64,4,1,0,1.8195119947195053
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,16,8,8192,1,0,3.786352038383484
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,128,64,8,1,0,1.8915840089321136
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,128,64,16,1,0,1.8977120071649551
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,128,64,32,1,0,1.9038399755954742
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,128,64,64,1,0,2.115656018257141
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,32,32,4,1,0,0.9277800098061562
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,128,64,128,1,0,2.0090959817171097
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,16,8,16384,1,0,13.004976153373718
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,32,32,8,1,0,1.0061840042471886
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,32,32,16,1,0,0.9165600016713142
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,16,16,4,1,0,0.9284839928150177
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,32,32,32,1,0,0.9129239991307259
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,16,16,8,1,0,0.9185239970684052
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,128,64,256,1,0,3.344748020172119
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,32,32,64,1,0,0.9267159998416901
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,16,16,16,1,0,1.1351679861545563
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,32,32,128,1,0,0.9625959992408752
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,16,16,32,1,0,0.9252519980072975
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,16,16,64,1,0,0.9178880006074905
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,32,32,256,1,0,1.014804020524025
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,16,16,128,1,0,0.9100960046052933
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,32,32,512,1,0,1.2890679836273193
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,16,16,256,1,0,0.8967160061001778
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,16,16,512,1,0,0.9362640008330345
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,32,32,1024,1,0,2.129891961812973
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,16,16,1024,1,0,1.079252004623413
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,128,64,512,1,0,6.72350800037384
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,16,16,2048,1,0,1.4669880121946335
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,32,32,2048,1,0,4.399911999702454
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,16,16,4096,1,0,3.0413279831409454
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,16,32,4,1,0,0.9648439958691597
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,16,32,8,1,0,1.0313880071043968
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,16,32,16,1,0,0.9774440005421638
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,16,32,32,1,0,0.9319920018315315
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,16,32,64,1,0,0.9367519989609718
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,16,32,128,1,0,0.9272880032658577
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,16,32,256,1,0,0.962411992251873
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,16,32,512,1,0,1.0725039839744568
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,128,64,1024,1,0,15.665555953979492
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,32,32,4096,1,0,11.980211853981018
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,16,32,1024,1,0,1.398939996957779
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,32,64,4,1,0,1.1443320214748383
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,16,32,2048,1,0,2.6150040328502655
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,32,64,8,1,0,1.1479759961366653
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,32,64,16,1,0,1.0786040276288986
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,32,64,32,1,0,1.1010599881410599
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,32,64,64,1,0,1.022292010486126
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,32,64,128,1,0,1.10275998711586
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,16,32,4096,1,0,5.878859996795654
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,32,64,256,1,0,1.3750759959220886
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,16,64,4,1,0,1.095648005604744
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,32,64,512,1,0,2.054883986711502
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,16,64,8,1,0,1.1131359785795212
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,16,64,16,1,0,1.1078119724988937
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,16,64,32,1,0,1.1255119889974594
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,32,64,1024,1,0,4.185775905847549
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,16,64,64,1,0,1.1079680025577545
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,16,64,128,1,0,1.1284279972314835
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,16,64,256,1,0,1.246291995048523
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,16,64,512,1,0,1.483475998044014
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,16,64,1024,1,0,2.4709599912166595
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,32,64,2048,1,0,9.781643986701965
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,16,64,2048,1,0,5.026432037353516
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,128,64,2048,1,0,37.782567501068115
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,64,64,4,1,0,1.0427079796791077
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,64,64,8,1,0,1.0162680000066757
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,64,64,16,1,0,1.1467680037021637
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,64,64,32,1,0,1.1066519916057587
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,64,64,64,1,0,1.2310639917850494
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,64,64,128,1,0,1.4032919853925705
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,64,64,256,1,0,1.9378120005130768
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,64,64,512,1,0,3.5255320072174072
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,64,64,1024,1,0,7.195819973945618
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,64,64,2048,1,0,18.123831748962402
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,32,128,4,1,0,1.1379039883613586
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,32,128,8,1,0,1.1583400219678879
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,32,128,16,1,0,1.1896560192108154
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,32,128,32,1,0,1.2103880047798157
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,32,128,64,1,0,1.311916008591652
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,32,128,128,1,0,1.4811999946832657
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,32,128,256,1,0,2.1032319962978363
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,32,128,512,1,0,3.847804009914398
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,16,128,4,1,0,1.122423991560936
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,16,128,8,1,0,1.1232279986143112
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,16,128,16,1,0,1.1078639775514603
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,32,128,1024,1,0,7.922404110431671
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,128,128,4,1,0,1.7555040121078491
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,16,128,32,1,0,1.1045879870653152
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,16,128,64,1,0,1.1435439884662628
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,16,128,128,1,0,1.2225079983472824
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,128,128,8,1,0,2.4094079732894897
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,16,128,256,1,0,1.451435998082161
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,128,128,16,1,0,2.569552004337311
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,16,128,512,1,0,2.4371199905872345
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,128,128,32,1,0,2.7529680132865906
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,16,128,1024,1,0,4.677339971065521
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,128,128,64,1,0,3.099159985780716
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,128,128,128,1,0,3.9265920221805573
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,128,128,256,1,0,6.723047971725464
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,128,128,512,1,0,14.820371985435486
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,128,128,1024,1,0,32.411336183547974
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,64,128,4,1,0,1.3539319932460785
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,64,128,8,1,0,1.3724839985370636
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,64,128,16,1,0,1.4127360135316849
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,64,128,32,1,0,1.516751989722252
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,64,128,64,1,0,1.6905759871006012
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,64,128,128,1,0,2.13604399561882
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,64,128,256,1,0,3.5185920000076294
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,64,128,512,1,0,6.907644033432007
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,64,128,1024,1,0,14.971652030944824
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,32,256,4,1,0,1.5525519996881485
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,32,256,8,1,0,1.5604639947414398
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,32,256,16,1,0,1.626128003001213
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,32,256,32,1,0,1.6629759967327118
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,32,256,64,1,0,1.8678520023822784
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,32,256,128,1,0,2.391468048095703
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,32,256,256,1,0,3.909036010503769
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,32,256,512,1,0,7.634899914264679
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,16,256,4,1,0,1.9459159970283508
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,16,256,8,1,0,1.8423639982938766
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,16,256,16,1,0,1.7558879852294922
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,16,256,32,1,0,1.7408399879932404
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,16,256,64,1,0,1.7994160056114197
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,16,256,128,1,0,1.9812360107898712
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,16,256,256,1,0,2.6193639636039734
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,16,256,512,1,0,4.70331996679306
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,128,256,4,1,0,3.3422200083732605
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,128,256,8,1,0,3.419704020023346
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,128,256,16,1,0,3.5508600175380707
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,128,256,32,1,0,3.7389160096645355
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,128,256,64,1,0,4.654547989368439
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,128,256,128,1,0,6.959416091442108
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,128,256,256,1,0,13.90202796459198
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,128,256,512,1,0,29.275304555892944
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,64,256,4,1,0,1.9410479813814163
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,64,256,8,1,0,1.914007991552353
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,64,256,16,1,0,2.0282559990882874
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,64,256,32,1,0,2.2086720168590546
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,64,256,64,1,0,2.6588400304317474
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,64,256,128,1,0,3.8906319737434387
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,64,256,256,1,0,6.681019961833954
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,64,256,512,1,0,14.28632390499115
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,32,512,4,1,0,2.278519958257675
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,32,512,8,1,0,2.3706800043582916
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,32,512,16,1,0,2.4517120122909546
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,32,512,32,1,0,2.6238159835338593
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,32,512,64,1,0,3.124500036239624
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,32,512,128,1,0,4.4687559604644775
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,32,512,256,1,0,7.5816240310668945
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,16,512,4,1,0,2.043740004301071
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,16,512,8,1,0,2.0484359860420227
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,16,512,16,1,0,2.083860009908676
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,16,512,32,1,0,2.2313120663166046
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,16,512,64,1,0,2.390023946762085
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,16,512,128,1,0,3.0828039944171906
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,16,512,256,1,0,4.9179840087890625
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,128,512,4,1,0,4.791256010532379
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,128,512,8,1,0,5.008520007133484
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,128,512,16,1,0,5.359891951084137
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,128,512,32,1,0,6.441891968250275
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,128,512,64,1,0,8.713055849075317
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,128,512,128,1,0,14.196900010108948
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,128,512,256,1,0,27.953912019729614
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,64,512,4,1,0,3.0594439804553986
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,64,512,8,1,0,3.0820280015468597
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,64,512,16,1,0,3.3052200078964233
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,64,512,32,1,0,3.7651199996471405
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,64,512,64,1,0,4.954444110393524
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,64,512,128,1,0,7.426980018615723
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,64,512,256,1,0,13.851984024047852
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,32,1024,4,1,0,3.9614199697971344
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,32,1024,8,1,0,4.004435986280441
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,32,1024,16,1,0,4.218320071697235
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,32,1024,32,1,0,4.730391979217529
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,32,1024,64,1,0,6.010884046554565
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,16,1024,4,1,0,3.252151995897293
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,32,1024,128,1,0,8.87067198753357
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,16,1024,8,1,0,3.2572999596595764
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,16,1024,16,1,0,3.3890559673309326
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,16,1024,32,1,0,3.5173839926719666
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,16,1024,64,1,0,4.285268008708954
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,16,1024,128,1,0,6.0093119740486145
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,128,1024,4,1,0,8.56335186958313
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,128,1024,8,1,0,8.932455897331238
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,128,1024,16,1,0,10.000724077224731
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,128,1024,32,1,0,12.48697578907013
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,128,1024,64,1,0,17.142592191696167
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,128,1024,128,1,0,28.854252099990845
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,64,1024,4,1,0,5.4952200055122375
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,64,1024,8,1,0,5.725347936153412
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,64,1024,16,1,0,6.441695928573608
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,64,1024,32,1,0,7.3102399706840515
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,64,1024,64,1,0,9.690332055091858
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,flashinfer,fp8_block,fp8,64,1024,128,1,0,14.928891897201538
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,128,1,1,1,0,1.0989599823951721
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,128,1,4,1,0,1.0804359912872314
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,128,1,8,1,0,1.0622639954090118
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,128,1,16,1,0,1.0513960123062134
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,128,1,32,1,0,1.0544440150260925
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,128,1,64,1,0,1.037019982933998
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,128,1,128,1,0,1.0540079921483994
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,128,1,256,1,0,1.0989840030670166
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,128,1,512,1,0,1.0399039834737778
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,128,1,1024,1,0,1.0482280105352402
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,128,1,2048,1,0,1.1661040037870407
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,128,1,4096,1,0,1.6860359907150269
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,128,1,8192,1,0,3.409915953874588
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,64,1,1,1,0,1.106088012456894
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,64,1,4,1,0,1.0896279960870743
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,64,1,8,1,0,1.1282039880752563
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,64,1,16,1,0,1.1311440020799637
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,64,1,32,1,0,1.1204160004854202
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,64,1,64,1,0,1.1295040249824524
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,64,1,128,1,0,1.1003799885511398
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,128,1,16384,1,0,12.993611931800842
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,64,1,256,1,0,1.100283995270729
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,64,1,512,1,0,1.1154919862747192
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,128,2,4,1,0,1.0260960087180138
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,64,1,1024,1,0,1.1093120127916336
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,128,2,8,1,0,1.0338679924607277
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,128,2,16,1,0,1.0123759731650352
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,64,1,2048,1,0,1.116016000509262
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,128,2,32,1,0,1.0535959899425507
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,64,1,4096,1,0,1.3241000175476074
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,128,2,64,1,0,1.0007119849324226
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,128,2,128,1,0,1.0481719970703125
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,64,1,8192,1,0,2.135615974664688
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,128,2,256,1,0,0.9953719973564148
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,128,2,512,1,0,1.0293279886245728
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,128,2,1024,1,0,1.0855159908533096
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,128,2,2048,1,0,1.4546440094709396
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,64,1,16384,1,0,5.824959993362427
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,64,2,4,1,0,1.1158040016889572
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,128,2,4096,1,0,2.551488012075424
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,64,2,8,1,0,1.1015480011701584
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,64,2,16,1,0,1.1023639887571335
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,64,2,32,1,0,1.1263720095157623
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,64,2,64,1,0,1.0851760059595108
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,64,2,128,1,0,1.0765079855918884
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,128,2,8192,1,0,7.21942001581192
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,64,2,256,1,0,1.0471080020070076
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,64,2,512,1,0,0.8691319972276688
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,64,2,1024,1,0,0.9135560020804405
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,64,2,2048,1,0,1.1689600050449371
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,64,2,4096,1,0,1.7157839834690094
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,64,2,8192,1,0,3.4285879731178284
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,64,2,16384,1,0,13.16509222984314
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,64,4,4,1,0,1.1628679931163788
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,128,2,16384,1,0,26.508212089538574
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,64,4,8,1,0,1.1243880093097687
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,64,4,16,1,0,1.1284959763288498
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,128,4,4,1,0,1.094876006245613
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,64,4,32,1,0,1.132887989282608
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,128,4,8,1,0,1.100731983780861
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,64,4,64,1,0,1.164683997631073
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,128,4,16,1,0,1.0878040194511414
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,128,4,32,1,0,1.0740960091352463
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,64,4,128,1,0,1.1631880104541779
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,128,4,64,1,0,1.0935920029878616
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,64,4,256,1,0,1.1568120121955872
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,128,4,128,1,0,1.1371319890022278
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,64,4,512,1,0,1.170983999967575
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,128,4,256,1,0,1.1035320162773132
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,64,4,1024,1,0,1.2427159994840622
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,128,4,512,1,0,1.155668005347252
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,64,4,2048,1,0,1.5676080286502838
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,128,4,1024,1,0,1.3994280099868774
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,64,4,4096,1,0,2.634123980998993
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,128,4,2048,1,0,2.231887996196747
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,128,4,4096,1,0,4.7066280245780945
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,64,4,8192,1,0,7.247515976428986
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,32,1,1,1,0,0.9098279923200607
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,32,1,4,1,0,0.9423599988222122
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,32,1,8,1,0,0.9437120035290718
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,32,1,16,1,0,0.9470079988241196
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,32,1,32,1,0,0.9476520046591759
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,32,1,64,1,0,0.958052009344101
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,32,1,128,1,0,0.9365439862012863
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,32,1,256,1,0,0.9535880014300346
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,32,1,512,1,0,0.9495839923620224
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,128,4,8192,1,0,16.697144150733948
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,32,1,1024,1,0,0.9590000137686729
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,32,1,2048,1,0,0.980223998427391
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,32,1,4096,1,0,1.013403981924057
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,32,1,8192,1,0,1.4681840240955353
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,32,1,16384,1,0,2.7990440130233765
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,32,2,4,1,0,0.9920959919691086
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,32,2,8,1,0,0.9801560044288635
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,64,4,16384,1,0,26.25463604927063
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,32,2,16,1,0,0.9726439937949181
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,32,2,32,1,0,1.0054999813437462
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,32,2,64,1,0,0.9826839864253998
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,64,8,4,1,0,1.2765440046787262
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,32,2,128,1,0,0.9920160099864006
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,64,8,8,1,0,1.2841919958591461
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,32,2,256,1,0,0.9590440019965172
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,64,8,16,1,0,1.2985119968652725
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,32,2,512,1,0,0.9969680085778236
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,32,2,1024,1,0,1.0604079961776733
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,64,8,32,1,0,1.2400760054588318
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,32,2,2048,1,0,1.1926079988479614
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,64,8,64,1,0,1.0630160048604012
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,64,8,128,1,0,1.0317479968070984
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,32,2,4096,1,0,1.3900759816169739
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,64,8,256,1,0,1.0436440110206604
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,32,2,8192,1,0,2.2109519839286804
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,64,8,512,1,0,1.120928019285202
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,64,8,1024,1,0,1.3335480093955994
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,64,8,2048,1,0,2.117428034543991
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,32,2,16384,1,0,5.20904803276062
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,32,4,4,1,0,1.1087239980697632
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,32,4,8,1,0,1.122647985816002
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,64,8,4096,1,0,5.052851974964142
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,32,4,16,1,0,1.0680959969758987
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,32,4,32,1,0,1.0824880003929138
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,32,4,64,1,0,1.0926759988069534
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,32,4,128,1,0,1.0187040120363235
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,32,4,256,1,0,1.0103559866547585
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,32,4,512,1,0,1.0534320026636124
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,32,4,1024,1,0,1.054024025797844
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,32,4,2048,1,0,1.1844479888677597
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,32,4,4096,1,0,1.7412640154361725
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,32,4,8192,1,0,3.3092439770698547
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,64,8,8192,1,0,16.732416033744812
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,128,4,16384,1,0,59.79800367355347
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,128,8,4,1,0,0.9987439811229706
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,128,8,8,1,0,0.9650480002164841
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,128,8,16,1,0,0.9525759965181351
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,128,8,32,1,0,0.9735560044646263
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,128,8,64,1,0,0.975663997232914
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,128,8,128,1,0,0.9870439916849136
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,32,4,16384,1,0,12.793935894966125
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,128,8,256,1,0,1.0485399961471558
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,128,8,512,1,0,1.2498160153627396
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,32,8,4,1,0,1.0129960030317307
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,32,8,8,1,0,0.8316559940576553
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,128,8,1024,1,0,1.8276279866695404
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,32,8,16,1,0,0.8207440003752708
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,32,8,32,1,0,0.8475720062851906
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,32,8,64,1,0,0.8725560083985329
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,32,8,128,1,0,0.8681560009717941
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,128,8,2048,1,0,3.775328040122986
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,32,8,256,1,0,0.888824000954628
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,32,8,512,1,0,0.8646920099854469
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,32,8,1024,1,0,0.9380239993333817
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,32,8,2048,1,0,1.3101039975881577
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,32,8,4096,1,0,2.4947679936885834
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,128,8,4096,1,0,11.342912077903748
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,32,8,8192,1,0,6.833619952201843
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,64,8,16384,1,0,56.7130241394043
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,64,16,4,1,0,1.165200024843216
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,64,16,8,1,0,1.144779995083809
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,32,8,16384,1,0,26.61651587486267
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,64,16,16,1,0,1.1503079682588577
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,64,16,32,1,0,1.1712479889392853
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,32,16,4,1,0,0.8599399998784065
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,64,16,64,1,0,1.132404014468193
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,32,16,8,1,0,0.8636720031499863
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,32,16,16,1,0,0.803056001663208
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,64,16,128,1,0,1.2931839898228645
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,32,16,32,1,0,0.8331600055098534
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,128,8,8192,1,0,36.51240396499634
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,32,16,64,1,0,0.8714679852128029
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,64,16,256,1,0,1.2084439992904663
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,32,16,128,1,0,0.8867320045828819
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,64,16,512,1,0,1.4207919985055923
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,32,16,256,1,0,0.8803720027208328
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,32,16,512,1,0,0.9188240021467209
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,64,16,1024,1,0,2.0576880276203156
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,32,16,1024,1,0,1.2165759950876236
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,32,16,2048,1,0,2.098271995782852
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,64,16,2048,1,0,4.007928043603897
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,32,16,4096,1,0,4.849783957004547
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,32,32,4,1,0,1.408723995089531
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,32,32,8,1,0,1.4524959921836853
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,32,32,16,1,0,1.4579800069332123
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,64,16,4096,1,0,11.679468035697937
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,32,32,32,1,0,1.4608280211687088
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,32,32,64,1,0,1.455132007598877
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,64,32,4,1,0,1.0774919986724854
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,64,32,8,1,0,1.0745840221643448
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,32,32,128,1,0,1.4346439987421036
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,64,32,16,1,0,1.0585960000753403
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,32,32,256,1,0,1.1291079968214035
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,64,32,32,1,0,1.0324639976024628
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,32,32,512,1,0,1.1703400015830994
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,64,32,64,1,0,1.05764801800251
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,32,32,1024,1,0,1.8960919827222824
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,64,32,128,1,0,1.1106760054826736
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,64,32,256,1,0,1.1237879991531372
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,64,32,512,1,0,1.8503880202770233
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,32,32,2048,1,0,3.9655839800834656
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,64,32,1024,1,0,3.455051988363266
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,32,32,4096,1,0,11.529784083366394
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,64,32,2048,1,0,8.63812804222107
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,32,64,4,1,0,0.8530000001192093
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,32,64,8,1,0,0.7901640012860298
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,32,64,16,1,0,0.8066440001130104
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,32,64,32,1,0,0.8270959928631783
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,32,64,64,1,0,0.8513519987463951
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,32,64,128,1,0,0.9083119928836823
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,32,64,256,1,0,1.1156119853258133
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,32,64,512,1,0,1.793963998556137
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,32,64,1024,1,0,3.6075759828090668
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,64,32,4096,1,0,23.650728464126587
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,32,64,2048,1,0,8.984367966651917
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,64,64,4,1,0,1.0978559851646423
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,64,64,8,1,0,1.0552919954061508
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,32,128,4,1,0,1.039243996143341
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,64,64,16,1,0,1.0425879955291748
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,32,128,8,1,0,0.8970679938793182
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,64,64,32,1,0,1.0603120028972626
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,32,128,16,1,0,0.8773399963974953
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,64,64,64,1,0,1.1000839918851852
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,32,128,32,1,0,0.8376839980483055
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,64,64,128,1,0,1.285191997885704
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,32,128,64,1,0,0.9507959932088852
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,64,64,256,1,0,1.7696840167045593
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,32,128,128,1,0,1.2558440119028091
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,32,128,256,1,0,1.9452880024909973
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,64,64,512,1,0,3.2410080432891846
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,32,128,512,1,0,3.4478079676628113
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,64,64,1024,1,0,7.595084071159363
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,32,128,1024,1,0,7.602104008197784
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,32,256,4,1,0,1.026263989508152
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,32,256,8,1,0,1.0368920117616653
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,128,8,16384,1,0,116.41538333892822
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,32,256,16,1,0,1.0805040150880814
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,32,256,32,1,0,1.1619399935007095
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,128,16,4,1,0,1.0629680007696152
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,128,16,8,1,0,1.060595989227295
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,32,256,64,1,0,1.3575280159711838
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,64,64,2048,1,0,19.03910803794861
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,128,16,16,1,0,1.0302879810333252
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,128,16,32,1,0,1.0827359855175018
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,32,256,128,1,0,1.811931997537613
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,128,16,64,1,0,1.0979360044002533
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,64,128,4,1,0,1.0592759996652603
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,128,16,128,1,0,0.8920839950442314
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,64,128,8,1,0,1.0169320181012154
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,128,16,256,1,0,1.0998159795999527
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,64,128,16,1,0,1.0615879893302917
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,32,256,256,1,0,3.294455975294113
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,128,16,512,1,0,1.7317400127649307
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,64,128,32,1,0,1.1421239972114563
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,64,128,64,1,0,1.3534480035305023
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,128,16,1024,1,0,3.3496640026569366
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,64,128,128,1,0,1.7395079880952835
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,32,256,512,1,0,6.759216010570526
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,64,128,256,1,0,3.166287988424301
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,128,16,2048,1,0,9.375064015388489
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,32,512,4,1,0,1.0696839839220047
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,32,512,8,1,0,1.1718759983778
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,64,128,512,1,0,6.716136038303375
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,32,512,16,1,0,1.2676600068807602
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,32,512,32,1,0,1.465735986828804
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,32,512,64,1,0,1.9422799795866013
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,32,512,128,1,0,3.3194319903850555
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,64,128,1024,1,0,16.74918806552887
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,128,16,4096,1,0,24.16341233253479
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,64,256,4,1,0,1.2199599891901016
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,128,32,4,1,0,1.1274079978466034
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,128,32,8,1,0,0.7844720035791397
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,64,256,8,1,0,1.0949679762125015
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,128,32,16,1,0,1.0814680010080338
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,32,512,256,1,0,6.713963985443115
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,128,32,32,1,0,0.8050320148468018
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,64,256,16,1,0,1.223715990781784
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,128,32,64,1,0,0.9156119972467422
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,64,256,32,1,0,1.446603998541832
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,128,32,128,1,0,1.3183640092611313
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,128,32,256,1,0,1.8682319968938828
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,64,256,64,1,0,1.9062279909849167
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,32,1024,4,1,0,1.4720399975776672
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,128,32,512,1,0,3.2147760093212128
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,64,256,128,1,0,3.2057000398635864
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,32,1024,8,1,0,1.5898720026016235
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,128,32,1024,1,0,7.600615859031677
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,32,1024,16,1,0,1.7665399760007858
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,64,256,256,1,0,6.46205198764801
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,32,1024,32,1,0,2.241155982017517
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,32,1024,64,1,0,3.6417479813098907
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,64,256,512,1,0,15.313508033752441
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,128,32,2048,1,0,19.04298424720764
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,32,1024,128,1,0,6.701879978179932
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,64,512,4,1,0,1.6564479917287827
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,64,512,8,1,0,1.7164239883422852
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,64,512,16,1,0,1.8195679932832718
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,64,512,32,1,0,2.2693320214748383
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,64,512,64,1,0,3.4721639752388
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,64,512,128,1,0,6.579611957073212
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,128,32,4096,1,0,50.923168659210205
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,64,512,256,1,0,14.129243850708008
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,128,64,4,1,0,0.9405200034379959
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,128,64,8,1,0,0.9933519959449768
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,128,64,16,1,0,0.9751320034265518
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,128,64,32,1,0,1.0761480033397675
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,128,64,64,1,0,1.2865799963474274
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,64,1024,4,1,0,2.1915640234947205
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,128,64,128,1,0,1.7338640242815018
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,128,64,256,1,0,3.1189320385456085
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,64,1024,8,1,0,2.348904013633728
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,128,64,512,1,0,6.6645559668540955
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,64,1024,16,1,0,2.752819985151291
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,64,1024,32,1,0,4.03219997882843
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,128,64,1024,1,0,16.962363958358765
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,64,1024,64,1,0,7.4520920515060425
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,64,1024,128,1,0,13.528263926506042
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,128,64,2048,1,0,39.85671615600586
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,128,128,4,1,0,1.0595999956130981
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,128,128,8,1,0,1.1078600138425827
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,128,128,16,1,0,1.1885920017957687
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,128,128,32,1,0,1.3823919743299484
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,128,128,64,1,0,1.8688400089740753
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,128,128,128,1,0,3.136507958173752
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,128,128,256,1,0,6.813096046447754
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,128,128,512,1,0,14.761792063713074
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,128,128,1024,1,0,35.2328519821167
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,128,256,4,1,0,1.4286480098962784
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,128,256,8,1,0,1.49480801820755
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,128,256,16,1,0,1.6799800097942352
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,128,256,32,1,0,2.1490519642829895
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,128,256,64,1,0,3.551820009946823
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,128,256,128,1,0,6.338848054409027
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,128,256,256,1,0,13.796576023101807
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,128,256,512,1,0,30.80166792869568
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,128,512,4,1,0,2.1542919874191284
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,128,512,8,1,0,2.3051480054855347
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,128,512,16,1,0,2.740939974784851
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,128,512,32,1,0,4.182532072067261
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,128,512,64,1,0,7.686287939548492
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,128,512,128,1,0,14.958215951919556
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,16,1,1,1,0,1.4581560045480728
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,16,1,4,1,0,1.5180159956216812
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,16,1,8,1,0,1.5393279939889908
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,16,1,16,1,0,1.5514480024576187
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,16,1,32,1,0,1.4676720201969147
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,128,512,256,1,0,30.083664417266846
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,16,1,64,1,0,1.5132440030574799
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,16,1,128,1,0,1.442679986357689
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,16,1,256,1,0,1.2805280089378357
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,16,1,512,1,0,1.3838480114936829
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,16,1,1024,1,0,1.37050399184227
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,16,1,2048,1,0,1.3857879936695099
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,16,1,4096,1,0,1.4450079947710037
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,128,1024,4,1,0,3.5014000236988068
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,16,1,8192,1,0,1.5215160101652145
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,16,1,16384,1,0,2.256812036037445
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,16,2,4,1,0,1.4217279851436615
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,128,1024,8,1,0,4.080727934837341
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,16,2,8,1,0,0.984532006084919
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,16,2,16,1,0,1.0301560163497925
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,16,2,32,1,0,1.1204919815063477
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,16,2,64,1,0,1.1296360045671463
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,128,1024,16,1,0,5.667732000350952
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,16,2,128,1,0,1.1495560109615326
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,16,2,256,1,0,1.3996399939060211
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,16,2,512,1,0,1.2050999999046326
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,16,2,1024,1,0,1.2314079850912094
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,16,2,2048,1,0,1.259215995669365
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,16,2,4096,1,0,1.2889879941940308
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,16,2,8192,1,0,1.7154320180416107
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,128,1024,32,1,0,9.343799948692322
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,16,2,16384,1,0,3.0929399728775024
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,16,4,4,1,0,1.4137279987335205
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,16,4,8,1,0,1.3897719979286194
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,16,4,16,1,0,1.394471988081932
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,16,4,32,1,0,1.373420000076294
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,16,4,64,1,0,1.3887600153684616
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,16,4,128,1,0,1.3372439742088318
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,16,4,256,1,0,1.3043399900197983
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,16,4,512,1,0,1.3236400187015533
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,16,4,1024,1,0,1.3557840138673782
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,16,4,2048,1,0,1.4006000012159348
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,128,1024,64,1,0,17.238131761550903
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,16,4,4096,1,0,1.5602199882268906
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,16,4,8192,1,0,2.3875999748706818
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,16,4,16384,1,0,5.54503208398819
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,16,8,4,1,0,1.4508440047502518
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,16,8,8,1,0,1.3570159822702408
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,16,8,16,1,0,1.2923080176115036
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,16,8,32,1,0,1.346699982881546
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,16,8,64,1,0,1.420152023434639
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,16,8,128,1,0,1.4286640286445618
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,16,8,256,1,0,1.5118280202150345
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,16,8,512,1,0,1.4396679997444153
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,16,8,1024,1,0,1.4185559898614883
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,16,8,2048,1,0,1.5040920078754425
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,16,8,4096,1,0,2.021228015422821
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,128,1024,128,1,0,29.75379204750061
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,16,8,8192,1,0,3.6033799946308136
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,16,8,16384,1,0,12.684676051139832
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,16,16,4,1,0,0.9621280059218407
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,16,16,8,1,0,0.9361200109124184
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,16,16,16,1,0,0.9251960068941116
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,16,16,32,1,0,0.9199159890413284
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,16,16,64,1,0,0.9303439930081367
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,16,16,128,1,0,0.9519520103931427
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,16,16,256,1,0,1.0086359977722168
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,16,16,512,1,0,1.01511999219656
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,16,16,1024,1,0,1.0298320055007935
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,16,16,2048,1,0,1.3983359932899475
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,16,16,4096,1,0,2.7778319716453552
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,16,32,4,1,0,0.9571320116519928
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,16,32,8,1,0,0.9098479896783829
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,16,32,16,1,0,0.9410999938845634
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,16,32,32,1,0,0.9074839949607849
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,16,32,64,1,0,0.9047159925103188
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,16,32,128,1,0,0.9070759937167168
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,16,32,256,1,0,0.9296479895710945
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,16,32,512,1,0,0.9753879904747009
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,16,32,1024,1,0,1.2390040159225464
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,16,32,2048,1,0,2.3983040153980255
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,16,32,4096,1,0,5.448759973049164
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,16,64,4,1,0,0.893092006444931
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,16,64,8,1,0,0.9127679988741875
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,16,64,16,1,0,0.9135519936680794
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,16,64,32,1,0,0.9260800033807755
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,16,64,64,1,0,0.897367998957634
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,16,64,128,1,0,0.9144720062613487
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,16,64,256,1,0,0.9729719907045364
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,16,64,512,1,0,1.1902600079774857
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,16,64,1024,1,0,2.173355996608734
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,16,64,2048,1,0,4.576267957687378
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,16,128,4,1,0,0.9174560010433197
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,16,128,8,1,0,0.9178480058908463
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,16,128,16,1,0,0.8890319988131523
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,16,128,32,1,0,0.845148004591465
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,16,128,64,1,0,0.8988360017538071
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,16,128,128,1,0,0.9733719974756241
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,16,128,256,1,0,1.1633680015802383
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,16,128,512,1,0,2.071928024291992
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,16,128,1024,1,0,4.178164005279541
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,16,256,4,1,0,0.9703759923577309
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,16,256,8,1,0,0.9406000003218651
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,16,256,16,1,0,0.9722679927945137
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,16,256,32,1,0,0.9412799999117851
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,16,256,64,1,0,1.0394759774208069
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,16,256,128,1,0,1.2350919842720032
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,16,256,256,1,0,2.0253679752349854
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,16,256,512,1,0,3.9789080023765564
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,16,512,4,1,0,0.8833000063896179
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,16,512,8,1,0,0.9140520021319389
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,16,512,16,1,0,0.9623199999332428
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,16,512,32,1,0,1.0764640122652054
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,16,512,64,1,0,1.2528079748153687
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,16,512,128,1,0,2.047976016998291
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,16,512,256,1,0,3.8823679983615875
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,16,1024,4,1,0,1.117568016052246
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,16,1024,8,1,0,1.1617519855499268
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,16,1024,16,1,0,1.2919919937849045
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,16,1024,32,1,0,1.476492002606392
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,16,1024,64,1,0,2.193139970302582
SGLang,0.5.8.post1,NVIDIA GB200,mla_context,trtllm_mla,fp8_block,fp8,16,1024,128,1,0,3.924076020717621
